# Task 1 Best solution
This is our solution for the first task of the 2024 Tensor Tournament. For this particular problem, a less sophisticated solution, such as the one presented here (a plain linear regression on label-encoded features), is the optimal predictor.

In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Input/output locations and the target column, kept in one place so they are easy to change.
TRAIN_CSV = "./data/task1/train.csv"
TEST_CSV = "./data/task1/test.csv"
TARGET_COL = "time_remaining"
PREDICTIONS_PATH = "time_remaining.npy"

# Load the training data; columns are sorted by name so the feature order is deterministic.
df_train = pd.read_csv(TRAIN_CSV)
df_train = df_train.reindex(sorted(df_train.columns), axis=1)

y_train = df_train[TARGET_COL]
X_train = df_train.drop(columns=[TARGET_COL])

# Load the test data and verify it provides every feature the model is trained on.
X_test = pd.read_csv(TEST_CSV)
missing_cols = X_train.columns.difference(X_test.columns)
if len(missing_cols) > 0:
    raise ValueError(f"{TEST_CSV} is missing feature columns: {list(missing_cols)}")
# Same columns in the same (sorted) order as the training features; any extra
# test-only columns are dropped because the model cannot use them.
X_test = X_test.reindex(columns=X_train.columns)

# Label-encode every text column, keeping the fitted encoder per column.
encoders = {}

for col in X_train.columns:
    # Covers classic `object` columns as well as pandas' dedicated string dtype
    # (NOTE(review): newer pandas versions may infer the latter by default - confirm
    # against the pandas version used to run this notebook).
    is_text_col = (
        pd.api.types.is_object_dtype(X_train[col])
        or pd.api.types.is_string_dtype(X_train[col])
    )
    if not is_text_col:
        continue

    le = LabelEncoder()
    # Fit on train and test values together: LabelEncoder.transform raises ValueError
    # on labels it has not seen, so a category present only in the test set would
    # otherwise abort the run. When the test set has no new categories the resulting
    # codes are identical to fitting on the training column alone.
    le.fit(pd.concat([X_train[col], X_test[col]], ignore_index=True))
    X_train[col] = le.transform(X_train[col])
    X_test[col] = le.transform(X_test[col])
    encoders[col] = le

# Train the Linear Regression model on the encoded features.
linear_regressor = LinearRegression()
linear_regressor.fit(X_train, y_train)

# Predict the remaining time and flatten to 1-D.
time_remaining = linear_regressor.predict(X_test).reshape(-1)

# reshape(-1) only flattens; it cannot enforce a particular length. Check explicitly
# that there is exactly one prediction per test row (the original note expects
# (2000,), i.e. presumably a 2000-row test set - TODO confirm).
assert time_remaining.shape == (len(X_test),), time_remaining.shape

# Save the predictions in NumPy .npy format.
np.save(PREDICTIONS_PATH, time_remaining)

In [3]:
# Sanity check: the predictions should be a flat 1-D array (the recorded run shows (2000,)).
time_remaining.shape

(2000,)

In [6]:
# Inspect the element type of the saved predictions (float64 in the recorded run).
time_remaining.dtype

dtype('float64')

In [5]:
# Preview the predicted values; NumPy abbreviates long arrays in the repr.
time_remaining

array([859.86353243, 862.67753805, 951.44125108, ..., 857.23252308,
       903.55035409, 898.13250452])