In [None]:
import numpy as np
from pprint import pprint

import random 
import pandas as pd

from sklearn.datasets import make_regression
# from autosklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

import autosklearn
from autosklearn.regression import AutoSklearnRegressor

In [None]:
# Columns kept from the pickled frame, in this exact order: the model input,
# the 22 pointType_* columns (pointType_21 first, then pointType_0..20) and
# the per-row bookkeeping columns.
selected_columns = (
    ['input_features', 'pointType_21']
    + [f'pointType_{idx}' for idx in range(21)]
    + ['participant_number', 'left_or_right', 'sample_number']
)

# NOTE(review): unpickling can execute arbitrary code -- only load a trusted "data.pkl".
df = pd.read_pickle("data.pkl")[selected_columns]

In [None]:
# Sanity check: display the columns that survived the selection above.
df.columns

In [None]:
# Unique participant ids, in order of first appearance in the frame.
participant_list = df['participant_number'].drop_duplicates().to_list()

# Fixed-seed, in-place shuffle of the participant order.
random.Random(42).shuffle(participant_list)
print(participant_list)

# The split is made over participant ids (not rows): 85 % of the ids go to
# the training list and the remainder to the test list.
participant_list_train, participant_list_test = train_test_split(
    participant_list,
    train_size=0.85,
    random_state=115,
)

print(participant_list_train, participant_list_test)

In [None]:
# Seed for the row shuffles below. Every other random step in this notebook
# is seeded; without a seed here the row order (and therefore the x/y arrays
# built from these frames) would change on every run. trainAndPredict passes
# "shuffle": False to auto-sklearn, so the row order produced here can affect
# how it splits the training data internally.
ROW_SHUFFLE_SEED = 42

# Rows of the test participants, shuffled reproducibly, with a fresh 0..n-1 index.
df_test = (
    df[df['participant_number'].isin(participant_list_test)]
    .sample(frac=1, random_state=ROW_SHUFFLE_SEED)
    .reset_index(drop=True)
)

# Rows of the training participants, shuffled reproducibly, with a fresh 0..n-1 index.
df_train = (
    df[df['participant_number'].isin(participant_list_train)]
    .sample(frac=1, random_state=ROW_SHUFFLE_SEED)
    .reset_index(drop=True)
)

In [None]:
# Targets: every column whose name contains 'pointType'. Each cell holds a
# sequence, so converting the nested list yields a 3-D array
# (rows, pointType columns, values per cell) -- the reshape below relies on
# that third axis existing.
arr = df_train.filter(like='pointType').to_numpy()
arr_np = np.array(arr.tolist())

# Flatten the last two axes so each row becomes one flat target vector.
y_train = arr_np.reshape(len(arr_np), arr_np.shape[1] * arr_np.shape[2])

# Inputs: one feature sequence per row, stacked into an array.
x_train = np.array(df_train['input_features'].to_list())

In [None]:
# Targets: every column whose name contains 'pointType'. Each cell holds a
# sequence, so converting the nested list yields a 3-D array
# (rows, pointType columns, values per cell) -- the reshape below relies on
# that third axis existing.
arr = df_test.filter(like='pointType').to_numpy()
arr_np = np.array(arr.tolist())

# Flatten the last two axes so each row becomes one flat target vector.
y_test = arr_np.reshape(len(arr_np), arr_np.shape[1] * arr_np.shape[2])

# Inputs: one feature sequence per row, stacked into an array.
x_test = np.array(df_test['input_features'].to_list())

In [None]:
# Shapes of the full train/test inputs and targets, in that order.
tuple(array.shape for array in (x_train, y_train, x_test, y_test))

In [None]:
# Inputs are cut at the middle column; targets are cut at column 30.
# NOTE(review): 30 is hard-coded while the input cut is computed from the
# array width -- confirm it matches the intended target layout.
x_train_top_half, x_train_bottom_half = np.hsplit(x_train, [x_train.shape[1] // 2])
y_train_top_half, y_train_bottom_half = np.hsplit(y_train, [30])

x_test_top_half, x_test_bottom_half = np.hsplit(x_test, [x_test.shape[1] // 2])
y_test_top_half, y_test_bottom_half = np.hsplit(y_test, [30])

# Drop the references to the un-split arrays so later cells cannot use them
# by accident (this also means the cell cannot be re-run on its own).
x_train = y_train = x_test = y_test = None

In [None]:
# Shapes of the top-half train/test inputs and targets, in that order.
tuple(
    array.shape
    for array in (x_train_top_half, y_train_top_half, x_test_top_half, y_test_top_half)
)

In [None]:
# Shapes of the bottom-half train/test inputs and targets, in that order.
tuple(
    array.shape
    for array in (x_train_bottom_half, y_train_bottom_half, x_test_bottom_half, y_test_bottom_half)
)

In [None]:
def trainAndPredict(x_train, y_train, x_test, y_test, which_half,
                    time_left_for_this_task=3600, per_run_time_limit=150):
    """Fit an auto-sklearn regressor, report it, and pickle the test predictions.

    The function fits on ``(x_train, y_train)``, prints the leaderboard and the
    selected models, prints the mean absolute error on ``(x_test, y_test)``,
    and writes a copy of the notebook-level ``df_test`` frame, extended with a
    ``predictions`` and a ``y_test`` column, to
    ``data_predicted_<which_half>_time<T>_perRun<R>.pkl``.

    Parameters
    ----------
    x_train, y_train : array-like
        Training inputs and targets passed to ``AutoSklearnRegressor.fit``.
    x_test, y_test : numpy.ndarray
        Test inputs and targets. They must have one row per row of the
        notebook-level ``df_test`` frame, in the same order.
    which_half : str
        Label embedded in the output file name (e.g. ``"topHalf"``).
    time_left_for_this_task : int, default 3600
        Overall auto-sklearn search budget; also embedded in the file name.
    per_run_time_limit : int, default 150
        Budget per single model fit; also embedded in the file name.

    Raises
    ------
    ValueError
        If the test arrays do not have the same number of rows as ``df_test``.

    Notes
    -----
    ``df_test`` is read from the notebook's global scope but is not modified:
    the extra columns are added to a copy, so repeated calls do not overwrite
    each other's columns on the shared frame.
    """
    # Fail fast: the predictions are attached row-by-row to df_test, so a
    # mismatch should surface before the (long) search rather than after it.
    if len(x_test) != len(df_test) or len(y_test) != len(df_test):
        raise ValueError(
            "x_test/y_test must have one row per row of df_test "
            f"(got {len(x_test)} and {len(y_test)} rows for {len(df_test)} df_test rows)"
        )

    automl = AutoSklearnRegressor(
        time_left_for_this_task=time_left_for_this_task,
        per_run_time_limit=per_run_time_limit,
        seed=14141,
        metric=autosklearn.metrics.mean_squared_error,
        # The rows are not shuffled again before auto-sklearn's internal split.
        resampling_strategy_arguments={"shuffle": False},
    )
    automl.fit(x_train, y_train)

    print(automl.leaderboard())

    pprint(automl.show_models(), indent=4)

    predictions = automl.predict(x_test)
    print("Mean absolute error score:", mean_absolute_error(y_test, predictions))

    # Work on a copy so the shared df_test frame keeps its original columns.
    results = df_test.copy()
    results['predictions'] = predictions.tolist()
    results['y_test'] = y_test.tolist()

    # File name is derived from the budgets actually used, so the two cannot drift apart.
    results.to_pickle(
        f"data_predicted_{which_half}_time{time_left_for_this_task}_perRun{per_run_time_limit}.pkl"
    )

In [None]:
# Train and evaluate on the top-half inputs/targets.
trainAndPredict(
    x_train=x_train_top_half,
    y_train=y_train_top_half,
    x_test=x_test_top_half,
    y_test=y_test_top_half,
    which_half="topHalf",
)

In [None]:
# Train and evaluate on the bottom-half inputs/targets.
trainAndPredict(
    x_train=x_train_bottom_half,
    y_train=y_train_bottom_half,
    x_test=x_test_bottom_half,
    y_test=y_test_bottom_half,
    which_half="bottomHalf",
)