In [37]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import GridSearchCV

from sklearn import svm, neighbors, tree
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, BaggingRegressor # ???

from sklearn.metrics import accuracy_score

In [9]:
# DATA EXTRACTION
# Both CSV files use ';' as the field separator and are read as UTF-8, so they
# are loaded with the same options: first the training set, then the final
# test inputs.
train_df, final_test_df = (
    pd.read_csv(csv_path, sep=';', encoding='utf_8')
    for csv_path in ('data/TRAIN_DATA.csv', 'data/TEST_INPUT.csv')
)

In [21]:
# SPLIT DATA
# The data already comes separated into train and test files, but the expected
# results for the test file are not provided. The test file is therefore only
# used to produce the final predictions (which are checked later), and a
# hold-out split of the training data is still needed to evaluate the models.

target = 'WG'
features = train_df.columns.to_list()
features.remove(target)  # every column except the target is a feature

X, y = train_df[features], train_df[target]

# Hold out 20% of the rows; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

In [28]:
# PREPROCESSING
# We only work with numerical features this time.

# 'number' selects every numeric dtype. The previous ['float64', 'int'] list
# skipped other numeric columns (e.g. float32), which then ended up in the
# ColumnTransformer remainder instead of being scaled and imputed.
numerical_features = X_train.select_dtypes(include='number').columns.to_list()

# Scale first, then fill the missing values with the column mean. The step
# names ("scaler", "missing_values") are kept as-is because they form part of
# the parameter paths used when tuning the pipelines.
numerical_transformer = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("missing_values", SimpleImputer(strategy="mean")),
    ]
)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numerical_transformer, numerical_features),
    ],
    # Drop every column that is not in numerical_features. With 'passthrough'
    # those columns (e.g. strings) were forwarded untouched to the regressors,
    # which contradicts the "numerical features only" intent stated above.
    remainder='drop',
)

In [38]:
# REGRESSION MODELS WITH PIPELINE UNION
def _with_preprocessing(regressor):
    """Return a two-step pipeline: the shared ``preprocessor``, then ``regressor``.

    The steps are named "preprocessor" and "regressor".
    """
    # NOTE(review): every pipeline built here references the same
    # `preprocessor` object (it is not copied) -- confirm this is intended.
    return Pipeline(
        steps=[("preprocessor", preprocessor), ("regressor", regressor)]
    )


# Support vector, k-nearest-neighbours and decision tree regressors, all with
# their default hyperparameters.
SV = _with_preprocessing(svm.SVR())
Kneighbors = _with_preprocessing(neighbors.KNeighborsRegressor())
Tree = _with_preprocessing(tree.DecisionTreeRegressor())


In [None]:
# PARAM GRID FOR TWO HYPERPARAMETERS
# Grid for the SV pipeline. The keys carry the "regressor__" prefix because the
# SVR lives in the pipeline step named "regressor"; an un-prefixed key would be
# rejected when the grid is applied to the pipeline.
# NOTE(review): the grid was previously empty, so a search would only have
# evaluated the default SVR. The two hyperparameters and their candidate
# values below are a starting point -- adjust them to the data.
SV_param_grid = {
    "regressor__C": [0.1, 1.0, 10.0, 100.0],      # regularisation strength
    "regressor__gamma": ["scale", 0.01, 0.1, 1.0],  # kernel coefficient
}
