In [None]:
import optuna

from sklearn.metrics import mean_squared_error
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit

In [None]:
# Load the training features and the training targets from the CSV files
# located in the notebook's working directory.
X, y = pd.read_csv('X_train.csv'), pd.read_csv('Y_train.csv')

In [None]:
# X: feature table, one row per sample (n_samples, n_features)
# y: target table read from Y_train.csv; assumed to be row-aligned with X,
#    since the same positional indices are applied to both below.

tscv = TimeSeriesSplit(n_splits=5)
for fold, (train_index, test_index) in enumerate(tscv.split(X), start=1):
    # Report each fold's boundaries and size instead of dumping raw index arrays.
    print(
        f"Fold {fold}: "
        f"TRAIN rows {train_index[0]}-{train_index[-1]} ({len(train_index)}) | "
        f"TEST rows {test_index[0]}-{test_index[-1]} ({len(test_index)})"
    )

# Only the final split is kept as the train/validation pair used further down.
# TimeSeriesSplit yields splits in chronological order, so this is the split
# with the largest training window; earlier folds are reported but not used.
X_train, X_valid = X.iloc[train_index], X.iloc[test_index]
y_train, y_valid = y.iloc[train_index], y.iloc[test_index]

In [None]:
# Registry of candidate regressors: display name -> estimator class.
# The values are classes, not instances; they are instantiated with the
# hyperparameters chosen for each trial.
MODEL_NAME2MODEL = {
    "LinearRegression": LinearRegression,
    "Ridge": Ridge,
    "Lasso": Lasso,
    "RandomForest": RandomForestRegressor,
    "GradientBoosting": GradientBoostingRegressor,
    "LGBM": LGBMRegressor,
    "XGB": XGBRegressor,
}

In [None]:
def _to_1d_target(target):
    """Return a single-column target as a flat 1-D array; pass anything else through."""
    values = np.asarray(target)
    if values.ndim == 2 and values.shape[1] == 1:
        return values.ravel()
    return target


def objective(trial):
    """Optuna objective: sample a model family and hyperparameters, return validation RMSE.

    Reads the module-level ``X_train``/``y_train`` and ``X_valid``/``y_valid``
    frames and the ``MODEL_NAME2MODEL`` registry.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the "Model" choice and the model-specific
        hyperparameters.

    Returns
    -------
    float
        Root mean squared error of the fitted model on the validation data.

    Raises
    ------
    ValueError
        If the sampled model name has no search space defined in this function.

    Notes
    -----
    NOTE(review): names such as "n_estimators", "max_depth" and "alpha" are
    reused across branches (sometimes with different ranges), so the study
    records them under a single name. Per-model prefixes would keep them apart
    but would also change the keys of ``study.best_params`` -- confirm before
    changing.
    """
    # Fixed seed for the stochastic estimators so that the same trial
    # parameters always produce the same score.
    seed = 42

    model_name = trial.suggest_categorical("Model", list(MODEL_NAME2MODEL.keys()))
    if model_name == "LinearRegression":
        # Nothing to tune; the value is fixed and therefore not recorded in the trial.
        params = {
            "fit_intercept": True,
        }
    elif model_name == "Ridge":
        params = {
            "alpha": trial.suggest_float("alpha", 0.1, 1.0),
        }
    elif model_name == "Lasso":
        params = {
            "alpha": trial.suggest_float("alpha", 0.1, 1.0),
        }
    elif model_name == "RandomForest":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 50, 1000),
            "max_depth": trial.suggest_int("max_depth", 5, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 4),
            "random_state": seed,
        }
    elif model_name == "GradientBoosting":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 200, 2000),
            "learning_rate": trial.suggest_float("learning_rate", 0.05, 0.5),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "min_samples_split": trial.suggest_int("min_samples_split", 2, 10),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 4),
            "random_state": seed,
        }
    elif model_name == "LGBM":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 200, 2000),
            "learning_rate": trial.suggest_float("learning_rate", 0.05, 0.5),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "num_leaves": trial.suggest_int("num_leaves", 31, 100),
            "min_child_samples": trial.suggest_int("min_child_samples", 20, 100),
            "random_state": seed,
        }
    elif model_name == "XGB":
        params = {
            "n_estimators": trial.suggest_int("n_estimators", 200, 2000),
            "learning_rate": trial.suggest_float("learning_rate", 0.05, 0.5),
            "max_depth": trial.suggest_int("max_depth", 3, 10),
            "random_state": seed,
        }
    else:
        # A model was added to the registry without a matching search space.
        raise ValueError(f"No search space defined for model {model_name!r}")

    model = MODEL_NAME2MODEL[model_name](**params)
    # The targets come from a DataFrame; a single target column is flattened so
    # the estimators receive a 1-D y instead of an (n, 1) column vector.
    model.fit(X_train, _to_1d_target(y_train))
    prediction = model.predict(X_valid)
    return np.sqrt(mean_squared_error(_to_1d_target(y_valid), prediction))


In [None]:
# Seed the sampler so its random draws are repeatable across notebook runs.
# TPESampler is Optuna's default sampler, so only the seeding changes.
# (Fully repeatable results also require the objective itself to be deterministic.)
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=100)

In [None]:
# Split the best trial's parameters into the chosen model family and the
# hyperparameters that belong to it.
best_params = dict(study.best_params)
best_model_name = best_params.pop("Model")

# Show the winning model, its objective value (RMSE) and its hyperparameters
# together so they can be copied out of the notebook.
print(f"Best model: {best_model_name} (RMSE = {study.best_value:.5f})")
best_params

Use the results above (`best_model_name` and `best_params`) to set the model and its hyperparameters in the `submission_working.py` file.