In [1]:
# Installs
!pip install polars
!pip install lets-plot
!pip install xgboost

# Imports
import polars as pl
import xgboost as xgb
import numpy as np
import optuna
import math
import pandas as pd
import statistics as stat

from lets_plot import *
from lets_plot.mapping import as_discrete
from sklearn import model_selection
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import plotly
import plotly.figure_factory as ff

# One-time notebook setup for the two plotting libraries imported above.
# Enable lets-plot output in this notebook.
LetsPlot.setup_html()
# Enable plotly's offline notebook mode.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead
# of embedding it in the notebook -- confirm against the plotly docs.
plotly.offline.init_notebook_mode(connected = True)

Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable
Defaulting to user installation because normal site-packages is not writeable


ModuleNotFoundError: No module named 'optuna'

In [None]:
# Split into training and validation sets
# Rows strictly before the split date are used for training; rows on or after
# it are used for validation.
#
# Both filters are evaluated against the *unsplit* frame before either name is
# rebound (the right-hand tuple is built first). Rebinding `df_train` first and
# then deriving `df_valid` from it would always yield an empty validation set,
# because no row can be both < and >= the split date.
#
# NOTE: this cell rebinds `df_train`, so re-running it without reloading the
# data first will produce an empty `df_valid`.
split_date = pl.datetime(2017, 6, 1)
df_train, df_valid = (
    df_train.filter(pl.col("DateTime") < split_date),
    df_train.filter(pl.col("DateTime") >= split_date),
)

In [None]:

# Read the training CSV with pandas and drop the 'Unnamed: 0' column
# (presumably a leftover index column written when the CSV was saved).
# NOTE(review): this yields a pandas frame with traffic-sensor columns, while
# the Optuna objective further down calls polars methods and expects
# "DateTime" / "Junction" / "Vehicles" columns -- confirm which dataset the
# model is meant to use.
df_train = pd.read_csv("Train.csv").drop(columns='Unnamed: 0')

# Columns kept for modelling, in the order they appear in the selected frame.
selected_columns = [
    'year',
    'id arc',
    'id noeud amont',
    'id noeud aval',
    'etat trafic',
    'etat arc',
    'Temp',
    'Pluie 1h',
    'Pluie 3h',
    'Neige',
    'Visibilité Horiz',
    'cos_hour',
    'sin_hour',
    'cos_day',
    'sin_day',
    'cos_month',
    'sin_month',
    'débit horaire',
    'taux occupation',
]
df_train_modified = df_train[selected_columns]

In [None]:
# Suppress optuna log messages
# The logging threshold is raised to WARNING, so lower-level (e.g. INFO)
# messages are not shown while the study runs.
optuna.logging.set_verbosity(optuna.logging.WARNING) 

# Optuna objective function
def objective_xgb(trial):
    """
    Objective function for an Optuna study.

    Trains an XGBoost regressor using hyperparameters proposed by
    ``trial`` and scores it on the validation data.

    Reads the module-level ``df_train`` and ``df_valid`` frames, which
    are expected to be polars data frames containing the columns
    "DateTime", "Junction" and "Vehicles" (the latter is the target).

    Parameters
    ----------
    trial : the Optuna trial used to suggest hyperparameters.

    Returns
    -------
    float
        Root mean squared error of the predictions on ``df_valid``.
    """
    # Columns that are not model features; "Vehicles" is the target
    non_feature_cols = ["DateTime", "Junction", "Vehicles"]
    target_col = "Vehicles"

    # Feature matrices and target vectors as numpy arrays
    features_train = df_train.drop(non_feature_cols).to_numpy()
    features_valid = df_valid.drop(non_feature_cols).to_numpy()
    target_train = df_train.get_column(target_col).to_numpy()
    target_valid = df_valid.get_column(target_col).to_numpy()

    train_matrix = xgb.DMatrix(features_train, label=target_train)
    valid_matrix = xgb.DMatrix(features_valid, label=target_valid)

    # Fixed settings first, then the tunable ones. The suggestions are
    # requested in the order eta, max_depth, lambda, alpha, num_boost_round.
    booster_params = {
        'objective': 'reg:squarederror',
        'eval_metric': 'rmse',
        'seed': 19970507,
    }
    booster_params['eta'] = trial.suggest_float("eta", 1e-2, 0.25, log=True)
    booster_params['max_depth'] = trial.suggest_int("max_depth", 1, 7)
    booster_params['lambda'] = trial.suggest_float("lambda", 1e-8, 100.0, log=True)
    booster_params['alpha'] = trial.suggest_float("alpha", 1e-8, 100.0, log=True)
    n_rounds = trial.suggest_int("num_boost_round", 20, 3000)

    # Fit the booster; both sets are passed as evaluation sets, but per-round
    # output stays off (switch verbose_eval to True to watch training progress)
    booster = xgb.train(
        booster_params,
        dtrain=train_matrix,
        num_boost_round=n_rounds,
        evals=[(train_matrix, 'train'), (valid_matrix, 'eval')],
        verbose_eval=False,
    )

    # Score on the validation set: RMSE = sqrt(MSE)
    valid_predictions = booster.predict(valid_matrix)
    validation_mse = mean_squared_error(target_valid, valid_predictions)
    return math.sqrt(validation_mse)


# Set up and run the Optuna study
# The sampler is seeded (with the same seed the XGBoost model uses) so the
# hyperparameter search gives the same result on every run. TPESampler is
# Optuna's default sampler, so only the seeding changes, not the algorithm.
study_xgb = optuna.create_study(
    direction = 'minimize',
    sampler = optuna.samplers.TPESampler(seed = 19970507),
)
study_xgb.optimize(objective_xgb, n_trials = 10)

# Create a table showing the best parameters
# `best_params` is read once; its keys are the names passed to
# `trial.suggest_*` inside `objective_xgb`.
best_params = study_xgb.best_params
xgb_table = [["Parameter", "Optimal Value from Optuna"],
            ["Iterations (num_boost_round)", best_params['num_boost_round']],
            ['Learning Rate (eta)', round(best_params['eta'], 3)],
            ['Max Depth (max_depth)', round(best_params['max_depth'], 3)],
            ['Lambda (lambda)', round(best_params['lambda'], 3)],
            ['Alpha (alpha)', round(best_params['alpha'], 3)]]

# Last expression of the cell: renders the table as a plotly figure
ff.create_table(xgb_table)