# Train model

# 1. Imports

## 1.1 Packages

In [279]:
import os
import pandas as pd
import sys

In [280]:
# Make the project's modules under ../src importable from this notebook.
sys.path.append("../src/")

# Project-local helpers (resolved through the path entry added above).
from prepare_data import get_holidays
from feature_engineering import (
    extract_date_features,
    add_lockdown_periods,
    add_holidays_period,
    get_split_train_val_cv,
)
from train_model import train_model_cv_mlflow

## 1.2 Options

In [281]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [282]:
# Shared notebook settings used by the cells below.
path_data: str = "../data/processed"  # folder the processed input files are read from
feat_date: str = "Date"  # date column name handed to the feature-engineering helpers

## 1.3 Datasets

In [283]:
# Training features: a pickled DataFrame stored in the processed-data folder.
df_train = pd.read_pickle(
    os.path.join(path_data, "train.pkl")
)
# Training target: CSV from the same folder; its first column becomes the index.
y_train = pd.read_csv(
    os.path.join(path_data, "target_train.csv"),
    index_col=0,
)

# 2. Prepare data

In [284]:
# Feature-engineering pipeline applied to the training frame:
#   1. extract_date_features  - calendar features derived from the date column
#   2. add_lockdown_periods   - lockdown indicator
#   3. add_holidays_period    - one holiday description column per school zone (A, B, C)
#   4. drop the raw date column and "Fourni" so they are not part of the result
# The date column is referenced through `feat_date` everywhere (including the final drop)
# so that changing the setting in one place keeps the whole chain consistent.
# NOTE: this cell overwrites `df_train`; because the dropped columns are gone afterwards,
# re-running it requires re-running the dataset-loading cell first.
df_train = (
    df_train
    .pipe(extract_date_features, feat_date=feat_date)
    .pipe(add_lockdown_periods, feat_date=feat_date)
    .pipe(add_holidays_period, feat_date=feat_date, zone="Zone A")
    .pipe(add_holidays_period, feat_date=feat_date, zone="Zone B")
    .pipe(add_holidays_period, feat_date=feat_date, zone="Zone C")
    .drop(columns=[feat_date, "Fourni"])
)

In [285]:
# Preview the training frame after feature engineering.
df_train

Unnamed: 0,id,Date_year,Date_month,Date_day,Date_weekday,Date_weekend,lockdown,Description_ZoneA,Description_ZoneB,Description_ZoneC
92,-6795017860123346820,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
72,4324517785573311838,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
71,5056562842583071429,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
69,-5277685202972058648,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
68,4079792561055568638,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
...,...,...,...,...,...,...,...,...,...,...
64186,6858783929733742556,2022,6,20,0,0,0,,,
64187,2132453013379446212,2022,6,20,0,0,0,,,
64189,8884923748487096025,2022,6,20,0,0,0,,,
64221,2942031581314612067,2022,6,20,0,0,0,,,


In [286]:
# Columns declared as categorical features for the training helper.
# NOTE(review): the Description_Zone* columns hold text in the preview of df_train but are
# not listed here - confirm how the training helper is expected to treat them.
feat_cat = [
    "id",
    "Date_weekend",
    "lockdown",
]

In [287]:
# NOTE(review): df_train is not modified since the feature-engineering cell, so this
# preview repeats the earlier one and could be removed.
df_train

Unnamed: 0,id,Date_year,Date_month,Date_day,Date_weekday,Date_weekend,lockdown,Description_ZoneA,Description_ZoneB,Description_ZoneC
92,-6795017860123346820,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
72,4324517785573311838,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
71,5056562842583071429,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
69,-5277685202972058648,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
68,4079792561055568638,2020,1,2,3,0,0,Vacances de Noël,Vacances de Noël,Vacances de Noël
...,...,...,...,...,...,...,...,...,...,...
64186,6858783929733742556,2022,6,20,0,0,0,,,
64187,2132453013379446212,2022,6,20,0,0,0,,,
64189,8884923748487096025,2022,6,20,0,0,0,,,
64221,2942031581314612067,2022,6,20,0,0,0,,,


In [288]:
# Build the cross-validation splits from the features and the target, requesting n_splits=5.
# Each element of the result unpacks into four objects (see the unpacking of element 0 below).
# NOTE(review): the splitting strategy is defined in get_split_train_val_cv and is not
# visible from this notebook.
list_train_valid = get_split_train_val_cv(df_train, y_train, n_splits=5)

# 3. Train model

In [289]:
# Unpack the first split into its four parts; judging by the names these are the
# train/validation features and the train/validation targets.
# NOTE(review): none of these four names is used by a later cell visible here.
df_train_, df_valid_, y_train_, y_valid_ = list_train_valid[0]

In [290]:
# NOTE(review): the whole training step below is commented out, so as it stands this
# notebook builds the CV splits but never calls train_model_cv_mlflow.
# Uncomment both statements (params and the call) to run the training.
# params = {
#     "iterations": 1000,
#     "depth": 7,
#     "loss_function": "RMSE",
#     # "learning_rate": 0.05,
#     "early_stopping_rounds": 100,
# }

# model, pred_train, pred_valid = train_model_cv_mlflow(
#     list_train_valid=list_train_valid,
#     feat_cat=feat_cat, plot_training=False, verbose=0,
#     **params
# )

In [291]:
# Load the school-holiday table from the processed-data folder.
# The path is built from `path_data`, like the other datasets, instead of repeating the
# folder as a literal, so relocating the data only requires changing the setting once.
# NOTE(review): df_hol is only displayed here; no later use is visible in this notebook.
df_hol = pd.read_pickle(os.path.join(path_data, "holidays_france.pkl"))
df_hol

Unnamed: 0,Description,Population,date_begin,date_end,Académies,Zones,annee_scolaire
1671,Vacances de Noël,-,2019-12-21,2020-01-06,Montpellier,Zone C,2019-2020
1667,Vacances de Noël,-,2019-12-21,2020-01-06,Lyon,Zone A,2019-2020
1668,Vacances de Noël,-,2019-12-21,2020-01-06,Aix-Marseille,Zone B,2019-2020
108,Vacances d'Hiver,-,2020-02-08,2020-02-24,Créteil,Zone C,2019-2020
109,Vacances d'Hiver,-,2020-02-15,2020-03-02,Amiens,Zone B,2019-2020
...,...,...,...,...,...,...,...
1910,Pont de l'Ascension,-,2026-05-15,2026-05-16,Toulouse,Zone C,2025-2026
1537,Pont de l'Ascension,-,2026-05-15,2026-05-16,Poitiers,Zone A,2025-2026
360,Début des Vacances d'Été,-,2026-07-04,2026-07-04,Aix-Marseille,Zone B,2025-2026
1155,Début des Vacances d'Été,-,2026-07-04,2026-07-04,Dijon,Zone A,2025-2026
