# Imports

In [1]:
import warnings

import pandas as pd

from functions.loading import load_data

from functions.training_pipeline import training_pipeline
from functions.models import xgboost_model, catboost_model, lgbm_model

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None

# Parameters definition

In [2]:
# --- Run configuration ---
# Both flags are forwarded to training_pipeline; restricted_features is also
# forwarded to apply_model_on_raw_data further down.
save = False
restricted_features = False

# Model entries handed to training_pipeline, keyed by model name.
models = dict(
    xgboost=xgboost_model,
    catboost=catboost_model,
    lgbm=lgbm_model,
)

# Targets passed to training_pipeline, in this order.
targets = ["cf1", "cf2", "cf3", "cf123"]

# --- Result containers passed to training_pipeline ---
estimated_scopes = []
summary_metrics_detailed = pd.DataFrame()
summary_final = []
# NOTE(review): Summary_Final_train is not referenced by any visible cell — confirm it is still needed.
Summary_Final_train = []

# Data Loading 

In [3]:
# Load the dataset that is passed to training_pipeline as `preprocessed_dataset`.
# NOTE(review): save=True is hard-coded here although the parameters cell sets
# save=False — confirm that load_data is really meant to save on every run.
preprocessed_dataset = load_data(save=True)

# Training pipeline 

In [4]:
# Start from fresh result containers so that re-running this cell does not feed
# the previous run's results back into the pipeline: the call below rebinds
# summary_metrics_detailed to its own return value, and the two lists are
# module-level objects shared across cell executions.
summary_final = []
summary_metrics_detailed = pd.DataFrame()
estimated_scopes = []

# Run the training pipeline over `targets` and `models`.
# Returns, in order: best_scores, best_stds, summary_global and the updated
# summary_metrics_detailed.
# NOTE(review): the list passed in is named summary_final while the third return
# value is bound to summary_global — confirm whether these are the same object.
best_scores, best_stds, summary_global, summary_metrics_detailed = training_pipeline(
    targets=targets,
    models=models,
    summary_final=summary_final,
    summary_metrics_detailed=summary_metrics_detailed,
    estimated_scopes=estimated_scopes,
    preprocessed_dataset=preprocessed_dataset,
    restricted_features=restricted_features,
    save=save,
)

INFO:functions.training_pipeline:Training for target: cf1


Using pre created preprocessed files


INFO:functions.training_pipeline:Preprocessing done
INFO:functions.training_pipeline:Training model: xgboost
INFO:functions.training_pipeline:Training model: catboost
INFO:functions.training_pipeline:Training model: lgbm
INFO:functions.training_pipeline:Modelisation done
INFO:functions.training_pipeline:Elapsed time for target cf1: 257.38 seconds
INFO:functions.training_pipeline:Training for target: cf2


[0.5492875130294008, 0.5223518641181469, 0.5265227915777949]
Using pre created preprocessed files


INFO:functions.training_pipeline:Preprocessing done
INFO:functions.training_pipeline:Training model: xgboost
INFO:functions.training_pipeline:Training model: catboost
INFO:functions.training_pipeline:Training model: lgbm
INFO:functions.training_pipeline:Modelisation done
INFO:functions.training_pipeline:Elapsed time for target cf2: 217.55 seconds


[0.44988259417896564, 0.4183662367913087, 0.4336985040169224]


INFO:functions.training_pipeline:Training for target: cf3


Using pre created preprocessed files


INFO:functions.training_pipeline:Preprocessing done
INFO:functions.training_pipeline:Training model: xgboost
INFO:functions.training_pipeline:Training model: catboost
INFO:functions.training_pipeline:Training model: lgbm
INFO:functions.training_pipeline:Modelisation done
INFO:functions.training_pipeline:Elapsed time for target cf3: 125.29 seconds
INFO:functions.training_pipeline:Training for target: cf123


[0.9608576597792737, 0.9029440524476459, 0.9246305172829612]
Using pre created preprocessed files


INFO:functions.training_pipeline:Preprocessing done
INFO:functions.training_pipeline:Training model: xgboost
INFO:functions.training_pipeline:Training model: catboost
INFO:functions.training_pipeline:Training model: lgbm
INFO:functions.training_pipeline:Modelisation done
INFO:functions.training_pipeline:Elapsed time for target cf123: 94.94 seconds


[0.5754712644052693, 0.5517300603325132, 0.5587758061474672]


In [5]:
# One entry per target, in `targets` order. In the training output above, each
# value equals the minimum of the three scores printed for that target
# (presumably one score per model, lower being better — confirm in training_pipeline).
best_scores

[0.5223518641181469,
 0.4183662367913087,
 0.9029440524476459,
 0.5517300603325132]

In [6]:
# One entry per target, in the same order as best_scores.
# Presumably the standard deviation associated with each best score — confirm in training_pipeline.
best_stds

[0.10109473456997788,
 0.07442082860850331,
 0.08198841141861354,
 0.055044762003435256]

# Model application

In [7]:
from functions.apply_model import apply_model_on_raw_data

In [8]:
# Load the dataset again, this time with filter_outliers=False and save=False,
# then pass it to apply_model_on_raw_data (also with save=False).
raw_dataset = load_data(save=False, filter_outliers=False)
estimations = apply_model_on_raw_data(
    raw_dataset,
    restricted_features=restricted_features,
    save=False,
)
# Bare last expression so the notebook renders the result.
estimations