# Imports

In [1]:
import warnings

import pandas as pd

from functions.loading import load_data

from functions.training_pipeline import training_pipeline
from functions.models import xgboost_model, catboost_model, lgbm_model

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None

# Parameter definitions

In [4]:
# paths
# Every location is a relative directory string that ends with '/'.
path_rawdata = 'data/raw_data/'
path_intermediary = 'data/intermediary_data/'
path_models = 'models/'
path_benchmark = 'benchmark/'
path_results = 'results/'
# The plot folder is derived from path_results so the two cannot drift apart.
# It used to be assigned twice (once derived, once hard-coded to the same
# value); the hard-coded duplicate silently overrode the derived one.
path_plot = path_results + 'plot/'

# Training parameters
# Names of the target columns handed to training_pipeline via `targets=`.
targets = [
    "cf1_log",
    "cf2_log",
    "cf3_log",
    "cf123_log",
]
# Registry mapping a short model name to the corresponding object imported
# from functions.models; passed to training_pipeline via `models=`.
models = {
    "xgboost": xgboost_model,
    "catboost": catboost_model,
    "lgbm": lgbm_model,
}
# Settings bundle passed to training_pipeline via `training_parameters=`.
# NOTE(review): the meaning of each key is defined by training_pipeline, which
# is not visible here — the inline remarks below are assumptions to confirm.
training_parameters = {
    "seed": 0,  # presumably the random seed
    "n_iter": 10,  # presumably the number of search iterations
    # Feature column names used under the "extended_features" key.
    "extended_features": [
        "revenue_log",
        "employees_log",
        "asset_log",
        "nppe_log",
        "capex_log",
        "age",
        "cap_inten",
        "gmar",
        "leverage",
        "price",
        "fuel_intensity",
        "fiscal_year",
        "energy_consumed_log",
        "energy_produced_log",
        "intan_log",
        "accu_dep_log",
        "cogs_log",
    ],
    # Sector-classification column names.
    "selec_sect": ["gics_sub_ind", "gics_ind", "gics_group"],
    "cross_val": False,
}
# Run flags forwarded to training_pipeline (`restricted_features=`, `save=`).
restricted_features = False
# Must be a plain boolean. The earlier `save=True,` carried a trailing comma,
# which bound `save` to the one-element tuple `(True,)`; a tuple is truthy
# even when it holds False, so the flag could never be switched off that way.
save = True

# Results containers
# Fresh, independent empty containers. All except Summary_Final_train are
# passed to training_pipeline in the training cell.
# NOTE(review): presumably training_pipeline fills the lists in place, so
# re-run this cell before re-running training — confirm.
summary_final = list()
ensemble = list()
estimated_scopes = list()
summary_metrics_detailed = pd.DataFrame()
# Not referenced by any other visible cell; name kept as-is in case external
# code relies on it.
Summary_Final_train = list()

# Data loading

In [3]:
# Build the training dataset by calling load_data on the raw-data folder.
# NOTE(review): save=True presumably persists the loaded data to disk — confirm
# where load_data writes it.
preprocessed_dataset = load_data(
    path_rawdata,
    save=True,
)

# Training pipeline 

In [None]:
# Run training_pipeline with every argument passed by keyword and unpack its
# four return values.
# NOTE(review): summary_metrics_detailed is both an input and an output of this
# call, and the list containers are passed in by reference. Re-running this
# cell without first re-running the parameters cell may therefore feed earlier
# results back in — confirm against training_pipeline.
(
    best_scores,
    best_stds,
    summary_global,
    summary_metrics_detailed,
) = training_pipeline(
    # input/output locations
    path_benchmark=path_benchmark,
    path_results=path_results,
    path_models=path_models,
    path_intermediary=path_intermediary,
    path_plot=path_plot,
    # data and training configuration
    preprocessed_dataset=preprocessed_dataset,
    targets=targets,
    models=models,
    training_parameters=training_parameters,
    restricted_features=restricted_features,
    save=save,
    # result containers
    summary_final=summary_final,
    ensemble=ensemble,
    summary_metrics_detailed=summary_metrics_detailed,
    estimated_scopes=estimated_scopes,
)

In [None]:
# Display the first value returned by training_pipeline as the cell output.
best_scores

# Model application

In [2]:
from functions.apply_model import apply_model_on_raw_data

In [None]:
# Load the raw data again, this time with filter_outliers=False and save=False,
# then pass it to apply_model_on_raw_data. Nothing is saved by either call
# (both receive save=False).
raw_dataset = load_data(
    path_rawdata,
    filter_outliers=False,
    save=False,
)
estimations = apply_model_on_raw_data(
    raw_dataset,
    path_intermediary=path_intermediary,
    path_models=path_models,
    path_results=path_results,
    save=False,
)
# Last expression of the cell: rendered as the cell output.
estimations