In [1]:
import antigravity
import warnings

import pandas as pd
import numpy as np

from pandas_profiling import ProfileReport

from functions.loading import load_data
from functions.merged_dataset_creation import create_preprocessed_dataset
from functions.training_pipeline import training_pipeline
from functions.models import xgboost_model, catboost_model, lgbm_model

warnings.filterwarnings('ignore')
pd.options.mode.chained_assignment = None


  from .autonotebook import tqdm as notebook_tqdm


## Parameters definition

In [8]:
# Directory locations used by this notebook. All are relative paths, so they
# resolve against the kernel's current working directory.
path_rawdata = "data/raw_data/"
path_intermediary = "data/intermediary_data/proprietary_data/"
path_Benchmark = "Benchmark/"
path_models = "models/proprietary_data/"
path_results = "results/proprietary_data/"
path_plot = "results/proprietary_data/plot/"

# Names handed to training_pipeline through its `targets` argument.
targets = [
    "CF1_log",
    "CF2_log",
    "CF3_log",
    "CF123_log",
]
# Objects imported from functions.models, keyed by the model's name, and
# handed to training_pipeline through its `models` argument.
# xgboost_model is imported but deliberately left out of this mapping; add
# the entry `"xgboost": xgboost_model` to include it in the run.
models = {
    "catboost": catboost_model,
    "lgbm": lgbm_model,
}
# Settings handed to training_pipeline through its `training_parameters`
# argument.
# NOTE(review): the meaning of each key is defined by the pipeline code in
# functions.training_pipeline — confirm there before changing a value.
training_parameters = {
    "low": 0.01,
    "high": 1,
    "extended_features": [
        "Revenue_log",
        "EMP_log",
        "Asset_log",
        "NPPE_log",
        "CapEx_log",
        "Age",
        "CapInten",
        "GMAR",
        "Leverage",
        "Price",
        "FuelIntensity",
        "FiscalYear",
        "ENEConsume_log",
        "ENEProduce_log",
        "INTAN_log",
        "AccuDep_log",
        "COGS_log",
    ],
    "selec_sect": ["GICSSubInd", "GICSInd", "GICSGroup"],
    "fill_grp": "",
    "old_pipe": False,
    "cross_val": False,
}

# Two separate empty lists, handed to training_pipeline through its
# `Summary_Final` and `ensemble` arguments.
# NOTE(review): lists are mutable — if the pipeline appends to them, re-run
# this cell before re-running training so each run starts from empty lists.
Summary_Final, ensemble = [], []

## Train and save best models for proprietary data

In [9]:
# load_data(path_rawdata) returns five objects, unpacked here in the order
# they are returned.
(
    Refinitiv_data,
    CarbonPricing,
    IncomeGroup,
    FuelIntensity,
    GICSReclass,
) = load_data(path_rawdata)

# The arguments below are positional. GICSReclass is passed second even
# though it is the last item returned by load_data — keep this order when
# editing the call.
preprocessed_dataset = create_preprocessed_dataset(
    Refinitiv_data, GICSReclass, CarbonPricing, IncomeGroup, FuelIntensity
)

In [10]:
# Run training_pipeline with the settings defined in the parameters cell and
# keep its three return values. Every argument is passed by keyword; they are
# grouped below by role. path_results and path_plot are not passed to this
# call.
best_scores, best_stds, summary_global = training_pipeline(
    name_experiment="best_models_on_proprietary_data_1",
    # data and training configuration
    preprocessed_dataset=preprocessed_dataset,
    targets=targets,
    models=models,
    training_parameters=training_parameters,
    # list objects created in the parameters cell
    Summary_Final=Summary_Final,
    ensemble=ensemble,
    # directories
    path_rawdata=path_rawdata,
    path_intermediary=path_intermediary,
    path_Benchmark=path_Benchmark,
    path_models=path_models,
    # switches
    open_data=False,
    save=True,
)

In [None]:
# Third value returned by the training_pipeline call above; as the last
# expression in the cell it is rendered as the cell's output.
summary_global

Unnamed: 0,Target,model,mae,mse,r2,rmse,mape,std
0,CF1_log,xgboost,0.546257,0.560161,0.735338,0.748439,0.232656,0.052636
1,CF1_log,catboost,0.511705,0.500921,0.763327,0.707757,0.225965,0.050239
2,CF1_log,lgbm,0.538543,0.541952,0.743941,0.736174,0.239444,0.050315
3,CF2_log,xgboost,0.472788,0.409697,0.635211,0.640076,0.11728,0.067646
4,CF2_log,catboost,0.454716,0.382251,0.659649,0.618264,0.112823,0.064465
5,CF2_log,lgbm,0.47704,0.415753,0.629819,0.644789,0.11909,0.060904
6,CF3_log,xgboost,0.883134,1.298067,0.412348,1.139328,0.224584,0.108354
7,CF3_log,catboost,0.872315,1.237077,0.43996,1.11224,0.219863,0.106741
8,CF3_log,lgbm,0.87338,1.243042,0.437259,1.114918,0.221857,0.119147
9,CF123_log,xgboost,0.550226,0.520079,0.655915,0.721165,0.107768,0.057072


In [6]:
# First value returned by the training_pipeline call above. The name only
# exists once that training cell has been executed in the current kernel;
# evaluating this cell before then raises NameError.
best_scores

NameError: name 'best_scores' is not defined