In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import pandas as pd

from sklearn.model_selection import cross_val_score
from tqdm import tqdm

import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
import numpy as np

# Machine Learning 
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

# Models
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
#from lightgbm import LGBMClassifier
#from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

import matplotlib.pyplot as plt
path = "/Users/julesmourgues/Documents/Programmation/CFM/"

#Load x and y train data
input_test = pd.read_csv(path + "input_test.csv")
input_training = pd.read_csv(path + "input_training.csv")
#output_test_random = pd.read_csv(path + "output_test_random.csv")
output_training = pd.read_csv(path + "output_training_gmEd6Zt.csv")


from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from scipy.spatial.distance import squareform

def create_and_assign_clusters(df, num_clusters, column):
    # Sort by equity and day
    if column == "returns":
        df['func'] = df.iloc[:, 3:3+53].sum(axis = 1) # Sum of the 52 first columns
    elif column == "volatility":
        df['func'] = df.iloc[:, 3:3+53].std(axis = 1)

    new_df = df.pivot_table(index='day', columns='equity', values='func')
    new_df.columns.name = None
    func = new_df.fillna(0)

    # Calculate the Pearson correlation matrix and convert it to distances
    distance_matrix = np.sqrt(0.5 * (1 - func.corr(method='pearson')))

    # Perform hierarchical clustering using Ward's method
    linked = linkage(np.nan_to_num(squareform(distance_matrix, checks=False), nan=0, posinf=1e9, neginf=-1e9), method='ward')

    # Use the fcluster function to assign cluster labels to tickers
    cluster_labels = fcluster(linked, num_clusters, criterion='maxclust')

    # Create a DataFrame with the cluster labels and corresponding tickers
    clustered_data = pd.DataFrame({'Ticker': func.columns, 'Cluster': cluster_labels})

    # Use a loop to extract each cluster and store it in a separate DataFrame
    clusters = {i: clustered_data[clustered_data['Cluster'] == i]['Ticker'] for i in range(1, num_clusters + 1)}

    if df['equity'].max() >= 1000000:
        for cluster in clusters:
            for index in clusters[cluster].index:
                if column == "returns":
                    df.loc[df['equity'] == (1000000 + index), 'cluster_returns'] = cluster
                elif column == "volatility":
                    df.loc[df['equity'] == (1000000 + index), 'cluster_volatility'] = cluster
    else:
        for cluster in clusters:
            for index in clusters[cluster].index:
                if column == "returns":
                    df.loc[df['equity'] == index, 'cluster_returns'] = cluster
                elif column == "volatility":
                    df.loc[df['equity'] == index, 'cluster_volatility'] = cluster

    #Suoprimer la colonne returns
    df = df.drop(columns = ['func'])
    return df


def preprocess_data(input_training, input_test, outlier=0.05, groupby=False, IQRoutlier = True, variables = False, na_count = False, clusters = False, cluster_type = "returns", suppr = False, nbforlast = 5):
    import pandas as pd
    from datetime import time, timedelta, datetime, date

    df_train = input_training.copy()
    #Ajoute ue colonne qui compte le nombre de na dans chaque ligne
    if na_count is not False:
        df_train['na_count'] = df_train.isnull().sum(axis=1)
      
        #Remplace les inf par des 60
        df_train = df_train.replace([np.inf, -np.inf], 60)
    df_train = df_train.fillna(0)
    df_train = df_train.sort_values(by=['equity', 'day'])

    df_test = input_test.copy()
    #Ajoute ue colonne qui compte le nombre de na dans chaque ligne
    if na_count is not False:
        df_test['na_count'] = df_test.isnull().sum(axis=1)

        #Remplace les inf par des 60
        df_test = df_test.replace([np.inf, -np.inf], 60)
    df_test = df_test.fillna(0)
    df_test = df_test.sort_values(by=['equity', 'day'])

    if outlier is not False:
        for col in df_train.iloc[:,3:3+53].columns:
            Q1 = df_train[col].quantile(outlier)
            Q3 = df_train[col].quantile(1 - outlier)
            IQR = Q3 - Q1
            median = df_train[col].median()
            if IQRoutlier is not False:
                df_train.loc[(df_train[col] < (Q1 - 1.5 * IQR)), col] = Q1 - 1.5 * IQR
                df_train.loc[(df_train[col] > (Q3 + 1.5 * IQR)), col] = Q3 + 1.5 * IQR
            else:
                df_train.loc[((df_train[col] < ((Q1 - 1.5 * IQR))) | (df_train[col] > (Q3 + 1.5 * IQR))), col] = median

        for col in df_test.iloc[:,3:3+53].columns:
            Q1 = df_test[col].quantile(outlier)
            Q3 = df_test[col].quantile(1 - outlier)
            IQR = Q3 - Q1
            median = df_test[col].median()
            if IQRoutlier is not False:
                df_test.loc[(df_test[col] < (Q1 - 1.5 * IQR)), col] = Q1 - 1.5 * IQR
                df_test.loc[(df_test[col] > (Q3 + 1.5 * IQR)), col] = Q3 + 1.5 * IQR
            else:
                df_test.loc[((df_test[col] < ((Q1 - 1.5 * IQR))) | (df_test[col] > (Q3 + 1.5 * IQR))), col] = median


    if clusters is not False:
        if cluster_type == "returns":
            df_train = create_and_assign_clusters(df_train, clusters, "returns")
            df_test = create_and_assign_clusters(df_test, clusters, "returns")
        elif cluster_type == "volatility":
            df_train = create_and_assign_clusters(df_train, clusters, "volatility")
            df_test = create_and_assign_clusters(df_test, clusters, "volatility")

    def volatility(df):
            df['volatility'] = df.iloc[:,3:3+53].std(axis=1)
            df['returns'] = df.iloc[:,3:3+53].sum(axis=1)
            df['last_returns'] = df.iloc[:,3+53-nbforlast:3+53].iloc[:,-1]

    if variables == "volatility":
        volatility(df_train)
        volatility(df_test)
    elif variables == "market+volatility":
        volatility(df_train)
        volatility(df_test)

        def calculate_market_means1(df, group_col, value_cols, beta = True):

            grouped_values = df.groupby(group_col)[value_cols]

            mean_col = grouped_values.mean()
            #rolling_mean_col = mean_col.rolling(5).mean()

            df[f'market_mean_{value_cols}_{group_col}'] = df[group_col].map(mean_col)
            if beta == True:
                #df[f'rolling_market_mean_{value_col}_{group_col}'] = df[group_col].map(rolling_mean_col)
                df[f'{group_col}_{value_cols}_beta'] = df[value_cols] / df[f'market_mean_{value_cols}_{group_col}']

            '''if IQRindicateur is True:
                if value_col == 'volatility':
                    Q1 = grouped_values.quantile(0.25)
                    Q3 = grouped_values.quantile(0.75)
                    IQR = Q3 - Q1

                    df[f'{value_col}_Q1_{group_col}'] = df[group_col].map(Q1)
                    df[f'{value_col}_Q3_{group_col}'] = df[group_col].map(Q3)
                    df[f'{value_col}_IQR_{group_col}'] = df[group_col].map(IQR)'''

        def calculate_market_means(df, group_cols, value_cols, beta = True):
            grouped_values = df.groupby(group_cols)[value_cols]

            mean_col = grouped_values.mean()

            # Create a multi-indexed series with the mean values
            mean_series = pd.Series(mean_col.values, index=pd.MultiIndex.from_tuples(mean_col.index))

            # Map the multi-indexed series to the original dataframe
            df[f'market_mean_{value_cols}_{"_".join(group_cols)}'] = df.set_index(group_cols).index.map(mean_series)

            if beta == True:
                df[f'{"_".join(group_cols)}_{value_cols}_beta'] = df[value_cols] / df[f'market_mean_{value_cols}_{"_".join(group_cols)}']



        # Utiliser la fonction calculate_market_means pour les colonnes 'returns' et 'volatility'
        calculate_market_means1(df_train, 'day', 'volatility') 
        calculate_market_means1(df_test, 'day', 'volatility')
        calculate_market_means1(df_train, 'equity', 'volatility') 
        calculate_market_means1(df_test, 'equity', 'volatility')
        #calculate_market_means1(df_train, 'day', 'returns')
        #calculate_market_means1(df_test, 'day', 'returns')
        calculate_market_means1(df_train, 'day', 'last_returns')
        calculate_market_means1(df_test, 'day', 'last_returns')

        if cluster_type == "returns" and clusters is not False:
            calculate_market_means(df_train, ['cluster_returns', 'day'], 'volatility', beta = True)
            calculate_market_means(df_test, ['cluster_returns', 'day'], 'volatility', beta = True)
        elif cluster_type == "volatility" and clusters is not False:
            calculate_market_means(df_train, ['cluster_volatility', 'day'], 'volatility', beta = True)
            calculate_market_means(df_test, ['cluster_volatility', 'day'], 'volatility', beta = True)
    

    


        #calculate_market_means(df_test, 'day', 'volatility') 
    #calculate_market_means(df, 'equity', ['returns', 'volatility', 'last_returns', 'last_volatility']) 

    if groupby is False:
        #df_train['next_day_r0'] = df_train.groupby('equity')['r0'].shift(-1)
        #df_test['next_day_r0'] = df_test.groupby('equity')['r0'].shift(-1)
        print("No groupby")
    else:
        firstrtime = 35
        new_labels = []
        start_time = time(9, firstrtime)
        for i in range(53):
            new_labels.append(start_time.strftime('%H:%M'))
            start_time = (datetime.combine(date.today(), start_time) + timedelta(minutes=5)).time()

        col_rename_dict = {f'r{i}': new_labels[i] for i in range(53)}
        df_train.rename(columns=col_rename_dict, inplace=True)
        df_test.rename(columns=col_rename_dict, inplace=True)

        id_column_train = df_train['ID']
        id_column_test = df_test['ID']
        interval_dict = {label: "{:.2f}".format(datetime.strptime(label, '%H:%M').hour + (datetime.strptime(label, '%H:%M').minute // groupby) / (60/groupby)) for label in new_labels}
        
        df_train_grouped_interval = df_train.groupby(interval_dict, axis=1).sum()
        df_train_grouped_interval['ID'] = id_column_train
        df_train = df_train.merge(df_train_grouped_interval, on = 'ID', how='left')
        df_train = df_train.drop(df_train.columns[3:3+53], axis = 1)
        #df_train['next_day_first_interval'] = df_train.groupby('equity')[list(interval_dict.values())[1]].shift(-1)

        df_test_grouped_interval = df_test.groupby(interval_dict, axis=1).sum()
        df_test_grouped_interval['ID'] = id_column_test
        df_test = df_test.merge(df_test_grouped_interval, on = 'ID', how='left')
        df_test = df_test.drop(df_test.columns[3:3+53], axis = 1)
        #df_test['next_day_first_interval'] = df_test.groupby('equity')[list(interval_dict.values())[1]].shift(-1)

    if suppr is True:
        df_train = df_train.drop(df_train.columns[3:3+53], axis = 1)
        df_test = df_test.drop(df_test.columns[3:3+53], axis = 1)

    df_train = df_train.merge(output_training, on = 'ID', how='left')



    return df_train, df_test

In [2]:
na_count = False
outlier = 0.05

df_train = input_training.copy()
#Ajoute ue colonne qui compte le nombre de na dans chaque ligne
if na_count is not False:
    df_train['na_count'] = df_train.isnull().sum(axis=1)
df_train = df_train.fillna(0)
df_train = df_train.sort_values(by=['equity', 'day'])

df_test = input_test.copy()
#Ajoute ue colonne qui compte le nombre de na dans chaque ligne
if na_count is not False:
    df_test['na_count'] = df_test.isnull().sum(axis=1)
df_test = df_test.fillna(0)
df_test = df_test.sort_values(by=['equity', 'day'])

for col in df_train.iloc[:,3:3+53].columns:
    Q1 = df_train[col].quantile(outlier)
    Q3 = df_train[col].quantile(1 - outlier)
    IQR = Q3 - Q1
    median = df_train[col].median()
    df_train.loc[((df_train[col] < ((Q1 - 1.5 * IQR))) | (df_train[col] > (Q3 + 1.5 * IQR))), col] = median

for col in df_test.iloc[:,3:3+53].columns:
    Q1 = df_test[col].quantile(outlier)
    Q3 = df_test[col].quantile(1 - outlier)
    IQR = Q3 - Q1
    median = df_test[col].median()
    df_test.loc[((df_test[col] < ((Q1 - 1.5 * IQR))) | (df_test[col] > (Q3 + 1.5 * IQR))), col] = median


df_train_cumsum = df_train.copy()
df_train_cumsum.insert(3, 'rinitiale', 0)
df_train_cumsum.iloc[:,4:4+53] = df_train_cumsum.iloc[:,4:4+53].cumsum(axis=1)
#df_train_cumsum.iloc[:,3:4+53] = df_train_cumsum.iloc[:,3:4+53] +1000000


df_test_cumsum = df_test.copy()
df_test_cumsum.insert(3, 'rinitiale', 0)
df_test_cumsum.iloc[:,4:4+53] = df_test_cumsum.iloc[:,4:4+53].cumsum(axis=1)
#df_test_cumsum.iloc[:,3:4+53] = df_test_cumsum.iloc[:,3:4+53] +1000000


import numpy as np

# Group by day and calculate mean cumsum
df_train_grouped = df_train_cumsum.iloc[:,3:4+53].groupby(df_train_cumsum['day']).mean()

# Calculate area under the curve for each group
df_train_grouped['auc'] = df_train_grouped.apply(lambda row: np.trapz(row), axis=1)

# Subtract area under the curve of each row from the group's auc
df_train_cumsum = df_train_cumsum.merge(df_train_grouped['auc'], on='day', how='left')
df_train_cumsum['diff'] = df_train_cumsum.iloc[:,3:4+53].apply(lambda row: np.trapz(row), axis=1) / df_train_cumsum['auc'] 
df_train_cumsum['r52_mean_day'] = df_train_cumsum.groupby('day')['r52'].transform('mean')
df_train_cumsum['r52_mean_equity'] = df_train_cumsum.groupby('equity')['r52'].transform('mean')

# Group by day and calculate mean cumsum
df_test_grouped = df_test_cumsum.iloc[:,3:4+53].groupby(df_test_cumsum['day']).mean()

# Calculate area under the curve for each group
df_test_grouped['auc'] = df_test_grouped.apply(lambda row: np.trapz(row), axis=1)

# Subtract area under the curve of each row from the group's auc
df_test_cumsum = df_test_cumsum.merge(df_test_grouped['auc'], on='day', how='left')
df_test_cumsum['diff'] = df_test_cumsum.iloc[:,3:4+53].apply(lambda row: np.trapz(row), axis=1)/df_test_cumsum['auc']
df_test_cumsum['r52_mean_day'] = df_test_cumsum.groupby('day')['r52'].transform('mean')
df_test_cumsum['r52_mean_equity'] = df_test_cumsum.groupby('equity')['r52'].transform('mean')

df_test_cumsum['r52_mean'] = df_test_cumsum.groupby('equity')['r52'].transform('mean')



# Rename 'auc' to 'auc_day'
df_train_cumsum.rename(columns={'auc': 'auc_day'}, inplace=True)
df_test_cumsum.rename(columns={'auc': 'auc_day'}, inplace=True)

# Group by equity and calculate mean cumsum
df_train_grouped_equity = df_train_cumsum.iloc[:,3:4+53].groupby(df_train_cumsum['equity']).mean()
df_test_grouped_equity = df_test_cumsum.iloc[:,3:4+53].groupby(df_test_cumsum['equity']).mean()

# Calculate area under the curve for each group
df_train_grouped_equity['auc_equity'] = df_train_grouped_equity.apply(lambda row: np.trapz(row), axis=1)
df_test_grouped_equity['auc_equity'] = df_test_grouped_equity.apply(lambda row: np.trapz(row), axis=1)

# Subtract area under the curve of each row from the group's auc_equity
df_train_cumsum = df_train_cumsum.merge(df_train_grouped_equity['auc_equity'], on='equity', how='left')
df_train_cumsum['diff_equity'] = df_train_cumsum.iloc[:,3:4+53].apply(lambda row: np.trapz(row), axis=1) / df_train_cumsum['auc_equity'] 

df_test_cumsum = df_test_cumsum.merge(df_test_grouped_equity['auc_equity'], on='equity', how='left')
df_test_cumsum['diff_equity'] = df_test_cumsum.iloc[:,3:4+53].apply(lambda row: np.trapz(row), axis=1) / df_test_cumsum['auc_equity'] 
df_test_cumsum.set_index('ID', inplace=True)
df_train_cumsum.set_index('ID', inplace=True)

#df_train = df_train.merge(df_train_cumsum[['auc', 'diff']], on = 'ID', how='left')
#df_test = df_test.merge(df_test_cumsum[['auc', 'diff']], on = 'ID', how='left')

In [5]:
df_train_cumsum

Unnamed: 0_level_0,day,equity,rinitiale,r0,r1,r2,r3,r4,r5,r6,...,r49,r50,r51,r52,auc_day,diff,r52_mean_day,r52_mean_equity,auc_equity,diff_equity
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
832716,0,0,0,0.00,0.00,-12.61,-10.09,12.57,-2.50,-5.02,...,138.81,136.33,156.20,158.68,1075.783995,2.147903,49.866790,0.700258,-37.829751,-61.081025
403174,1,0,0,34.83,-44.50,-26.97,-2.07,5.38,5.38,32.65,...,-254.08,-266.79,-271.88,-264.24,-2483.807190,2.999879,-92.117900,0.700258,-37.829751,196.964551
532152,2,0,0,-71.63,-69.30,-57.67,-34.41,-39.06,-34.41,-59.99,...,-59.75,-64.43,-64.43,-69.11,175.328754,-19.556319,12.784301,0.700258,-37.829751,90.637259
85964,3,0,0,0.00,51.26,11.85,11.85,7.19,7.19,21.18,...,-0.49,-0.49,13.42,21.52,183.210000,7.318705,2.136430,0.700258,-37.829751,-35.444589
70965,4,0,0,40.78,74.15,2.89,-21.03,-11.42,17.35,3.01,...,179.05,190.83,174.36,183.79,-265.227927,-12.081250,-2.321638,0.700258,-37.829751,-84.702777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
724614,498,1828,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,-82.44,-82.44,-46.87,-46.87,-3140.908703,0.189787,-72.352847,1.875268,36.770159,-16.211651
502383,499,1828,0,0.00,0.00,0.00,0.00,67.96,67.96,67.96,...,-33.84,-33.84,-33.84,-33.84,579.798309,-2.075360,10.472275,1.875268,36.770159,-32.724634
520688,500,1828,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,-3956.174713,-0.000000,-107.902656,1.875268,36.770159,0.000000
362717,501,1828,0,0.00,0.00,0.00,0.00,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,-269.470033,-0.000000,1.838784,1.875268,36.770159,0.000000


In [3]:
cluster_type = "returns"

df_train, df_test = preprocess_data(input_training, input_test, outlier=0.05, groupby=False, IQRoutlier = False, variables="market+volatility", na_count=True, clusters = True, cluster_type = cluster_type, suppr = True, nbforlast = 18)

# Définition de l'index des dataframes df_train et df_test
df_train = df_train.set_index('ID')
df_test = df_test.set_index('ID')

def sharpe(df):
    df['market_sharpe'] = df['market_mean_last_returns_day'] / df['market_mean_volatility_day']
    df['sharpe'] = df['last_returns'] / df['volatility']

sharpe_bool = False

if sharpe_bool == 1:
    sharpe(df_train)
    sharpe(df_test)

#Ajoute à df_train la colonne auc et diff de df_train_cumsum sur l'Index
df_train = df_train.merge(df_train_cumsum[['auc_day', 'diff', 'r52_mean_day', 'r52_mean_equity', 'r52', 'auc_equity', 'diff_equity']], on = 'ID', how='left')
df_test = df_test.merge(df_test_cumsum[['auc_day', 'diff', 'r52_mean_day', 'r52_mean_equity', 'r52', 'auc_equity', 'diff_equity']], on = 'ID', how='left')

# Ajout de 1 à la colonne 'reod' de df_train
df_train['reod'] = df_train['reod'] + 1

# Séparation des données d'entraînement et de test
train = df_train[df_train['day'] <= 350]
test = df_train[df_train['day'] >= 351]

if cluster_type == "returns":
    suppr = ['day', 'equity', 'r52_mean_equity', 'auc_equity','equity_volatility_beta', 'diff','volatility', 'diff_equity' ,'r52','returns','last_returns','market_mean_last_returns_day','day_last_returns_beta']

elif cluster_type == "volatility":
    suppr = ['day', 'equity','volatility', 'returns']
    
df_test = df_test.drop(suppr, axis = 1)
df_train = df_train.drop(suppr, axis = 1)
#Drop suppr
test = test.drop(suppr, axis = 1)
train = train.drop(suppr, axis = 1)


# Séparation des features (X) et de la variable cible (y) pour l'entraînement et le test
X_train = train.drop('reod', axis=1)
y_train = train['reod']
X_test = test.drop('reod', axis=1)
y_test = test['reod']

dtrain_clf = xgb.DMatrix(X_train, y_train, enable_categorical=True)
dtest_clf = xgb.DMatrix(X_test, y_test, enable_categorical=True)
dall_clf = xgb.DMatrix(df_train.drop('reod', axis=1), df_train['reod'], enable_categorical=True)
dpred_clf = xgb.DMatrix(df_test, enable_categorical=True)

No groupby


Modèle : XGBoost

In [4]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score


#Modèle qui s'entraine sur toute la base
params = {"objective": "multi:softmax","num_class": 3, "tree_method": "hist", "random_state": 0}
          

params = {"objective": "multi:softmax","num_class": 3, "tree_method": "hist",
            "learning_rate": 0.3, "max_depth": 6,
            "gamma": 0, "subsample": 1, "colsample_bytree": 1,
            "alpha": 0, "lambda": 1,"random_state": 0}

n = 3


evals = [(dtest_clf, "validation"), (dtrain_clf, "train")]


model = xgb.train(
   params=params,
   dtrain=dall_clf,
   num_boost_round=n,
   evals=evals,
   verbose_eval=1,
   # Activate early stopping
   early_stopping_rounds=30
)
preds = model.predict(dtest_clf)
rmse = mean_squared_error(y_test, preds, squared=False)
accuracy = accuracy_score(y_test, preds.round())

print(f"RMSE of the base model: {rmse:.3f}")
print(f"Accuracy of the base model: {accuracy:.3f}") 

from sklearn.metrics import confusion_matrix

# Calculer la matrice de confusion
conf_matrix = confusion_matrix(y_test, preds)

# Calculer le ratio de 1, 0 et -1 correctement prédits
ratio_1 = conf_matrix[2, 2] / conf_matrix[2].sum()
ratio_0 = conf_matrix[1, 1] / conf_matrix[1].sum()
ratio_minus_1 = conf_matrix[0, 0] / conf_matrix[0].sum()

print(f"Ratio de 1 bien prédit: {ratio_1}")
print(f"Ratio de 0 bien prédit: {ratio_0}")
print(f"Ratio de -1 bien prédit: {ratio_minus_1}")


[0]	validation-mlogloss:1.05759	train-mlogloss:1.05944
[1]	validation-mlogloss:1.03364	train-mlogloss:1.03722
[2]	validation-mlogloss:1.01878	train-mlogloss:1.02219
RMSE of the base model: 0.967
Accuracy of the base model: 0.491
Ratio de 1 bien prédit: 0.1358510728044768
Ratio de 0 bien prédit: 0.7267016591705087
Ratio de -1 bien prédit: 0.4924973482006571
