In [12]:
#ouvre Data.csv

import pandas as pd

df = pd.read_csv('Data.csv', sep=',')
list_crypto = ["BTC", "ETH", "BNB", "LTC", "DOGE", "BCH", "XRP", "MATIC", "ADA", "DOT", "SOL"]


In [13]:
def prepare_data(df, crypto_name):
    # Filtrer les colonnes liées à la crypto-monnaie
    crypto_columns = df.filter(like=crypto_name.upper()).columns.tolist() + df.filter(like=crypto_name.lower()).columns.tolist()
    df_crypto = df[crypto_columns + ['date']]
    
    # Calculer le retour sur investissement
    df_crypto['Return_{}'.format(crypto_name.upper())] = df_crypto['Close_{}'.format(crypto_name.upper())].pct_change()
    
    # Supprimer la première ligne
    df_crypto = df_crypto.drop(df_crypto.index[0])
    
    # Créer la colonne 'target'
    df_crypto['target'] = (df_crypto['Return_{}'.format(crypto_name.upper())].shift(-1) > df_crypto['Return_{}'.format(crypto_name.upper())]).astype(int)
    
    # Convertir la colonne 'date' en datetime et créer des colonnes pour l'année, le mois, le jour et le jour de la semaine
    df_crypto['date'] = pd.to_datetime(df_crypto['date'])
    df_crypto['year'] = df_crypto['date'].dt.year
    df_crypto['month'] = df_crypto['date'].dt.month
    df_crypto['day'] = df_crypto['date'].dt.day
    df_crypto['day_of_week'] = df_crypto['date'].dt.dayofweek
    
    # Diviser les données en train et test
    df_train = df_crypto[df_crypto['date'] < '2022-08-01'].set_index('date')
    df_test = df_crypto[df_crypto['date'] >= '2022-08-01'].set_index('date')
    
    return df_train, df_test, df_crypto

dfs = {}

for crypto in list_crypto:
    df_train, df_test, df_crypto = prepare_data(df, crypto)
    dfs[crypto] = {'train': df_train, 'test': df_test, 'crypto': df_crypto}
X_train = df_train.drop('target',axis=1)
y_train = df_train['target']
X_test = df_test.drop('target',axis=1)
y_test = df_test['target']

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_crypto['Return_{}'.format(crypto_name.upper())] = df_crypto['Close_{}'.format(crypto_name.upper())].pct_change()


In [6]:
def run_ml_models(X_train, y_train, X_test, y_test,df):
    from sklearn import ensemble, gaussian_process, linear_model, naive_bayes, neighbors, svm, tree, discriminant_analysis
    from sklearn.metrics import accuracy_score, mean_squared_error
    from xgboost import XGBClassifier
    import pandas as pd
    import numpy as np
    import time

    seed = 42  # Vous pouvez choisir n'importe quel nombre pour la graine

    MLA = [
        ensemble.AdaBoostClassifier(random_state=seed),
        ensemble.BaggingClassifier(random_state=seed),
        ensemble.ExtraTreesClassifier(random_state=seed),
        ensemble.GradientBoostingClassifier(random_state=seed),
        ensemble.RandomForestClassifier(random_state=seed),
        gaussian_process.GaussianProcessClassifier(random_state=seed),
        linear_model.LogisticRegressionCV(random_state=seed),
        linear_model.PassiveAggressiveClassifier(random_state=seed),
        linear_model.RidgeClassifierCV(),  # Pas de random_state pour RidgeClassifierCV
        linear_model.SGDClassifier(random_state=seed),
        linear_model.Perceptron(random_state=seed),
        naive_bayes.BernoulliNB(),
        naive_bayes.GaussianNB(),
        neighbors.KNeighborsClassifier(),
        svm.SVC(probability=True, random_state=seed),
        tree.DecisionTreeClassifier(random_state=seed),
        tree.ExtraTreeClassifier(random_state=seed),
        discriminant_analysis.LinearDiscriminantAnalysis(),
        discriminant_analysis.QuadraticDiscriminantAnalysis(),
        XGBClassifier(random_state=seed)    
    ]

    MLA_columns = ['MLA Name', 'MLA Parameters', 'MLA Test Accuracy Mean','MLA Time','MLA RMSE']
    MLA_compare = pd.DataFrame(columns = MLA_columns)
    MLA_predict = y_train.copy()

    row_index = 0
    for alg in MLA:
        MLA_name = alg.__class__.__name__
        MLA_compare.loc[row_index, 'MLA Name'] = MLA_name
        MLA_compare.loc[row_index, 'MLA Parameters'] = str(alg.get_params())

        start_time = time.time()
        alg.fit(X_train, y_train)
        predictions = alg.predict(X_test)
        alg.fit(df.drop('target', axis=1).iloc[:-1], df['target'].iloc[:-1])
        last_date_prediction = alg.predict(df.drop('target', axis=1).iloc[-1:])
        end_time = time.time()

        accuracy = accuracy_score(y_test, predictions)
        rmse = np.sqrt(mean_squared_error(y_test, predictions))

        MLA_compare.loc[row_index, 'MLA Time'] = end_time - start_time
        MLA_compare.loc[row_index, 'MLA Test Accuracy Mean'] = accuracy
        MLA_compare.loc[row_index, 'MLA RMSE'] = rmse

        MLA_predict[MLA_name] = last_date_prediction

        row_index+=1

    MLA_compare.sort_values(by = ['MLA Test Accuracy Mean'], ascending = False, inplace = True)
    return MLA_compare, MLA_predict


In [18]:
import pandas as pd
from datetime import datetime, timedelta
import random
import numpy as np
from sklearn.metrics import accuracy_score, mean_squared_error

# Supposons que df_full est votre DataFrame complet qui contient toutes les données

# Fonction pour créer un DataFrame pour une période donnée
def create_df(start_date, end_date, dfcrypto):
    # Pour cet exemple, nous allons simplement filtrer df_full pour obtenir les données dans la période spécifiée.
    df = dfcrypto[(dfcrypto['date'] >= start_date) & (dfcrypto['date'] <= end_date)]
    return df


def predict_next_day(X_train, y_train, X_test, y_test, df, current_month, previous_month, best_model):
    # Si nous sommes dans un nouveau mois ou si aucun meilleur modèle n'a encore été trouvé
    if best_model_name is None or previous_month != current_month:
        MLA_compare, MLA_predict = run_ml_models(X_train, y_train, X_test, y_test, df)
        
        # Calculer le ratio Accuracy/RMSE pour chaque modèle
        MLA_compare['Accuracy/RMSE'] = MLA_compare['MLA Test Accuracy Mean'] / MLA_compare['MLA RMSE']
        
        # Trouver le modèle avec le meilleur ratio Accuracy/RMSE
        MLA_compare['Accuracy/RMSE'] = pd.to_numeric(MLA_compare['Accuracy/RMSE'], errors='coerce')
        best_model = MLA_compare.loc[MLA_compare['Accuracy/RMSE'].idxmax()]
        print(best_model['MLA Name'])
        previous_month = current_month
    else :
        previous_month = current_month
        MLA_compare, MLA_predict = run_ml_models(X_train, y_train, X_test, y_test, df)
        best_model = best_model
        print(best_model['MLA Name'])
        previous_month = current_month

    
    
    return MLA_predict[best_model['MLA Name']], best_model['MLA Name'], best_model['MLA Test Accuracy Mean'], best_model['MLA RMSE'], previous_month, best_model


In [None]:

from sklearn.ensemble import RandomForestClassifier

# def predict_next_day(X_train, y_train, X_test, y_test, df, current_month, previous_month, best_model):
#     # Initialiser le modèle RandomForestClassifier
#     model = RandomForestClassifier()
    
#     model.fit(X_train, y_train)
#     predictions = model.predict(X_test)
#     model.fit(df.drop('target', axis=1).iloc[:-1], df['target'].iloc[:-1])
#     last_date_prediction = model.predict(df.drop('target', axis=1).iloc[-1:])
    
#     accuracy = accuracy_score(y_test, predictions)
#     rmse = np.sqrt(mean_squared_error(y_test, predictions))
    
#     # Imprimer le nom du modèle
#     print(model.__class__.__name__)
    
#     return last_date_prediction, model.__class__.__name__, accuracy, rmse, current_month, model

# Initialiser la date de début et la date de fin
prediction_start_date = datetime(2022, 8, 1)
end_date = dfcrypto['date'].max()
#end_date = datetime(2022,10,1)


#isole un df qui contient les données entre les deux dates 
df = create_df(prediction_start_date, end_date)

#récupère toute la colonne date afin de boucler dessus donc dans un nouveau df 
df_date = df['date']

#mets df_date dans un dataframe
df_date = pd.DataFrame(df_date)

# Initialiser une liste vide pour stocker les dates de début
start_dates = []

# Parcourir chaque date dans df_date
for date in df_date['date']:
    # Calculer la date de début comme étant six mois avant la date actuelle
    start_date = date - pd.DateOffset(months=6)
    
    # Vérifier si start_date est dans df_btc['date']
    if start_date not in dfcrypto['date'].values:
        # Si start_date n'est pas dans df_btc['date'], trouver la date la plus proche qui est dans df_btc['date']
        start_date = dfcrypto['date'][dfcrypto['date'].sub(start_date).abs().idxmin()]
    
    # Ajouter la date de début à la liste start_dates
    start_dates.append(start_date)

# Ajouter la liste start_dates comme une nouvelle colonne 'start_date' dans df
df_date['start_date'] = start_dates
# Initialiser le DataFrame pour stocker les prédictions
predictions_df = pd.DataFrame(columns=['date', 'prediction'])

# Initialiser le meilleur modèle
best_model_name = None
best_model_accuracy = None
best_model_rmse = None
current_month = None
previous_month = None
best_model = None

# Boucle sur chaque ligne dans df_Date
for index, row in df_date.iterrows():
    # Utiliser la valeur de la colonne 'date' comme current_date
    current_date = row['date']
    # Utiliser la valeur de la colonne 'start_date' comme window_start_date
    window_start_date = row['start_date']

    # Créer le DataFrame pour la période de la fenêtre glissante
    df = create_df(window_start_date, current_date)
    print(df['date'].min(), df['date'].max())

    # Supposons que df est votre DataFrame
    train_ratio = 0.8
    train_size = int(len(df) * train_ratio)

    df_train = df.iloc[:train_size]
    df_test = df.iloc[train_size:]

    df_train = df_train.set_index('date')
    df_test = df_test.set_index('date')

    X_train = df_train.drop('target',axis=1)
    y_train = df_train['target']
    X_test = df_test.drop('target',axis=1)
    y_test = df_test['target']

    current_month = df['month'].iloc[-1]

    #drop la colonne date
    df = df.drop('date', axis=1)
    
    # Prédire la valeur pour le jour suivant
    next_day_prediction, best_model_name, best_model_accuracy, best_model_rmse, previous_month, best_model = predict_next_day(X_train, y_train, X_test, y_test, df, current_month, previous_month, best_model)
    
    # Ajouter la prédiction, le nom du modèle, l'accuracy et le RMSE au DataFrame des prédictions
    predictions_df = predictions_df.append({
        'date': current_date, 
        'prediction': next_day_prediction,
        'model': best_model_name,
        'accuracy': best_model_accuracy,
        'RMSE': best_model_rmse
    }, ignore_index=True)

# Afficher les prédictions
print(predictions_df)

In [23]:
import sklearn
# def can_handle_na(model):
#     """
#     Teste si un modèle peut gérer les valeurs NA.

#     Parameters:
#     model : Le modèle à tester.

#     Returns:
#     True si le modèle peut gérer les valeurs NA, False sinon.
#     """
#     import numpy as np
#     from sklearn.model_selection import train_test_split

#     # Créer un petit ensemble de données avec une valeur NA
#     X = np.array([[1, 2], [3, np.nan], [5, 6]])
#     y = np.array([1, 2, 3])

#     # Diviser les données en ensembles d'entraînement et de test
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#     try:
#         # Essayer d'entraîner le modèle sur les données
#         model.fit(X_train, y_train)
#         return True
#     except ValueError:
#         # Si une ValueError est levée, le modèle ne peut pas gérer les valeurs NA
#         return False

# Initialiser le DataFrame pour stocker les prédictions
predictions_df = pd.DataFrame()

# Boucle sur chaque crypto dans crypto_list
for crypto in list_crypto:
    df_train = dfs[crypto]['train']
    df_test = dfs[crypto]['test']
    dfcrypto = dfs[crypto]['crypto']

    # Initialiser la date de début et la date de fin
    prediction_start_date = datetime(2022, 8, 1)
    end_date = dfcrypto['date'].max()
    #end_date = datetime(2022,10,1)

    #isole un df qui contient les données entre les deux dates 
    df = create_df(prediction_start_date, end_date, dfcrypto)

    #récupère toute la colonne date afin de boucler dessus donc dans un nouveau df 
    df_date = df['date']

    #mets df_date dans un dataframe
    df_date = pd.DataFrame(df_date)

    # Initialiser une liste vide pour stocker les dates de début
    start_dates = []

    # Parcourir chaque date dans df_date
    for date in df_date['date']:
        # Calculer la date de début comme étant six mois avant la date actuelle
        start_date = date - pd.DateOffset(months=6)
    
        # Vérifier si start_date est dans df_btc['date']
        if start_date not in dfcrypto['date'].values:
            # Si start_date n'est pas dans df_btc['date'], trouver la date la plus proche qui est dans df_btc['date']
            start_date = dfcrypto['date'][dfcrypto['date'].sub(start_date).abs().idxmin()]
    
        # Ajouter la date de début à la liste start_dates
        start_dates.append(start_date)

    # Ajouter la liste start_dates comme une nouvelle colonne 'start_date' dans df
    df_date['start_date'] = start_dates

    # Boucle sur chaque ligne dans df_Date
    for index, row in df_date.iterrows():
        # Utiliser la valeur de la colonne 'date' comme current_date
        current_date = row['date']
        # Utiliser la valeur de la colonne 'start_date' comme window_start_date
        window_start_date = row['start_date']

        # Créer le DataFrame pour la période de la fenêtre glissante
        df = create_df(window_start_date, current_date, dfcrypto)
        print(df['date'].min(), df['date'].max())

        # Supposons que df est votre DataFrame
        train_ratio = 0.8
        train_size = int(len(df) * train_ratio)


        
        df = df.fillna(0)



        df_train = df.iloc[:train_size]
        df_test = df.iloc[train_size:]

        df_train = df_train.set_index('date')
        df_test = df_test.set_index('date')

        X_train = df_train.drop('target',axis=1)
        y_train = df_train['target']
        X_test = df_test.drop('target',axis=1)
        y_test = df_test['target']



        current_month = df['month'].iloc[-1]

        #drop la colonne date
        df = df.drop('date', axis=1)
        
        # Prédire la valeur pour le jour suivant
        next_day_prediction, best_model_name, best_model_accuracy, best_model_rmse, previous_month, best_model = predict_next_day(X_train, y_train, X_test, y_test, df, current_month, previous_month, best_model)
        
        # Ajouter la prédiction, le nom du modèle, l'accuracy et le RMSE au DataFrame des prédictions
        predictions_df = predictions_df.append({
            'date': current_date, 
            'prediction_{}'.format(crypto): next_day_prediction,
            'model_{}'.format(crypto): best_model_name,
            'accuracy_{}'.format(crypto): best_model_accuracy,
            'RMSE_{}'.format(crypto): best_model_rmse
        }, ignore_index=True)

# Afficher les prédictions
print(predictions_df)

2022-02-01 00:00:00 2022-08-01 00:00:00




ExtraTreeClassifier
2022-02-02 00:00:00 2022-08-02 00:00:00




ExtraTreeClassifier
2022-02-03 00:00:00 2022-08-03 00:00:00




ExtraTreeClassifier
2022-02-04 00:00:00 2022-08-04 00:00:00




ExtraTreeClassifier
2022-02-04 00:00:00 2022-08-05 00:00:00




ExtraTreeClassifier
2022-02-08 00:00:00 2022-08-08 00:00:00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


ExtraTreeClassifier
2022-02-09 00:00:00 2022-08-09 00:00:00




ExtraTreeClassifier
2022-02-10 00:00:00 2022-08-10 00:00:00




ExtraTreeClassifier
2022-02-11 00:00:00 2022-08-11 00:00:00




ExtraTreeClassifier
2022-02-11 00:00:00 2022-08-12 00:00:00




ExtraTreeClassifier
2022-02-15 00:00:00 2022-08-15 00:00:00




ExtraTreeClassifier
2022-02-16 00:00:00 2022-08-16 00:00:00




ExtraTreeClassifier
2022-02-17 00:00:00 2022-08-17 00:00:00


  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])


ExtraTreeClassifier
2022-02-18 00:00:00 2022-08-18 00:00:00




ExtraTreeClassifier
2022-02-18 00:00:00 2022-08-19 00:00:00




ExtraTreeClassifier
2022-02-22 00:00:00 2022-08-22 00:00:00




ExtraTreeClassifier
2022-02-23 00:00:00 2022-08-23 00:00:00




ExtraTreeClassifier
2022-02-24 00:00:00 2022-08-24 00:00:00




ExtraTreeClassifier
2022-02-25 00:00:00 2022-08-25 00:00:00




ExtraTreeClassifier
2022-02-25 00:00:00 2022-08-26 00:00:00




ExtraTreeClassifier
2022-02-28 00:00:00 2022-08-29 00:00:00




ExtraTreeClassifier
2022-02-28 00:00:00 2022-08-30 00:00:00




ExtraTreeClassifier
2022-02-28 00:00:00 2022-08-31 00:00:00




ExtraTreeClassifier
2022-03-01 00:00:00 2022-09-01 00:00:00




BernoulliNB
2022-03-02 00:00:00 2022-09-02 00:00:00




BernoulliNB
2022-03-04 00:00:00 2022-09-05 00:00:00




BernoulliNB
2022-03-07 00:00:00 2022-09-06 00:00:00




BernoulliNB
2022-03-07 00:00:00 2022-09-07 00:00:00




BernoulliNB
2022-03-08 00:00:00 2022-09-08 00:00:00




BernoulliNB
2022-03-09 00:00:00 2022-09-09 00:00:00




BernoulliNB
2022-03-11 00:00:00 2022-09-12 00:00:00




BernoulliNB
2022-03-14 00:00:00 2022-09-13 00:00:00




BernoulliNB
2022-03-14 00:00:00 2022-09-14 00:00:00




BernoulliNB
2022-03-15 00:00:00 2022-09-15 00:00:00




BernoulliNB
2022-03-16 00:00:00 2022-09-16 00:00:00




BernoulliNB
2022-03-18 00:00:00 2022-09-19 00:00:00




BernoulliNB
2022-03-21 00:00:00 2022-09-20 00:00:00




BernoulliNB
2022-03-21 00:00:00 2022-09-21 00:00:00




BernoulliNB
2022-03-22 00:00:00 2022-09-22 00:00:00




BernoulliNB
2022-03-23 00:00:00 2022-09-23 00:00:00




BernoulliNB
2022-03-25 00:00:00 2022-09-26 00:00:00




BernoulliNB
2022-03-28 00:00:00 2022-09-27 00:00:00




BernoulliNB
2022-03-28 00:00:00 2022-09-28 00:00:00




BernoulliNB
2022-03-29 00:00:00 2022-09-29 00:00:00




BernoulliNB
2022-03-30 00:00:00 2022-09-30 00:00:00




BernoulliNB
2022-04-04 00:00:00 2022-10-03 00:00:00




RandomForestClassifier
2022-04-04 00:00:00 2022-10-04 00:00:00




RandomForestClassifier
2022-04-05 00:00:00 2022-10-05 00:00:00




RandomForestClassifier
2022-04-06 00:00:00 2022-10-06 00:00:00




RandomForestClassifier
2022-04-07 00:00:00 2022-10-07 00:00:00




RandomForestClassifier
2022-04-11 00:00:00 2022-10-10 00:00:00




RandomForestClassifier
2022-04-11 00:00:00 2022-10-11 00:00:00




RandomForestClassifier
2022-04-12 00:00:00 2022-10-12 00:00:00




RandomForestClassifier
2022-04-13 00:00:00 2022-10-13 00:00:00




RandomForestClassifier
2022-04-14 00:00:00 2022-10-14 00:00:00




RandomForestClassifier
2022-04-18 00:00:00 2022-10-17 00:00:00




RandomForestClassifier
2022-04-18 00:00:00 2022-10-18 00:00:00




RandomForestClassifier
2022-04-19 00:00:00 2022-10-19 00:00:00




RandomForestClassifier
2022-04-20 00:00:00 2022-10-20 00:00:00




RandomForestClassifier
2022-04-21 00:00:00 2022-10-21 00:00:00




RandomForestClassifier
2022-04-25 00:00:00 2022-10-24 00:00:00




RandomForestClassifier
2022-04-25 00:00:00 2022-10-25 00:00:00




RandomForestClassifier
2022-04-26 00:00:00 2022-10-26 00:00:00




RandomForestClassifier
2022-04-27 00:00:00 2022-10-27 00:00:00




RandomForestClassifier
2022-04-28 00:00:00 2022-10-28 00:00:00




RandomForestClassifier
2022-04-29 00:00:00 2022-10-31 00:00:00




RandomForestClassifier
2022-05-02 00:00:00 2022-11-01 00:00:00




AdaBoostClassifier
2022-05-02 00:00:00 2022-11-02 00:00:00




AdaBoostClassifier
2022-05-03 00:00:00 2022-11-03 00:00:00




AdaBoostClassifier
2022-05-04 00:00:00 2022-11-04 00:00:00




AdaBoostClassifier
2022-05-06 00:00:00 2022-11-07 00:00:00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


AdaBoostClassifier
2022-05-09 00:00:00 2022-11-08 00:00:00




AdaBoostClassifier
2022-05-09 00:00:00 2022-11-09 00:00:00




AdaBoostClassifier
2022-05-10 00:00:00 2022-11-10 00:00:00




AdaBoostClassifier
2022-05-11 00:00:00 2022-11-11 00:00:00




AdaBoostClassifier
2022-05-13 00:00:00 2022-11-14 00:00:00




AdaBoostClassifier
2022-05-16 00:00:00 2022-11-15 00:00:00




AdaBoostClassifier
2022-05-16 00:00:00 2022-11-16 00:00:00




AdaBoostClassifier
2022-05-17 00:00:00 2022-11-17 00:00:00




AdaBoostClassifier
2022-05-18 00:00:00 2022-11-18 00:00:00




AdaBoostClassifier
2022-05-20 00:00:00 2022-11-21 00:00:00




AdaBoostClassifier
2022-05-23 00:00:00 2022-11-22 00:00:00




AdaBoostClassifier
2022-05-23 00:00:00 2022-11-23 00:00:00




AdaBoostClassifier
2022-05-24 00:00:00 2022-11-24 00:00:00




AdaBoostClassifier
2022-05-25 00:00:00 2022-11-25 00:00:00




AdaBoostClassifier
2022-05-27 00:00:00 2022-11-28 00:00:00




AdaBoostClassifier
2022-05-30 00:00:00 2022-11-29 00:00:00




AdaBoostClassifier
2022-05-30 00:00:00 2022-11-30 00:00:00




AdaBoostClassifier
2022-06-01 00:00:00 2022-12-01 00:00:00




LinearDiscriminantAnalysis
2022-06-02 00:00:00 2022-12-02 00:00:00




LinearDiscriminantAnalysis
2022-06-06 00:00:00 2022-12-05 00:00:00




LinearDiscriminantAnalysis
2022-06-06 00:00:00 2022-12-06 00:00:00




LinearDiscriminantAnalysis
2022-06-07 00:00:00 2022-12-07 00:00:00




LinearDiscriminantAnalysis
2022-06-08 00:00:00 2022-12-08 00:00:00




LinearDiscriminantAnalysis
2022-06-09 00:00:00 2022-12-09 00:00:00




LinearDiscriminantAnalysis
2022-06-13 00:00:00 2022-12-12 00:00:00




LinearDiscriminantAnalysis
2022-06-13 00:00:00 2022-12-13 00:00:00




LinearDiscriminantAnalysis
2022-06-14 00:00:00 2022-12-14 00:00:00




LinearDiscriminantAnalysis
2022-06-15 00:00:00 2022-12-15 00:00:00




LinearDiscriminantAnalysis
2022-06-16 00:00:00 2022-12-16 00:00:00




LinearDiscriminantAnalysis
2022-06-20 00:00:00 2022-12-19 00:00:00




LinearDiscriminantAnalysis
2022-06-20 00:00:00 2022-12-20 00:00:00




LinearDiscriminantAnalysis
2022-06-21 00:00:00 2022-12-21 00:00:00




LinearDiscriminantAnalysis
2022-06-22 00:00:00 2022-12-22 00:00:00




LinearDiscriminantAnalysis
2022-06-23 00:00:00 2022-12-23 00:00:00




LinearDiscriminantAnalysis
2022-06-27 00:00:00 2022-12-26 00:00:00




LinearDiscriminantAnalysis
2022-06-27 00:00:00 2022-12-27 00:00:00




LinearDiscriminantAnalysis
2022-06-28 00:00:00 2022-12-28 00:00:00




LinearDiscriminantAnalysis
2022-06-29 00:00:00 2022-12-29 00:00:00




LinearDiscriminantAnalysis
2022-06-30 00:00:00 2022-12-30 00:00:00




LinearDiscriminantAnalysis
2022-07-01 00:00:00 2023-01-02 00:00:00




GradientBoostingClassifier
2022-07-04 00:00:00 2023-01-03 00:00:00




GradientBoostingClassifier
2022-07-04 00:00:00 2023-01-04 00:00:00




GradientBoostingClassifier
2022-07-05 00:00:00 2023-01-05 00:00:00




GradientBoostingClassifier
2022-07-06 00:00:00 2023-01-06 00:00:00




GradientBoostingClassifier
2022-07-08 00:00:00 2023-01-09 00:00:00




GradientBoostingClassifier
2022-07-11 00:00:00 2023-01-10 00:00:00




GradientBoostingClassifier
2022-07-11 00:00:00 2023-01-11 00:00:00




GradientBoostingClassifier
2022-07-12 00:00:00 2023-01-12 00:00:00




GradientBoostingClassifier
2022-07-13 00:00:00 2023-01-13 00:00:00




GradientBoostingClassifier
2022-07-15 00:00:00 2023-01-16 00:00:00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


GradientBoostingClassifier
2022-07-18 00:00:00 2023-01-17 00:00:00




GradientBoostingClassifier
2022-07-18 00:00:00 2023-01-18 00:00:00




GradientBoostingClassifier
2022-07-19 00:00:00 2023-01-19 00:00:00




GradientBoostingClassifier
2022-07-20 00:00:00 2023-01-20 00:00:00




GradientBoostingClassifier
2022-07-22 00:00:00 2023-01-23 00:00:00




GradientBoostingClassifier
2022-07-25 00:00:00 2023-01-24 00:00:00




GradientBoostingClassifier
2022-07-25 00:00:00 2023-01-25 00:00:00




GradientBoostingClassifier
2022-07-26 00:00:00 2023-01-26 00:00:00




GradientBoostingClassifier
2022-07-27 00:00:00 2023-01-27 00:00:00




GradientBoostingClassifier
2022-07-29 00:00:00 2023-01-30 00:00:00




GradientBoostingClassifier
2022-08-01 00:00:00 2023-01-31 00:00:00




GradientBoostingClassifier
2022-08-01 00:00:00 2023-02-01 00:00:00




LinearDiscriminantAnalysis
2022-08-02 00:00:00 2023-02-02 00:00:00




LinearDiscriminantAnalysis
2022-08-03 00:00:00 2023-02-03 00:00:00




LinearDiscriminantAnalysis
2022-08-05 00:00:00 2023-02-06 00:00:00




LinearDiscriminantAnalysis
2022-08-08 00:00:00 2023-02-07 00:00:00




LinearDiscriminantAnalysis
2022-08-08 00:00:00 2023-02-08 00:00:00




LinearDiscriminantAnalysis
2022-08-09 00:00:00 2023-02-09 00:00:00
LinearDiscriminantAnalysis
2022-08-10 00:00:00 2023-02-10 00:00:00
LinearDiscriminantAnalysis
2022-08-12 00:00:00 2023-02-13 00:00:00
LinearDiscriminantAnalysis
2022-08-15 00:00:00 2023-02-14 00:00:00
LinearDiscriminantAnalysis
2022-08-15 00:00:00 2023-02-15 00:00:00
LinearDiscriminantAnalysis
2022-08-16 00:00:00 2023-02-16 00:00:00
LinearDiscriminantAnalysis
2022-08-17 00:00:00 2023-02-17 00:00:00
LinearDiscriminantAnalysis
2022-08-19 00:00:00 2023-02-20 00:00:00
LinearDiscriminantAnalysis
2022-08-22 00:00:00 2023-02-21 00:00:00
LinearDiscriminantAnalysis
2022-08-22 00:00:00 2023-02-22 00:00:00
LinearDiscriminantAnalysis
2022-08-23 00:00:00 2023-02-23 00:00:00
LinearDiscriminantAnalysis
2022-08-24 00:00:00 2023-02-24 00:00:00
LinearDiscriminantAnalysis
2022-08-26 00:00:00 2023-02-27 00:00:00
LinearDiscriminantAnalysis
2022-08-29 00:00:00 2023-02-28 00:00:00
LinearDiscriminantAnalysis
2022-09-01 00:00:00 2023-03-01 00:0



XGBClassifier
2022-02-02 00:00:00 2022-08-02 00:00:00




XGBClassifier
2022-02-03 00:00:00 2022-08-03 00:00:00




XGBClassifier
2022-02-04 00:00:00 2022-08-04 00:00:00




XGBClassifier
2022-02-04 00:00:00 2022-08-05 00:00:00




XGBClassifier
2022-02-08 00:00:00 2022-08-08 00:00:00




XGBClassifier
2022-02-09 00:00:00 2022-08-09 00:00:00




XGBClassifier
2022-02-10 00:00:00 2022-08-10 00:00:00




XGBClassifier
2022-02-11 00:00:00 2022-08-11 00:00:00


  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])


XGBClassifier
2022-02-11 00:00:00 2022-08-12 00:00:00


  X2 = np.dot(Xm, R * (S ** (-0.5)))
  X2 = np.dot(Xm, R * (S ** (-0.5)))
  u = np.asarray([np.sum(np.log(s)) for s in self.scalings_])


XGBClassifier
2022-02-15 00:00:00 2022-08-15 00:00:00


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


XGBClassifier
2022-02-16 00:00:00 2022-08-16 00:00:00




XGBClassifier
2022-02-17 00:00:00 2022-08-17 00:00:00




XGBClassifier
2022-02-18 00:00:00 2022-08-18 00:00:00




XGBClassifier
2022-02-18 00:00:00 2022-08-19 00:00:00




XGBClassifier
2022-02-22 00:00:00 2022-08-22 00:00:00




XGBClassifier
2022-02-23 00:00:00 2022-08-23 00:00:00




XGBClassifier
2022-02-24 00:00:00 2022-08-24 00:00:00




XGBClassifier
2022-02-25 00:00:00 2022-08-25 00:00:00




XGBClassifier
2022-02-25 00:00:00 2022-08-26 00:00:00




XGBClassifier
2022-02-28 00:00:00 2022-08-29 00:00:00




XGBClassifier
2022-02-28 00:00:00 2022-08-30 00:00:00




XGBClassifier
2022-02-28 00:00:00 2022-08-31 00:00:00




XGBClassifier
2022-03-01 00:00:00 2022-09-01 00:00:00




ExtraTreesClassifier
2022-03-02 00:00:00 2022-09-02 00:00:00




ExtraTreesClassifier
2022-03-04 00:00:00 2022-09-05 00:00:00




ExtraTreesClassifier
2022-03-07 00:00:00 2022-09-06 00:00:00




ExtraTreesClassifier
2022-03-07 00:00:00 2022-09-07 00:00:00


KeyboardInterrupt: 

In [9]:
# Fusionner predictions_df et df_btc sur la colonne 'date'
df_merged = pd.merge(predictions_df, df_btc[['date', 'target']], on='date')

# Comparer les colonnes 'prediction' et 'target'
df_merged['correct'] = df_merged['prediction'] == df_merged['target']

# Compter le nombre de prédictions correctes
num_correct_predictions = df_merged['correct'].sum()

print(f"Nombre de prédictions correctes : {num_correct_predictions}")
# Filtrer pour les prédictions correctes
correct_predictions = df_merged[df_merged['correct']]

# Compter le nombre de 0 et de 1 dans les prédictions correctes
num_correct_zeros = (correct_predictions['target'] == 0).sum()
num_correct_ones = (correct_predictions['target'] == 1).sum()

print(f"Nombre de 0 correctement prédits : {num_correct_zeros}")
print(f"Nombre de 1 correctement prédits : {num_correct_ones}")

Nombre de prédictions correctes : 117
Nombre de 0 correctement prédits : 65
Nombre de 1 correctement prédits : 52
