##### Importation des librairies

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor,GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from datetime import datetime, timedelta
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_selection import SelectFromModel
import numpy as np
import tensorflow as tf
from config import Config, Constant
import warnings
warnings.filterwarnings('ignore')

In [2]:
df=pd.read_csv(str(Config.DATASET_DIR) + '/' + str(Config.DATA_1DEK_EXTR))

In [3]:
df.head()

Unnamed: 0,Station,Year,Month,Decade,v_wind_975,u_wind_700,u_wind_100,eau_precipitable,t_point_rosee,h_vol_sol_wat,anom_lef_dek,anom_nino_dek,Date,Label Secheresse,Saison_Pluie
0,Bobo_Dioulasso,1961.0,1.0,1.0,-2.477929,-6.607575,16.585018,7.668101e-08,274.869892,0.177988,0.366058,-0.024109,1961-01-01,0.0,False
1,Bogande,1961.0,1.0,1.0,-2.902994,-3.065768,17.778388,6.194008e-08,269.371269,0.080786,0.366058,-0.024109,,,
2,Boromo,1961.0,1.0,1.0,-3.516664,-5.668901,16.643275,3.728255e-08,274.435394,0.168747,0.366058,-0.024109,1961-01-01,0.0,False
3,Dedougou,1961.0,1.0,1.0,-3.342931,-4.970608,18.23231,8.872772e-09,274.004411,0.124276,0.366058,-0.024109,1961-01-01,0.0,False
4,Dori,1961.0,1.0,1.0,-3.13265,-1.264646,19.343432,-5.116298e-08,267.627423,0.18111,0.366058,-0.024109,1961-01-01,0.0,False


#### Creation des fonctions utiles

In [4]:
# Définition de la fonction to_numeric_with_nan
def to_numeric_with_nan(value):
    try:
        return int(value)
    except (ValueError, TypeError):
        return pd.NA  # Retourne une valeur manquante

In [5]:
def label_change(value):
    if pd.isna(value):
        return pd.NA
    elif value == 0.0:
        return '0'
    elif value == 1.0:
        return '1'
    else:
        return value  # Si la valeur est différente de NaN, 0.0 et 1.0, la renvoyer telle quelle

In [6]:
def load_data_dek(file_path):
    df = pd.read_csv(file_path)    
    # Supprimer les lignes contenant des valeurs NaN
    df.dropna(axis=0, inplace=True)   
    # Convertir les colonnes Year, Month et Decade en numérique (si nécessaire)
    df[['Year', 'Month', 'Decade']] = df[['Year', 'Month', 'Decade']].applymap(to_numeric_with_nan)    
    # Appliquer la fonction label_change à la colonne 'Label Secheresse'
    df['Label Secheresse'] = df['Label Secheresse'].apply(label_change)  
    # Encodage des valeurs qualitatives
    ordinal_columns = ['Station', 'Saison_Pluie']  # Liste des colonnes catégorielles ordinales
    encoder = LabelEncoder()    
    for col in ordinal_columns:
        df[col] = encoder.fit_transform(df[col])   
    # Décaler la sécheresse de deux mois en avant pour la prédiction
    df['Secheresse_future'] = df['Label Secheresse'].shift(2)
    df.dropna(axis=0, inplace=True)  
    return df


In [7]:
def load_data_mon(file_path):
    df = pd.read_csv(file_path)    
    # Supprimer les lignes contenant des valeurs NaN
    df.dropna(axis=0, inplace=True)   
    # Convertir les colonnes Year, Month et Decade en numérique (si nécessaire)
    # Vérifier si la colonne 'Decade' existe et la supprimer si nécessaire
    if 'Decade' in df.columns:
        df.drop('Decade', axis=1, inplace=True)
    df[['Year', 'Month']] = df[['Year', 'Month']].applymap(to_numeric_with_nan)    
    # Appliquer la fonction label_change à la colonne 'Label Secheresse'
    df['Label Secheresse'] = df['Label Secheresse'].apply(label_change)  
    # Encodage des valeurs qualitatives
    ordinal_columns = ['Station', 'Saison_Pluie']  # Liste des colonnes catégorielles ordinales
    encoder = LabelEncoder()    
    for col in ordinal_columns:
        df[col] = encoder.fit_transform(df[col])   
    # Décaler la sécheresse de deux mois en avant pour la prédiction
    df['Secheresse_future'] = df['Label Secheresse'].shift(2)
    df.dropna(axis=0, inplace=True)  
    return df


In [8]:
features=['Station','v_wind_975','u_wind_700','u_wind_100','eau_precipitable','t_point_rosee','h_vol_sol_wat','anom_lef_dek','anom_nino_dek','Saison_Pluie']
feature_rnn=['Station','v_wind_975','u_wind_700','u_wind_100','eau_precipitable','t_point_rosee','h_vol_sol_wat','anom_lef_dek','anom_nino_dek','Saison_Pluie','Secheresse_future']

In [9]:


def train_and_evaluate_model_ranf(df):
    # Supprimer les colonnes inutiles ou non pertinentes pour la classification
    # Vérifier si la colonne 'Decade' existe et la supprimer si nécessaire
    if 'Decade' in df.columns:
        df.drop('Decade', axis=1, inplace=True)
    df = df.drop(['Station', 'Year', 'Month', 'Date'], axis=1)

    # Diviser les données en caractéristiques (X) et étiquettes (y)
    X = df.drop('Secheresse_future', axis=1)
    y = df['Secheresse_future']

    # Créer le modèle RandomForest
    model = RandomForestClassifier(random_state=42)

    # Utiliser un sélecteur de caractéristiques basé sur l'importance des fonctionnalités
    feature_selector = SelectFromModel(model, threshold='median')
    feature_selector.fit(X, y)

    # Obtenir les caractéristiques retenues
    selected_features = X.columns[feature_selector.get_support()]

    # Appliquer la sélection de caractéristiques aux données
    X_selected = feature_selector.transform(X)

    # Définir une grille d'hyperparamètres à rechercher
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    # Créer une instance de GridSearchCV
    grid_search = GridSearchCV(model, param_grid, cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
                               n_jobs=-1, verbose=2)

    # Effectuer la recherche d'hyperparamètres
    grid_search.fit(X_selected, y)

    # Obtenir le meilleur modèle
    best_model = grid_search.best_estimator_

    # Évaluer le modèle avec une validation croisée k-fold
    k_fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(best_model, X_selected, y, cv=k_fold, scoring='accuracy')
    mean_accuracy = scores.mean()

    print("Best Parameters:", grid_search.best_params_)
    print("Selected Features:", selected_features)
    print("Mean Cross-Validated Accuracy:", mean_accuracy)

    return best_model

In [10]:
def train_and_evaluate_boost(df):
    # Supprimer les colonnes inutiles ou non pertinentes pour la classification
    # Vérifier si la colonne 'Decade' existe et la supprimer si nécessaire
    if 'Decade' in df.columns:
        df.drop('Decade', axis=1, inplace=True)
    df = df.drop(['Station', 'Year', 'Month', 'Date'], axis=1)

    # Diviser les données en caractéristiques (X) et étiquettes (y)
    X = df.drop('Secheresse_future', axis=1)
    y = df['Secheresse_future']

    # Créer le modèle Gradient Boosting
    model = GradientBoostingClassifier(random_state=42)

    # Utiliser un sélecteur de caractéristiques basé sur l'importance des fonctionnalités
    feature_selector = SelectFromModel(model, threshold='median')
    feature_selector.fit(X, y)

    # Obtenir les caractéristiques retenues
    selected_features = X.columns[feature_selector.get_support()]

    # Appliquer la sélection de caractéristiques aux données
    X_selected = feature_selector.transform(X)

    # Définir une grille d'hyperparamètres à rechercher
    param_grid = {
        'n_estimators': [50, 100, 200],
        'learning_rate': [0.01, 0.1, 0.2],
        'max_depth': [3, 4, 5],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    # Créer une instance de GridSearchCV
    grid_search = GridSearchCV(model, param_grid, cv=StratifiedKFold(n_splits=5, shuffle=True, random_state=42),
                               n_jobs=-1, verbose=2)

    # Effectuer la recherche d'hyperparamètres
    grid_search.fit(X_selected, y)

    # Obtenir le meilleur modèle
    best_model = grid_search.best_estimator_

    # Évaluer le modèle avec une validation croisée k-fold
    k_fold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(best_model, X_selected, y, cv=k_fold, scoring='accuracy')
    mean_accuracy = scores.mean()

    print("Best Parameters:", grid_search.best_params_)
    print("Selected Features:", selected_features)
    print("Mean Cross-Validated Accuracy:", mean_accuracy)

    return best_model


In [11]:
def train_and_evaluate_model(X, y, model, param_grid, cv=5):
    # Prétraitement des données
    imputer = SimpleImputer(strategy='mean')
    scaler = StandardScaler()

    # Créer le sélecteur de caractéristiques basé sur l'importance des fonctionnalités
    feature_selector = SelectFromModel(model, threshold='median')

    # Créer le pipeline de prétraitement, de sélection de caractéristiques et de modèle
    pipeline = Pipeline([
        ('imputer', imputer),
        ('scaler', scaler),
        ('feature_selector', feature_selector),
        ('model', model)
    ])

    # Créer le modèle GridSearchCV
    grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=cv, scoring='accuracy')

    # Entraîner le modèle GridSearchCV avec validation croisée
    grid_search.fit(X, y)

    # Obtenir les meilleures valeurs d'hyperparamètres
    best_params = grid_search.best_params_

    # Obtenir le modèle avec les meilleurs hyperparamètres
    best_model = grid_search.best_estimator_

    # Évaluer le modèle avec validation croisée
    scores = cross_val_score(best_model, X, y, cv=cv, scoring='accuracy')
    mean_accuracy = scores.mean()
    std_accuracy = scores.std()

    return best_params, best_model, mean_accuracy, std_accuracy

In [12]:
def train_and_evaluate_rnn_model(train_data, train_labels, test_data, test_labels):
    # Créer le modèle RNN
    model = tf.keras.models.Sequential([
        tf.keras.layers.SimpleRNN(units=64, activation='relu', input_shape=(None, train_data.shape[2])),
        tf.keras.layers.Dense(units=1)
    ])
    
    # Compiler le modèle
    model.compile(optimizer='adam', loss='mse')
    
    # Entraîner le modèle
    model.fit(train_data, train_labels, epochs=10, batch_size=32)
    
    # Évaluer le modèle sur les données de test
    loss = model.evaluate(test_data, test_labels)
    print("Loss (MSE) on test data:", loss)
    
    return model

In [13]:
def prepare_data_for_rnn(df, target_column, test_size=0.2):
    # Sélectionner les colonnes d'entités (X) en excluant la colonne cible
    features = df.drop(target_column, axis=1).values.astype(np.float32)
    
    # Sélectionner la colonne cible (y)
    target = df[target_column].values.astype(np.float32)
    
    # Fractionner les données en ensembles d'entraînement et de test
    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=test_size, shuffle=True)
    
    # Réorganiser les données dans le format approprié pour un RNN
    X_train = np.expand_dims(X_train, axis=-1)  # Ajouter une dimension à la fin des échantillons
    X_test = np.expand_dims(X_test, axis=-1)  # Ajouter une dimension à la fin des échantillons
    y_train = np.expand_dims(y_train, axis=-1)  # Ajouter une dimension à la fin des échantillons
    y_test = np.expand_dims(y_test, axis=-1)  # Ajouter une dimension à la fin des échantillons
    
    return X_train, y_train, X_test, y_test

### ENTRAINEMENT SUR LES DONNNEES DECADAIRES

#### Extreme

In [14]:

data_ext_1dek=load_data_dek(str(Config.DATASET_DIR) + '/' + str(Config.DATA_1DEK_EXTR))

#### RandonForrest

In [15]:
rand_ext_1dek=train_and_evaluate_model_ranf(data_ext_1dek)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.5s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.5s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.5s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.8s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.9s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   3.0s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.9s
[CV] END max_de

#### BOOST

In [16]:
boost_ext_1dek=train_and_evaluate_boost(data_ext_1dek)

Fitting 5 folds for each of 243 candidates, totalling 1215 fits
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.1s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.1s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.1s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.1s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.1s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.4s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; 

#### RNN

In [17]:
data_ext_1dek=data_ext_1dek[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_ext_1dek,target_column='Secheresse_future', test_size=0.2)

In [18]:
data_ext_1dek.head()

Unnamed: 0,Station,v_wind_975,u_wind_700,u_wind_100,eau_precipitable,t_point_rosee,h_vol_sol_wat,anom_lef_dek,anom_nino_dek,Saison_Pluie,Secheresse_future
3,3,-3.342931,-4.970608,18.23231,8.872772e-09,274.004411,0.124276,0.366058,-0.024109,0,0
4,4,-3.13265,-1.264646,19.343432,-5.116298e-08,267.627423,0.18111,0.366058,-0.024109,0,0
5,5,-2.70915,-4.116544,16.559061,-2.382527e-08,270.621893,0.117374,0.366058,-0.024109,0,0
6,6,-2.203496,-7.391964,14.800494,4.13028e-08,275.489078,0.138957,0.366058,-0.024109,0,0
7,7,-2.171745,-4.48842,17.522954,-1.847897e-09,273.942325,0.241372,0.366058,-0.024109,0,0


In [19]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss (MSE) on test data: 0.04753598943352699


<keras.src.engine.sequential.Sequential at 0x1740a0ca0>

#### Severe

In [20]:
data_sev_1dek=pd.read_csv(str(Config.DATASET_DIR) + '/' + str(Config.DATA_1DEK_SEV))

In [21]:
data_sev_1dek.head()

Unnamed: 0,Station,Year,Month,Decade,v_wind_975,u_wind_700,u_wind_100,eau_precipitable,t_point_rosee,h_vol_sol_wat,anom_lef_dek,anom_nino_dek,Date,Label Secheresse,Saison_Pluie
0,Bobo_Dioulasso,1961.0,1.0,1.0,-2.477929,-6.607575,16.585018,7.668101e-08,274.869892,0.177988,0.366058,-0.024109,1961-01-01,0.0,False
1,Bogande,1961.0,1.0,1.0,-2.902994,-3.065768,17.778388,6.194008e-08,269.371269,0.080786,0.366058,-0.024109,,,
2,Boromo,1961.0,1.0,1.0,-3.516664,-5.668901,16.643275,3.728255e-08,274.435394,0.168747,0.366058,-0.024109,1961-01-01,0.0,False
3,Dedougou,1961.0,1.0,1.0,-3.342931,-4.970608,18.23231,8.872772e-09,274.004411,0.124276,0.366058,-0.024109,1961-01-01,0.0,False
4,Dori,1961.0,1.0,1.0,-3.13265,-1.264646,19.343432,-5.116298e-08,267.627423,0.18111,0.366058,-0.024109,1961-01-01,0.0,False


In [22]:
data_sev_1dek=load_data_dek(str(Config.DATASET_DIR) + '/' + str(Config.DATA_1DEK_SEV))

#### RandonForrest

In [23]:
rand_sev_1dek=train_and_evaluate_model_ranf(data_sev_1dek)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.5s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.8s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.8s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.8s
[CV] END max_de

#### BOOST

In [24]:
boost_sev_1dek=train_and_evaluate_boost(data_sev_1dek)

Fitting 5 folds for each of 243 candidates, totalling 1215 fits
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.1s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.2s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.2s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.2s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.2s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; 

#### RNN

In [25]:
data_sev_1dek=data_sev_1dek[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_sev_1dek,target_column='Secheresse_future', test_size=0.2)

In [26]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Loss (MSE) on test data: 0.044040415436029434


<keras.src.engine.sequential.Sequential at 0x283fb5dc0>

### ENTRAINEMENT SPEI 1-MOIS

#### Extreme

In [27]:
data_ext_1mois=load_data_mon(str(Config.DATASET_DIR) + '/' + str(Config.DATA_1MON_EXTR))

#### RandonForrest

In [28]:
rand_ext_1mois=train_and_evaluate_model_ranf(data_ext_1mois)

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.3s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.4s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.6s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.7s
[CV] END max_depth=None, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.7s
[CV] END max_de

#### BOOST

In [29]:
boost_ext_1mois=train_and_evaluate_boost(data_ext_1mois)

Fitting 5 folds for each of 243 candidates, totalling 1215 fits
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.3s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.4s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   0.4s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.7s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   0.7s
[CV] END learning_rate=0.01, max_depth=3, min_samples_leaf=1, min_samples_split=2, n_estimators=100; 

#### RNN

In [30]:
data_ext_1mois=data_ext_1mois[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_ext_1mois,target_column='Secheresse_future', test_size=0.2)

KeyError: "['anom_lef_dek', 'anom_nino_dek'] not in index"

In [None]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

#### Severe

In [None]:
data_sev_1mois=load_data_dek(str(Config.DATASET_DIR) + '/' + str(Config.DATA_1MON_SEV))

#### RandonForrest

In [None]:
rand_sev_1mois=train_and_evaluate_model_ranf(data_sev_1mois)

#### BOOST

In [None]:
boost_sev_1dek=train_and_evaluate_boost(data_sev_1dek)

#### RNN

In [None]:
data_sev_1dek=data_sev_1dek[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_sev_1dek,target_column='Secheresse_future', test_size=0.2)

In [None]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

### ENTRAINEMENT SPEI 3-MOIS

#### Extreme

In [None]:

data_ext_3mois=load_data_dek(str(Config.DATASET_DIR) + '/' + str(Config.DATA_3MON_EXTR))

#### RandonForrest

In [None]:
rand_ext_3mois=train_and_evaluate_model_ranf(data_ext_3mois)

#### BOOST

In [None]:
boost_ext_3mois=train_and_evaluate_boost(data_ext_3mois)

#### RNN

In [None]:
data_ext_3mois=data_ext_3mois[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_ext_3mois,target_column='Secheresse_future', test_size=0.2)

In [None]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

#### Severe

In [None]:
data_sev_3mois=load_data_mon(str(Config.DATASET_DIR) + '/' + str(Config.DATA_3MON_SEV))

#### RandonForrest

In [None]:
rand_sev_3mois=train_and_evaluate_model_ranf(data_sev_3mois)

#### BOOST

In [None]:
boost_sev_3mois=train_and_evaluate_boost(data_sev_3mois)

#### RNN

In [None]:
data_sev_3mois=data_sev_3mois[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_sev_3mois,target_column='Secheresse_future', test_size=0.2)

In [None]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

### ENTRAINEMENT SPEI 6-MOIS

#### Extreme

In [None]:

data_ext_6mois=load_data_mon(str(Config.DATASET_DIR) + '/' + str(Config.DATA_6MON_EXTR))

#### RandonForrest

In [None]:
rand_ext_6mois=train_and_evaluate_model_ranf(data_ext_6mois)

#### BOOST

In [None]:
boost_ext_6mois=train_and_evaluate_boost(data_ext_6mois)

#### RNN

In [None]:
data_ext_6mois=data_ext_6mois[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_ext_6mois,target_column='Secheresse_future', test_size=0.2)

In [None]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

#### Severe

In [None]:
data_sev_6mois=load_data_dek(str(Config.DATASET_DIR) + '/' + str(Config.DATA_6MON_SEV))

#### RandonForrest

In [None]:
rand_sev_6mois=train_and_evaluate_model_ranf(data_sev_6mois)

#### BOOST

In [None]:
boost_sev_6mois=train_and_evaluate_boost(data_sev_6mois)

#### RNN

In [None]:
data_sev_6mois=data_sev_6mois[feature_rnn]
X_train, y_train, X_test, y_test=prepare_data_for_rnn(data_sev_6mois,target_column='Secheresse_future', test_size=0.2)

In [None]:
train_and_evaluate_rnn_model(X_train,y_train, X_test, y_test)

### ENTRAINEMENT SPEI 1-MOIS