In [3]:
# Import libraries

from mlflow import MlflowClient
import mlflow

import pandas as pd
import numpy as np
import sklearn.preprocessing as pp

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import RandomUnderSampler, ClusterCentroids
from xgboost import XGBClassifier

from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense,  Activation, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model, Sequential

# Define tracking_uri
# Constructing an MlflowClient only configures that client object; the fluent
# API used below (mlflow.set_experiment / mlflow.start_run / mlflow.log_*)
# resolves its own tracking URI. Set it explicitly so the experiment and the
# run are recorded on the same server the client points at.
tracking_uri = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(tracking_uri)
client = MlflowClient(tracking_uri=tracking_uri)

# Define experiment name, run name and artifact_path name
meteo_mean_experiment = mlflow.set_experiment("Meteo_cosin_rnn_xgb")
run_name = "rnn_xgb1"
artifact_path = "rnn_xgb_cosin"

# Import database

# Read the CSV, parse the date column, then order the rows chronologically
# before discarding the two columns that are not used as features.
df = pd.read_csv('df_full_cosin.csv', sep=',', header=0)
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values(by='Date').drop(columns=["Date", "Location"])
#df = df.drop(["Cluster"], axis = 1)

# Store the label column as a compact 8-bit integer
df["RainTomorrow"] = df["RainTomorrow"].astype(np.int8)

# Preprocessing

# Feature matrix: every column except the label
data = df.drop(columns=['RainTomorrow'])

# Label vector, passed through a LabelEncoder
encoder = LabelEncoder()
target = encoder.fit_transform(df['RainTomorrow'])

# Unshuffled split: the first 75% of the rows (in their current order) become
# the training set, the remaining 25% the test set.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.25,
    shuffle=False,
)

# Standardise the features using statistics fitted on the training rows only
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

#smo = SMOTE()
#X_sm, y_sm = smo.fit_resample(X_train, y_train)

#rOs = RandomOverSampler()
#X_sm, y_sm = rOs.fit_resample(X_train, y_train)

#rUs = RandomUnderSampler()
#X_sm, y_sm = rUs.fit_resample(X_train, y_train)

#cc = ClusterCentroids()
#X_sm, y_sm = cc.fit_resample(X_train, y_train)

# Train model

# Dense network built with the Keras functional API: four tanh layers followed
# by a single sigmoid output unit.
#
# `shape` has to be a tuple. The previous `(24)` is simply the integer 24
# (no trailing comma), which newer Keras releases refuse as a shape, and it
# hard-coded the number of features. Take the width from the scaled training
# matrix so the input layer always matches the data.
n_features = X_train.shape[1]

inputs = Input(shape = (n_features,), name = "Input")
dense1 = Dense(units = 24, activation = "tanh", name = "Couche_1")
dense2 = Dense(units = 16, activation = "tanh", name = "Couche_2")
dense3 = Dense(units = 10, activation = "tanh", name = "Couche_3")
dense4 = Dense(units = 6, activation = "tanh", name = "Couche_4")
dense5 = Dense(units = 1, activation = 'sigmoid', name = "Couche_5")

# Alternative configurations kept for reference (not executed):
#
# con1
# inputs = Input(shape = (24,), name = "Input")
# dense1 = Dense(units = 24, activation = "relu", name = "Couche_1")
# dense2 = Dense(units = 16, activation = "relu", name = "Couche_2")
# dense3 = Dense(units = 10, activation = "relu", name = "Couche_3")
# dense4 = Dense(units = 1, activation = 'sigmoid', name = "Couche_4")
#
# con2
# inputs = Input(shape = (24,), name = "Input")
# dense1 = Dense(units = 24, activation = "relu", name = "Couche_1")
# dense2 = Dense(units = 16, activation = "relu", name = "Couche_2")
# dense3 = Dense(units = 10, activation = "relu", name = "Couche_3")
# dense4 = Dense(units = 6, activation = "relu", name = "Couche_4")
# dense5 = Dense(units = 1, activation = 'sigmoid', name = "Couche_5")

# Chain the layers: Input -> Couche_1 -> ... -> Couche_5
x = dense1(inputs)
x = dense2(x)
x = dense3(x)
x = dense4(x)
outputs = dense5(x)

model = Model(inputs = inputs, outputs = outputs)
#model.summary()

# Hyperparameters of the neural network. compile() and fit() below read their
# values from this dict so that what is recorded can never drift away from
# what was actually used for training.
learning_rate = 0.001
params = {
    "loss": 'BinaryCrossentropy',
    'optimizer': f'Adam(learning_rate={learning_rate})',
    'metrics': 'accuracy',
    'epochs': 500,
    'batch_size': 32,
    'validation_split': 0.2
}

# Stop once the validation loss has not improved by at least 0.01 for
# 5 consecutive epochs, and roll back to the best weights seen so far.
early_stopping = EarlyStopping(monitor = 'val_loss',
                    min_delta = 0.01,
                    patience = 5,
                    verbose = 1,
                    restore_best_weights = True,
                    mode='min')

opt = Adam(learning_rate=learning_rate)

model.compile(loss = params["loss"],
              optimizer = opt,
              metrics = [params["metrics"]])

# `epochs` is only an upper bound; early stopping normally ends training sooner.
model.fit(X_train, y_train,
          epochs = params["epochs"],
          batch_size = params["batch_size"],
          validation_split = params["validation_split"],
          callbacks = [early_stopping])

#test_pred = model.predict(X_test)
#print(test_pred)
#y_test_class = y_test
#y_test_class = y_test_class.apply(lambda val: int(val))
#print(y_test_class)

#y_pred_class = np.where(test_pred >= 0.5, 1, 0)
#y_pred_class = np.concatenate(y_pred_class).ravel().tolist()
#print(y_pred_class)

# Get output from model

# Sub-model that shares the trained network's input and stops at
# model.layers[2]; its activations are the features handed to the next stage.
# NOTE(review): index 2 is presumably "Couche_2" (index 0 being the input
# layer) - confirm with model.summary().
intermediate_layer_model = Model(inputs=model.input, outputs=model.layers[2].output)

# Run the train split first, then the test split, through the sub-model
X_train_features, X_test_features = (
    intermediate_layer_model.predict(split) for split in (X_train, X_test)
)

# XGB

# Hyperparameters of the gradient-boosted classifier; the estimator is built
# from this dict so the logged values are exactly the ones used.
xgb_params = {
    "learning_rate": 0.05,
    'max_depth': 8,
    'n_estimators': 500
}

# `params` is what gets logged for the run. Rebinding it to the XGBoost
# settings alone used to throw away the neural-network hyperparameters defined
# earlier, so they were never recorded. Keep them under an "nn_" prefix (which
# also avoids any key clash) and leave the XGBoost keys unchanged.
params = {
    **{f"nn_{key}": value for key, value in params.items()},
    **xgb_params,
}

xgb = XGBClassifier(**xgb_params)

# Fit on the features extracted by the network, then predict the test split
xgb.fit(X_train_features, y_train)

preds = xgb.predict(X_test_features)

# Evaluate model

# Human-readable confusion table (rows: true class, columns: predicted class)
co = pd.crosstab(y_test, preds, rownames=['Classe réelle'], colnames=['Classe prédite'])
display(co)

# Per-class report. `labels=[0, 1]` guarantees that both classes have an entry
# even if one of them is never predicted; `zero_division=0` reports 0.0 for an
# undefined precision/recall (the same value as the default, minus the warning).
cr = classification_report(y_test, preds, labels=[0, 1], output_dict=True, zero_division=0)
cra = pd.DataFrame(cr).transpose()
display(cra)

# Positional lookups (co.iloc[1, 1], cra.iloc[2, 0], ...) break or silently
# read the wrong row when a class is missing from the predictions. Use a
# fixed-layout 2x2 confusion matrix and key-based access instead.
# Layout with labels=[0, 1]: [[tn, fp], [fn, tp]].
tn_0, fp_1, fn_0, tp_1 = (
    int(count) for count in confusion_matrix(y_test, preds, labels=[0, 1]).ravel()
)
prec_0 = float(cr['0']['precision'])
prec_1 = float(cr['1']['precision'])
rec_0 = float(cr['0']['recall'])
rec_1 = float(cr['1']['recall'])
f1_0 = float(cr['0']['f1-score'])
f1_1 = float(cr['1']['f1-score'])
# Accuracy = correctly classified samples / all samples
acc = (tp_1 + tn_0) / (tp_1 + tn_0 + fp_1 + fn_0)
metrics = {"vrai positifs pred 1": tp_1, "vrai négatifs pred 0": tn_0, "faux positifs pred 1": fp_1, "faux négatifs pred 0": fn_0, 'précision 0': prec_0, 'précision 1': prec_1, 'recall 0': rec_0, 'recall 1': rec_1, 'f1 0': f1_0, 'f1_1': f1_1, 'accuracy': acc}

# Store information in tracking server
with mlflow.start_run(run_name=run_name) as run:
    mlflow.log_params(params)
    mlflow.log_metrics(metrics)

    # The estimator that produced `preds` (and therefore `metrics`) is the
    # XGBoost classifier, and it consumes the network-extracted features, not
    # the raw scaled inputs. Log that model with the scikit-learn flavor, with
    # a few feature rows as the input example rather than the whole test set.
    mlflow.sklearn.log_model(
        sk_model=xgb,
        input_example=X_test_features[:5],
        artifact_path=artifact_path,
    )

    # The Keras network is needed to turn raw inputs into those features, so
    # store it too, using the TensorFlow/Keras flavor instead of the
    # scikit-learn one.
    mlflow.tensorflow.log_model(
        model, artifact_path=f"{artifact_path}_keras"
    )




















ModuleNotFoundError: No module named 'mlflow'