In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Input
from tensorflow.keras.regularizers import l1_l2
import tensorflow as tf

from sklearn.metrics import mean_squared_error, classification_report, confusion_matrix
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler

import random
import os
from joblib import dump

# FONCTIONS
"""
detect_IQR(df, k_iqr, column_name)
afficher_listes_et_leurs_tailles(liste, nom_liste)
calcul_metriques(comm, true, detected) 
afficher_metriques(comm, true, detected, column) 
create_sequences(data, labels, lookback=1)
build_model(lookback, units, learning_rate, l1, l2)
prepare_classifier_data(X_sequences, residuals, y_pred, y_actual, labels)
couper_liste(liste, x)
plot_donnees_true_outliers_et_detect_with_iqr(data_brut)
plot_loss_epochs(history)
"""

def detect_IQR(df, k_iqr, column_name):
    """Return the index labels of the values lying outside the IQR fences.

    A value is flagged when it is strictly below ``Q1 - k_iqr * IQR`` or
    strictly above ``Q3 + k_iqr * IQR``.

    Args:
        df: A pandas Series of values, or a DataFrame containing
            ``column_name``.
        k_iqr: Multiplier applied to the interquartile range.
        column_name: Column to analyse when ``df`` is a DataFrame; ignored
            when ``df`` is already a Series.

    Returns:
        The pandas Index of the flagged rows.

    Raises:
        KeyError: If ``df`` is a DataFrame that has no ``column_name`` column.
    """
    # Masking a DataFrame with a boolean DataFrame keeps every row (cells that
    # do not match become NaN), so the whole index would be reported as
    # outliers. Work on a single column instead.
    values = df[column_name] if isinstance(df, pd.DataFrame) else df

    q1 = values.quantile(0.25)
    q3 = values.quantile(0.75)
    iqr = q3 - q1

    lower_bound = q1 - k_iqr * iqr
    upper_bound = q3 + k_iqr * iqr

    is_outlier = (values < lower_bound) | (values > upper_bound)
    return values[is_outlier].index

def afficher_listes_et_leurs_tailles(liste, nom_liste):
    """Print the size of ``liste`` and then its contents, tagged with ``nom_liste``.

    Args:
        liste: Any sized object; its length and its printed form are shown.
        nom_liste: Display name used in both printed lines.
    """
    taille = len(liste)
    print("Taille de ", nom_liste, " : ", taille)
    print(f'Liste {nom_liste} : ', liste)

def calcul_metriques(comm, true, detected):
    """Compute precision, recall and F1 from three index collections.

    Args:
        comm: Items present in both ``true`` and ``detected`` (true positives).
        true: Ground-truth items.
        detected: Items reported by the detector.

    Returns:
        A ``(precision, recall, f1)`` tuple. All three are ``0`` when
        ``comm`` is empty.
    """
    n_common = len(comm)
    n_detected = len(detected)
    n_true = len(true)

    # Without any true positive every metric is defined as zero.
    if n_common == 0:
        return 0, 0, 0

    # tp + fp equals the number of detections, tp + fn the number of truths.
    precision = n_common / n_detected
    recall = n_common / n_true
    f1 = 2 * (precision * recall) / (precision + recall)
    return precision, recall, f1

def afficher_metriques(comm, true, detected, column):
    """Print precision, recall and F1 for one column as a small report.

    Args:
        comm: Items present in both ``true`` and ``detected``.
        true: Ground-truth items.
        detected: Items reported by the detector.
        column: Name shown in the report (for example bid or ask).
    """
    precision, recall, f1 = calcul_metriques(comm, true, detected)
    cadre = '*' * 22

    print("RESULTATS (metriques):")
    print(cadre)
    print(f'Column: {column}')
    print('-' * 4)
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1: {f1:.4f}')
    print(cadre)

def create_sequences(data, labels, lookback=1):
    """Slice ``data`` into sliding windows with their next value and label.

    For each target position ``t`` from ``lookback`` to ``len(data) - 1``:
    the window is ``data[t - lookback:t]``, the target is ``data[t]`` and
    the label is ``labels[t]``.

    Args:
        data: Indexable, sliceable sequence of observations.
        labels: Indexable sequence aligned with ``data``.
        lookback: Number of past observations in each window.

    Returns:
        Three numpy arrays ``(X, y, y_labels)``: the windows, the targets
        and the labels of the targets.
    """
    positions = range(lookback, len(data))
    windows = [data[t - lookback:t] for t in positions]
    targets = [data[t] for t in positions]
    target_labels = [labels[t] for t in positions]
    return np.array(windows), np.array(targets), np.array(target_labels)

def build_model(lookback, units, learning_rate, l1, l2):
    """Build and compile a single-layer LSTM regressor.

    The network takes inputs of shape ``(lookback, 1)``, passes them through
    one ReLU-activated LSTM layer whose kernel is L1/L2-regularized, and
    ends with a one-unit Dense output.

    Args:
        lookback: Length of each input window.
        units: Number of LSTM units.
        learning_rate: Learning rate given to the Adam optimizer.
        l1: L1 factor of the kernel regularizer.
        l2: L2 factor of the kernel regularizer.

    Returns:
        The compiled Keras ``Sequential`` model (Adam, mean squared error).
    """
    kernel_penalty = l1_l2(l1, l2)
    layers = [
        Input(shape=(lookback, 1)),
        LSTM(units, activation='relu', kernel_regularizer=kernel_penalty),
        Dense(1),
    ]
    model = Sequential(layers)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='MeanSquaredError')
    return model

def prepare_classifier_data(X_sequences, residuals, y_pred, y_actual, labels):
    """Build the standardized feature matrix used by the outlier classifier.

    Each sample gets four features: its residual, the model prediction, and
    the mean and standard deviation of its input window.

    Args:
        X_sequences: Array-like of input windows whose first axis indexes the
            samples, e.g. ``(n_samples, lookback)`` or the 3-D
            ``(n_samples, lookback, 1)`` layout fed to the LSTM.
        residuals: One residual per sample; flattened before use.
        y_pred: One prediction per sample; flattened before use.
        y_actual: Unused; kept so existing callers keep working.
        labels: Returned unchanged alongside the features.

    Returns:
        A ``(features_scaled, labels)`` tuple where ``features_scaled`` is the
        standardized array with one row per sample and four columns.
    """
    sequences = np.asarray(X_sequences)

    # Reduce over every axis except the sample axis. With axis=1 only, a 3-D
    # (n_samples, lookback, 1) input yields (n_samples, 1) columns, which
    # pandas refuses as a DataFrame column.
    window_axes = tuple(range(1, sequences.ndim))
    seq_means = np.mean(sequences, axis=window_axes)
    seq_stds = np.std(sequences, axis=window_axes)

    # Build the feature table; ravel so (n, 1) inputs are accepted too.
    features = pd.DataFrame({
        'residual': np.ravel(residuals),
        'y_pred': np.ravel(y_pred),
        'seq_mean': seq_means,
        'seq_std': seq_stds,
    })

    # Standardize the features.
    # NOTE(review): the scaler is fitted anew on every call, so separate
    # train/test calls are standardized independently - confirm this is
    # intended.
    scaler = StandardScaler()
    features_scaled = scaler.fit_transform(features)

    return features_scaled, labels

def couper_liste(liste, x):
    """Return the tail of ``liste`` starting at the first value ``>= x``.

    Args:
        liste: Sliceable sequence of comparable values.
        x: Threshold compared against each value in order.

    Returns:
        ``liste`` sliced from the first element that is ``>= x``; an empty
        slice when no element qualifies.
    """
    for position, valeur in enumerate(liste):
        if valeur >= x:
            return liste[position:]
    # No element reached the threshold: slice past the end.
    return liste[len(liste):]

def plot_donnees_true_outliers_et_detect_with_iqr(data_brut, true_indices=None, detected_indices=None):
    """Plot the raw series with the true and the detected outliers marked.

    The series is drawn as a line against its positions ``0..len-1``; true
    outliers are overlaid in red and detected ones in black.

    Args:
        data_brut: Series of values to plot (the y axis).
        true_indices: Indices of the true outliers. When omitted, the
            module-level ``true_outlier_indices`` is used.
        detected_indices: Indices of the detected outliers. When omitted, the
            module-level ``detected_outlier_indices`` is used.

    Raises:
        NameError: If an argument is omitted and the matching module-level
            variable does not exist.
    """
    # Explicit arguments remove the hidden dependency on notebook globals;
    # the fallback keeps existing one-argument calls working.
    if true_indices is None:
        true_indices = true_outlier_indices
    if detected_indices is None:
        detected_indices = detected_outlier_indices

    t = np.arange(0, len(data_brut))
    plt.figure(figsize=(12, 6))
    plt.plot(t, data_brut, label='Donnes avec outliers')
    plt.scatter(t[true_indices], data_brut[true_indices], color='red', label='Outliers')
    plt.scatter(t[detected_indices], data_brut[detected_indices], color='black', label='Detectes')
    plt.legend()
    plt.show()

def plot_loss_epochs(history):
    """Plot the training and validation loss curves of a training run.

    Args:
        history: Object whose ``history`` mapping holds the ``'loss'`` and
            ``'val_loss'`` sequences, one value per epoch.
    """
    courbes = (
        ('loss', 'loss-training'),
        ('val_loss', 'loss-validation'),
    )

    plt.figure(figsize=(12, 6))
    for cle, etiquette in courbes:
        plt.plot(history.history[cle], label=etiquette)

    plt.title('Loss en fonction des epochs')
    plt.xlabel('epochs')
    plt.ylabel('loss')
    plt.legend()
    plt.show()
