In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import tensorflow as tf
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_curve, auc
from sklearn.utils.class_weight import compute_class_weight

# Keras building blocks, aliased from the `tf.keras` namespace that ships with
# TensorFlow so the rest of the notebook can use the short class names.
_keras_models = tf.keras.models
_keras_layers = tf.keras.layers

# Model container
Sequential = _keras_models.Sequential

# Layers
Conv1D = _keras_layers.Conv1D
MaxPooling1D = _keras_layers.MaxPooling1D
Flatten = _keras_layers.Flatten
Dense = _keras_layers.Dense
Dropout = _keras_layers.Dropout
BatchNormalization = _keras_layers.BatchNormalization

# Optimizer and evaluation metric
Adam = tf.keras.optimizers.Adam
AUC = tf.keras.metrics.AUC

# Make the project code importable from the notebook's working directory.
# The import below uses the package-qualified name `src.preprocess`, so the
# directory that CONTAINS `src` (the parent folder) must be on sys.path;
# '../src' on its own only exposes the modules inside it as top-level names.
# The original '../src' entry is kept so any flat import relying on it still
# resolves.
for _extra_path in ('..', '../src'):
    if _extra_path not in sys.path:  # avoid duplicate entries when the cell is re-run
        sys.path.append(_extra_path)
from src.preprocess import normalize_flux

print("TensorFlow Version:", tf.__version__)

TensorFlow Version: 2.20.0


In [3]:
# Input files, relative to the notebook's working directory.
TRAIN_PATH = '../data/raw/train.csv'
TEST_PATH = '../data/raw/test.csv'

# Raw CSV label -> binary target (1 becomes 0, 2 becomes 1).
LABEL_MAP = {1: 0, 2: 1}


def _encode_labels(frame, split_name):
    """Recode the ``LABEL`` column of ``frame`` through ``LABEL_MAP``.

    ``Series.map`` turns every value missing from the mapping into NaN without
    complaint, which would silently corrupt the targets and the class weights
    derived from them, so unexpected labels are rejected here instead.

    Args:
        frame: DataFrame holding a ``LABEL`` column.
        split_name: Name of the split; used only in the error message.

    Returns:
        The recoded labels as a Series aligned with ``frame``.

    Raises:
        ValueError: If ``LABEL`` holds a value that is not a key of
            ``LABEL_MAP`` (missing values included).
    """
    encoded = frame['LABEL'].map(LABEL_MAP)
    unmapped = encoded.isna()
    if unmapped.any():
        unexpected = frame.loc[unmapped, 'LABEL'].unique().tolist()
        raise ValueError(
            f"Valeurs de LABEL inattendues dans '{split_name}': {unexpected} "
            f"(attendues: {list(LABEL_MAP)})"
        )
    return encoded


# --- 1. Loading and cleaning ---
print("Chargement des données...")
df_train = pd.read_csv(TRAIN_PATH)
df_test = pd.read_csv(TEST_PATH)

# The frames are re-read from disk just above, so recoding the column in place
# stays safe when the whole cell is re-run.
df_train['LABEL'] = _encode_labels(df_train, 'train')
df_test['LABEL'] = _encode_labels(df_test, 'test')

# Split features from targets.
X_train_raw = df_train.drop('LABEL', axis=1)
y_train = df_train['LABEL']
X_test_raw = df_test.drop('LABEL', axis=1)
y_test = df_test['LABEL']

# Missing feature values are replaced by 0 before normalisation.
X_train_filled = X_train_raw.fillna(0)
X_test_filled = X_test_raw.fillna(0)

# `normalize_flux` (from src.preprocess) is applied to one row at a time.
print("Normalisation (detrending) des données...")
X_train_normalized = X_train_filled.apply(normalize_flux, axis=1).values
X_test_normalized = X_test_filled.apply(normalize_flux, axis=1).values
print("Normalisation terminée.")

# --- 2. Preparation for the CNN ---
print("Préparation des données pour le CNN...")
# Append a trailing channel axis: (samples, steps) -> (samples, steps, 1),
# the 3-D layout Conv1D takes as input.
X_train_cnn = X_train_normalized[:, :, np.newaxis]
X_test_cnn = X_test_normalized[:, :, np.newaxis]

# 'balanced' weights are inversely proportional to class frequency. Each weight
# is paired with the class it was computed for instead of relying on
# hard-coded positions.
label_values = np.unique(y_train)
class_weights = compute_class_weight(
    class_weight='balanced',
    classes=label_values,
    y=y_train
)
class_weight_dict = {
    int(label): weight
    for label, weight in zip(label_values, class_weights)
}

print(f"Shape de X_train_cnn: {X_train_cnn.shape}")
print(f"Poids des classes: {class_weight_dict}")
print("--- Préparation terminée ---")

Chargement des données...
Normalisation (detrending) des données...
Normalisation terminée.
Préparation des données pour le CNN...
Shape de X_train_cnn: (5087, 3197, 1)
Poids des classes: {0: np.float64(0.5036633663366337), 1: np.float64(68.74324324324324)}
--- Préparation terminée ---
