In [2]:
import sys
sys.path.append('..')
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from interface.load_data import (
    load_data_fault_free_test,
    load_data_fault_free_train,
    load_data_faulty_test,
    load_data_faulty_train,
    load_data_fault_free_test_select

)
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [3]:
from keras import Sequential, Input, layers
from keras.callbacks import EarlyStopping

In [4]:
faulty_train = load_data_faulty_train()
faulty_train.shape

(5000000, 55)

In [5]:
mask = [i*10 for i in range(int(faulty_train.shape[0]/10))]

faulty_train_w = faulty_train.iloc[mask]
faulty_train_w.shape

(500000, 55)

In [None]:
#list = [slice() for i in range]
#np.r_list

In [5]:
list_slice = [slice(i * 50, (i+1) * 50) for i in range(1000)]
#list_fault = [[list_slice[0]...list_slice[49], []]
list_fault = [[list_slice[i+j*50] for i in range(50)] for j in range(20)]

In [None]:
def choose_faults(len_sample=500):
    list_slice = [slice(i * len_sample, (i+1) * len_sample) for i in range(len_sample*20)]
    list_fault = [[list_slice[i+j*len_sample] for i in range(50)] for j in range(20)]


In [6]:
X = faulty_train.drop(columns=['faultNumber', 'simulationRun', 'sample'])
y = faulty_train.faultNumber

In [7]:
X_w = faulty_train_w.drop(columns=['faultNumber', 'simulationRun', 'sample'])
y_w = faulty_train_w.faultNumber

In [53]:
X_feat_select = faulty_train_w[['xmeas_6', 'xmeas_18', 'xmeas_7', 'xmeas_13']]
y_feat_select = faulty_train_w.faultNumber

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_w, y_w, test_size=0.3)

In [9]:
scaler = StandardScaler()

X_train_scaled = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns)
X_test_scaled = pd.DataFrame(scaler.transform(X_test), columns=X.columns)

In [10]:
n_simulation = 500
len_simulation = 10000
n_per_sample = 50
n_fault = 20
n_features = 52

In [11]:
X_train_reshaped = X_train_scaled.values.reshape(round(len_simulation*0.7), n_per_sample, n_features)
y_train_reshaped = y_train.values.reshape(round(len_simulation*0.7), n_per_sample)[:, 0]

X_test_reshaped = X_test_scaled.values.reshape(round(len_simulation*0.3), n_per_sample, n_features)
y_test_reshaped = y_test.values.reshape(round(len_simulation*0.3), n_per_sample)[:, 0]

In [12]:
print(X_test_reshaped.shape)
print(y_train_reshaped.shape)

(3000, 50, 52)
(7000,)


In [None]:
from sklearn.model_selection import TimeSeriesSplit

# 2. Préparation de la validation croisée
tscv = TimeSeriesSplit(n_splits=5)

print(f"Shape globale : {X_w.shape}")

# 3. Boucle d'entraînement
for fold, (train_index, test_index) in enumerate(tscv.split(X_w)):
    print(f"\n--- Fold {fold+1} ---")

    # A. Découpage
    X_train_2, X_test_2 = X.iloc[train_index], X.iloc[test_index]
    y_train_2, y_test_2 = X_w.iloc[train_index], y_w.iloc[test_index]

    # B. Normalisation (L'étape critique avec 52 features)
    # On doit aplatir les données pour le scaler : (N_seq * Time, Features)
    # Cela permet de normaliser chaque feature indépendamment du temps

    num_features = X_train_2.shape[1]        # Number of features
    timesteps_per_sequence = 50 # Assuming 500 timesteps per sequence. PLEASE VERIFY.

    # Ensure training set has complete sequences by trimming excess rows
    num_rows_train_fold = X_train_2.shape[0]
    remainder_train = num_rows_train_fold % timesteps_per_sequence
    if remainder_train != 0:
        print(f"Warning: Trimming {remainder_train} rows from training set to ensure complete sequences.")
        X_train_2 = X_train_2.iloc[:-remainder_train]
        y_train_2 = y_train_2.iloc[:-remainder_train]
        num_rows_train_fold = X_train_2.shape[0]
    num_sequences_train_fold = num_rows_train_fold // timesteps_per_sequence

    # Ensure test set has complete sequences by trimming excess rows
    num_rows_test_fold = X_test_2.shape[0]
    remainder_test = num_rows_test_fold % timesteps_per_sequence
    if remainder_test != 0:
        print(f"Warning: Trimming {remainder_test} rows from test set to ensure complete sequences.")
        X_test_2 = X_test_2.iloc[:-remainder_test]
        y_test_2 = y_test_2.iloc[:-remainder_test]
        num_rows_test_fold = X_test_2.shape[0]
    num_sequences_test_fold = num_rows_test_fold // timesteps_per_sequence

    # Convert DataFrames to NumPy arrays before scaling and reshaping
    X_train_np = X_train_2.values
    X_test_np = X_test_2.values

    scaler = StandardScaler()

    # FIT only on TRAIN! The data is already in 2D (num_rows, num_features) for scaling.
    X_train_scaled_2d = scaler.fit_transform(X_train_np)
    # TRANSFORM on TEST (using stats from train)
    X_test_scaled_2d = scaler.transform(X_test_np)

    # Return to 3D for the RNN: (num_sequences, timesteps_per_sequence, num_features)
    X_train_final = X_train_scaled_2d.reshape(num_sequences_train_fold, timesteps_per_sequence, num_features)
    X_test_final = X_test_scaled_2d.reshape(num_sequences_test_fold, timesteps_per_sequence, num_features)

    print(f"Train shape: {X_train_final.shape}")
    print(f"Test shape:  {X_test_final.shape}")

    # C. Entraînement du modèle (Exemple Keras/TensorFlow)
    # model = create_rnn_model(input_shape=(500, 52), num_classes=20)
    # model.fit(X_train_final, y_train, validation_data=(X_test_final, y_test)...)

Shape globale : (500000, 52)

--- Fold 1 ---
Train shape: (1666, 50, 52)
Test shape:  (1666, 50, 52)

--- Fold 2 ---
Train shape: (3333, 50, 52)
Test shape:  (1666, 50, 52)

--- Fold 3 ---
Train shape: (5000, 50, 52)
Test shape:  (1666, 50, 52)

--- Fold 4 ---
Train shape: (6666, 50, 52)
Test shape:  (1666, 50, 52)

--- Fold 5 ---
Train shape: (8333, 50, 52)
Test shape:  (1666, 50, 52)


In [32]:
model = Sequential([
    Input(shape=(50, 52)),

    layers.Conv1D(64, kernel_size=7, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(pool_size=2),

    layers.Conv1D(128, kernel_size=5, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(pool_size=2),

    layers.Conv1D(256, kernel_size=3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(pool_size=2),

    layers.Conv1D(512, kernel_size=3, activation='relu', padding='same'),
    layers.BatchNormalization(),
    layers.MaxPooling1D(pool_size=2),

    layers.Flatten(),

    layers.Dense(512, activation='relu'),
    layers.Dropout(0.5),

    layers.Dense(256, activation='relu'),
    layers.Dropout(0.3),

    layers.Dense(21, activation='softmax')
])

In [33]:
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

In [34]:
y_test_2

4166670    14
4166680    14
4166690    14
4166700    14
4166710    14
           ..
4999620    20
4999630    20
4999640    20
4999650    20
4999660    20
Name: faultNumber, Length: 83300, dtype: int64

In [31]:
callbacks = [EarlyStopping(patience=15)]

history = model.fit(
    X_train_final, y_train_2,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=callbacks,
)

Epoch 1/100


ValueError: Argument `output` must have rank (ndim) `target.ndim - 1`. Received: target.shape=(None, 52), output.shape=(None, 20)

In [31]:
model.evaluate(X_test_reshaped, y_test_reshaped)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.2297 - loss: 4.5222


[4.522157669067383, 0.22966666519641876]

In [48]:
y_pred = model.predict(X_test_reshaped)
y_pred_pross = np.argmax(y_pred, axis=1)

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [50]:
from sklearn.metrics import classification_report, confusion_matrix

print(classification_report(y_test_reshaped, y_pred_pross))

              precision    recall  f1-score   support

           1       0.70      0.84      0.76       159
           2       0.82      0.88      0.85       155
           3       0.08      0.06      0.07       166
           4       0.07      0.06      0.06       142
           5       0.08      0.09      0.08       149
           6       0.91      0.89      0.90       141
           7       0.19      0.12      0.15       160
           8       0.14      0.12      0.13       145
           9       0.04      0.05      0.05       146
          10       0.09      0.05      0.06       159
          11       0.07      0.10      0.08       142
          12       0.13      0.15      0.14       144
          13       0.18      0.18      0.18       154
          14       0.07      0.07      0.07       153
          15       0.07      0.03      0.05       151
          16       0.09      0.11      0.10       157
          17       0.11      0.13      0.12       147
          18       0.58    

In [44]:
print(y_pred)
print(y_test_reshaped)

[[4.68535075e-18 9.58688106e-05 2.79025926e-06 1.25076156e-03
  2.33597038e-05 9.89856243e-01 5.38902183e-04 9.33874981e-05
  3.51306517e-04 2.44923052e-04 2.22030957e-03 6.44183252e-04
  1.69263699e-03 1.01384567e-03 1.17066320e-05 3.17437400e-04
  4.35826310e-04 2.62182846e-04 1.15579656e-04 5.47922740e-04
  2.80846405e-04]]
[ 3 10 10 ... 17  3  2]


In [15]:
y_w.nunique()

20