In [56]:
from DSN import DeepSpectralNet
from Tool import split_dataset,standardize_data
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [61]:

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

from torch.utils.data import Dataset, DataLoader
from scipy.io import loadmat

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import root_mean_squared_error,r2_score

In [37]:
def load_slice_localization():
    """
    UCI Slice Localization
    """

    # 1. URL Directe du fichier ZIP sur le site de l'Université de Californie (UCI)
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/00206/slice_localization_data.zip"

    print("Téléchargement et lecture du fichier CSV (cela peut prendre quelques secondes)...")
    df = pd.read_csv(url, compression='zip')

    # 2. Nettoyage et Séparation
    # La colonne 'reference' est la cible (la position de la slice)
    # La colonne 'patientId' est un identifiant qu'on retire généralement pour l'apprentissage
    y = df['reference'].values
    X = df.drop(columns=['reference', 'patientId']).values

    print("-" * 30)
    print(f"Shape de X : {X.shape}") 
    print(f"Shape de y : {y.shape}")
    print("-" * 30)

    if X.shape == (53500, 384):
        print("✅ SUCCÈS : C'est le bon dataset !")
    else:
        print("❌ ERREUR : Toujours pas le bon format.")

    return X, y

class RegressionDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32).unsqueeze(1)

    def __len__(self):
        return self.X.shape[0]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [38]:
# Load
X, y = load_slice_localization()

# Split
X_train, y_train, X_val, y_val, X_test, y_test = split_dataset(X, y)

# Standardize
X_train, y_train, X_val, y_val, X_test, y_test, scaler_X, scaler_y = standardize_data(
    X_train, y_train,
    X_val, y_val,
    X_test, y_test
)

# PyTorch datasets
train_dataset = RegressionDataset(X_train, y_train)
val_dataset   = RegressionDataset(X_val, y_val)
test_dataset  = RegressionDataset(X_test, y_test)

# DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=256, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=256, shuffle=False)

print("Train:", len(train_dataset))
print("Val:", len(val_dataset))
print("Test:", len(test_dataset))
print("Input dim:", train_dataset[0][0].shape)

Téléchargement et lecture du fichier CSV (cela peut prendre quelques secondes)...
------------------------------
Shape de X : (53500, 384)
Shape de y : (53500,)
------------------------------
✅ SUCCÈS : C'est le bon dataset !
Train: 37450
Val: 8025
Test: 8025
Input dim: torch.Size([384])


In [39]:
mlp = MLPRegressor(
    hidden_layer_sizes=(256, 128, 64),
    activation='relu',
    solver='adam',
    max_iter=300,
    early_stopping=True,
    random_state=42
)

In [None]:
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
mse = root_mean_squared_error(y_test, y_pred)
print("RMSE:", mse)
r2 = r2_score(y_test, y_pred)
print("R²:", r2)

def count_params_mlp(model):
    return sum(w.size + b.size for w, b in zip(model.coefs_, model.intercepts_))
print(count_params_mlp(mlp))

MSE: 0.0437443904016065
R²: 0.9980656175783365
139777


In [46]:
def load_sarcos(path_train, path_test=None, standardize=True):
    """
    Charge le dataset SARCOS inverse dynamics.
    
    Inputs:
        - X : 21 features (positions, vitesses, accélérations)
        - y : 7 torques
    
    Returns:
        X_train, y_train, X_test, y_test (si test fourni)
    """
    # -------- TRAIN --------
    train_data = loadmat(path_train)
    train = train_data['sarcos_inv']

    X_train = train[:, :21]
    y_train = train[:, 21:]

    # -------- TEST --------
    if path_test is not None:
        test_data = loadmat(path_test)
        test = test_data['sarcos_inv_test']

        X_test = test[:, :21]
        y_test = test[:, 21:]
    else:
        X_test, y_test = None, None

    # -------- STANDARDIZATION --------
    if standardize:
        scaler_X = StandardScaler()
        scaler_y = StandardScaler()

        X_train = scaler_X.fit_transform(X_train)
        y_train = scaler_y.fit_transform(y_train)

        if X_test is not None:
            X_test = scaler_X.transform(X_test)
            y_test = scaler_y.transform(y_test)

    return X_train, y_train, X_test, y_test

In [47]:
X_train, y_train, X_test, y_test = load_sarcos(
    "data/sarcos_inv.mat",
    "data/sarcos_inv_test.mat"
)

print(X_train.shape)  # (~44k, 21)
print(y_train.shape)  # (~44k, 7)

(44484, 21)
(44484, 7)


In [48]:
mlp = MLPRegressor(
    hidden_layer_sizes=(256, 128, 64),
    activation='relu',
    solver='adam',
    max_iter=300,
    early_stopping=True,
    random_state=42
)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
mse = root_mean_squared_error(y_test, y_pred)
print("RMSE:", mse)
r2 = r2_score(y_test, y_pred)
print("R²:", r2)

def count_params_mlp(model):
    return sum(w.size + b.size for w, b in zip(model.coefs_, model.intercepts_))
print(count_params_mlp(mlp))

RMSE: 0.09432834536511595
R²: 0.9906200489375327
47239


In [54]:
import numpy as np

# Dataset Feynman I.43.16 (Viscosité / Cinétique des gaz)
# Formule : y = mu * u * n / (v - u)  (Avec une singularité quand v approche u)
# C'est MORTEL pour un MLP à cause de la division et de l'asymptote.

def feynman_function(X):
    # X columns: mu, u, n, v
    mu = X[:, 0]
    u  = X[:, 1]
    n  = X[:, 2]
    v  = X[:, 3] 
    # On s'assure que v > u pour éviter la division par zéro directe, mais on s'en approche
    return mu * u * n / (v - u + 1e-6)

N = 50000
X = np.random.rand(N, 4) * 10  # Valeurs entre 0 et 10
# On force v à être proche de u pour créer des pics
X[:, 3] = X[:, 1] + np.random.rand(N) * 2 

y = feynman_function(X)

mask_train = (y < np.percentile(y, 80)) # On apprend sur les valeurs "calmes"
X_train, y_train = X[mask_train], y[mask_train]
X_test, y_test   = X[~mask_train], y[~mask_train] # On teste sur les pics extrêmes

print(f"Train size: {len(X_train)}, Test size (The Trap): {len(X_test)}")

Train size: 40000, Test size (The Trap): 10000


In [55]:
mlp = MLPRegressor(
    hidden_layer_sizes=(256,256,256,256),
    activation='relu',
    solver='adam',
    max_iter=300,
    early_stopping=True,
    random_state=42
)
mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)
mse = root_mean_squared_error(y_test, y_pred)
print("RMSE:", mse)
r2 = r2_score(y_test, y_pred)
print("R²:", r2)

def count_params_mlp(model):
    return sum(w.size + b.size for w, b in zip(model.coefs_, model.intercepts_))
print(count_params_mlp(mlp))

RMSE: 119421.92440175549
R²: -0.0005458077095688019
198913


In [75]:
d_input = X_train.shape[1]
epochs = 50
learning_rate = 1e-3
n_samples = X.shape[0]
batch_size = 128

dims = [d_input,8,128,128,8,1]
dsn = DeepSpectralNet(dims, ortho_mode=None,use_layernorm=True)

# 1. Obtenir le nombre total de paramètres
total_params = dsn.num_parameters
print(f"Nombre total de paramètres : {total_params}")

# 2. Vérifier une couche spécifique
layer_params = dsn.layers[0].num_parameters
print(f"Paramètres de la première couche : {layer_params}")

# ==== Optimiseur et loss ====
optimizer = torch.optim.AdamW(dsn.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=100
)
criterion = nn.MSELoss()

Nombre total de paramètres : 52477
Paramètres de la première couche : 60


In [76]:
X_train_t = torch.tensor(X_train, dtype=torch.float32)
X_test_t  = torch.tensor(X_test,  dtype=torch.float32)



y_train_t = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)
y_test_t = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1)

print(X_train_t.shape)  # (N_train, d_input)
print(y_train_t.shape)  # (N_train, 1)

batch_size = 256

train_dataset = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(X_test_t, y_test_t)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

optimizer = torch.optim.AdamW(dsn.parameters(), lr=learning_rate, weight_decay=1e-5)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=100
)

torch.Size([40000, 4])
torch.Size([40000, 1])


In [77]:
epochs = 300
train_loss_history = []
val_loss_history = []

for epoch in range(epochs):
    # ======== Training ========
    dsn.train()
    total_train_loss = 0.0

    for Xb, yb in train_loader:
        optimizer.zero_grad()
        y_pred = dsn(Xb)         
        loss = criterion(y_pred, yb)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(dsn.parameters(), max_norm=1.0)
        optimizer.step()

        total_train_loss += loss.item() * Xb.size(0)

    avg_train_loss = total_train_loss / len(train_loader.dataset)
    train_loss_history.append(avg_train_loss)
    #scheduler.step(avg_train_loss)

    # ======== Validation ========
    dsn.eval()
    total_val_loss = 0.0
    with torch.no_grad():
        for Xb, yb in test_loader:  
            y_pred = dsn(Xb)
            loss = criterion(y_pred, yb)
            total_val_loss += loss.item() * Xb.size(0)

    avg_val_loss = total_val_loss / len(test_loader.dataset)
    val_loss_history.append(avg_val_loss)

    # ======== Logging ========
    if epoch % 20 == 0 or epoch == 0:
        current_lr = optimizer.param_groups[0]['lr']
        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {avg_train_loss:.6f} | "
              f"Val Loss: {avg_val_loss:.6f} | LR: {current_lr:.6f}")

Epoch 1/300 | Train Loss: 12165.911311 | Val Loss: 14268249930.700399 | LR: 0.001000
Epoch 21/300 | Train Loss: 469.842741 | Val Loss: 14266655398.231600 | LR: 0.001000
Epoch 41/300 | Train Loss: 224.623443 | Val Loss: 14266220610.919600 | LR: 0.001000


KeyboardInterrupt: 

In [78]:
dsn.eval()
with torch.no_grad(): y_pred = dsn(X_test_t) 

mse = root_mean_squared_error(y_test, y_pred)
print("RMSE:", mse)
r2 = r2_score(y_test, y_pred)
print("R²:", r2)

def count_params_mlp(model):
    return sum(w.size + b.size for w, b in zip(model.coefs_, model.intercepts_))
print(count_params_mlp(mlp))

RMSE: 119441.32276028839
R²: -0.0008708824017511496
198913
