<a href="https://colab.research.google.com/github/SimonJ09/EEIA/blob/main/EEIA2025_TP_DL_Diabete.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# EEIA 2025
## Téléchargement et chargement des données
Suite du TP `TP_MLP_diabete`.

In [None]:
# Here we download the dataset
!curl https://raw.githubusercontent.com/renatopp/arff-datasets/refs/heads/master/classification/diabetes.arff -o diabetes.arff

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 38282  100 38282    0     0   221k      0 --:--:-- --:--:-- --:--:--  222k


In [None]:
from scipy.io import arff
import pandas as pd

# Parse the downloaded ARFF file into its records (`data`) and metadata (`meta`).
data, meta = arff.loadarff('diabetes.arff')

# Wrap the parsed records in a pandas DataFrame.
df_diabete = pd.DataFrame(data)

# Object-dtype columns come back as raw bytes; turn them into UTF-8 text.
object_columns = list(df_diabete.select_dtypes([object]))
for column in object_columns:
    decoded = df_diabete[column].str.decode('utf-8')
    df_diabete[column] = decoded

# Display the first rows as the cell output.
df_diabete.head()

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,tested_positive
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,tested_negative
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,tested_positive
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,tested_negative
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,tested_positive


# Méthode I : Implémentation avec PyTorch

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [None]:
# Work on a copy so the raw frame `df_diabete` is left untouched and this cell can be
# re-run safely: mapping the labels in place on an alias would turn every
# already-mapped value into NaN on a second execution.
data = df_diabete.copy()

# Encode the class labels as integers (0 for tested_negative, 1 for tested_positive).
data['class'] = data['class'].map({'tested_negative': 0, 'tested_positive': 1})

# Series.map yields NaN for any label missing from the dictionary; fail loudly here
# rather than feeding NaN targets to the model further down.
if data['class'].isna().any():
    raise ValueError(
        "Unexpected value in the 'class' column: expected only "
        "'tested_negative' or 'tested_positive' (reload df_diabete if it was "
        "already encoded in place)."
    )

# Split into the feature matrix and the target vector.
X = data.drop('class', axis=1).values
y = data['class'].values


In [None]:
# Hold out 20% of the samples for testing (fixed random_state for a repeatable split).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features; the scaler is fitted on the training split only and
# then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Convert to PyTorch tensors: float features, long (integer) class labels.
X_train, X_test = torch.FloatTensor(X_train), torch.FloatTensor(X_test)
y_train, y_test = torch.LongTensor(y_train), torch.LongTensor(y_test)

# Mini-batch loaders; only the training loader shuffles its samples.
batch_size = 16
train_data, test_data = TensorDataset(X_train, y_train), TensorDataset(X_test, y_test)
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [None]:
# Model definition
class DiabetesNN(nn.Module):
    """Small fully connected classifier that outputs two class logits.

    Architecture:
        input -> Linear(16) -> ReLU -> Dropout(0.1)
              -> Linear(8)  -> ReLU -> Dropout(0.1)
              -> Linear(2)  (raw logits, no activation)

    Args:
        input_size: Number of input features per sample.
    """

    def __init__(self, input_size):
        super().__init__()
        self.layer1 = nn.Linear(input_size, 16)
        self.layer2 = nn.Linear(16, 8)
        self.output = nn.Linear(8, 2)
        self.relu = nn.ReLU()
        # Dropout to reduce overfitting. It holds no parameters, so applying it in
        # forward() does not change the layout of the state_dict.
        self.dropout = nn.Dropout(0.1)

    def forward(self, x):
        """Compute the class logits for a batch of samples.

        Args:
            x: Float tensor whose last dimension is ``input_size``.

        Returns:
            Tensor of raw logits whose last dimension is 2. No softmax is applied
            because the notebook trains this model with CrossEntropyLoss.
        """
        # Dropout is applied after each hidden activation; it is only active in
        # train() mode and is a no-op in eval() mode.
        x = self.dropout(self.relu(self.layer1(x)))
        x = self.dropout(self.relu(self.layer2(x)))
        return self.output(x)

In [None]:
# Seed PyTorch's global random generator so that the weight initialisation below
# (and any DataLoader shuffling that relies on the default generator) is the same
# on every "Restart & Run All".
torch.manual_seed(42)

# Initialisation
input_size = X_train.shape[1]  # Number of features
model = DiabetesNN(input_size)

# Loss function and optimiser.
# CrossEntropyLoss takes the raw logits produced by the model together with integer
# class labels, so it suits binary as well as multi-class classification.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Adam with a learning rate of 0.001

In [None]:
# Model training
epochs = 100
train_losses = []   # per-epoch mean training loss
test_losses = []    # per-epoch mean loss on the test loader
accuracy_list = []  # per-epoch accuracy (in %) on the test loader

for epoch in range(epochs):
    # ---- Training phase ----
    model.train()
    running_loss = 0.0
    seen = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Reset the gradients

        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backpropagation
        optimizer.step()  # Update the weights

        # Assumes `criterion` returns the mean over the batch (the default for
        # nn.CrossEntropyLoss). Weighting by the batch size gives an exact
        # per-sample mean for the epoch even when the last batch is smaller.
        batch_n = labels.size(0)
        running_loss += loss.item() * batch_n
        seen += batch_n

    # Mean training loss per sample for this epoch
    train_loss = running_loss / seen
    train_losses.append(train_loss)

    # ---- Evaluation phase (run on the test loader) ----
    model.eval()
    test_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():  # no gradients needed for evaluation
        for inputs, labels in test_loader:
            outputs = model(inputs)
            batch_n = labels.size(0)
            test_loss += criterion(outputs, labels).item() * batch_n

            # Predicted class = index of the largest logit
            predicted = outputs.argmax(dim=1)
            total += batch_n
            correct += (predicted == labels).sum().item()

    # Mean evaluation loss per sample and accuracy for this epoch
    test_loss = test_loss / total
    test_losses.append(test_loss)
    accuracy = 100 * correct / total
    accuracy_list.append(accuracy)

    # Report the metrics of this epoch
    print(f'Epoch {epoch+1}/{epochs} - Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}, Accuracy: {accuracy:.2f}%')

Epoch 1/100 - Train Loss: 0.7717, Test Loss: 0.7270, Accuracy: 38.31%
Epoch 2/100 - Train Loss: 0.6969, Test Loss: 0.6618, Accuracy: 59.74%
Epoch 3/100 - Train Loss: 0.6294, Test Loss: 0.6167, Accuracy: 68.18%
Epoch 4/100 - Train Loss: 0.5783, Test Loss: 0.5870, Accuracy: 69.48%
Epoch 5/100 - Train Loss: 0.5447, Test Loss: 0.5645, Accuracy: 72.73%
Epoch 6/100 - Train Loss: 0.5142, Test Loss: 0.5459, Accuracy: 72.73%
Epoch 7/100 - Train Loss: 0.4933, Test Loss: 0.5360, Accuracy: 75.97%
Epoch 8/100 - Train Loss: 0.4724, Test Loss: 0.5278, Accuracy: 75.97%
Epoch 9/100 - Train Loss: 0.4600, Test Loss: 0.5241, Accuracy: 75.32%
Epoch 10/100 - Train Loss: 0.4468, Test Loss: 0.5205, Accuracy: 74.68%
Epoch 11/100 - Train Loss: 0.4406, Test Loss: 0.5208, Accuracy: 75.97%
Epoch 12/100 - Train Loss: 0.4366, Test Loss: 0.5205, Accuracy: 75.97%
Epoch 13/100 - Train Loss: 0.4344, Test Loss: 0.5215, Accuracy: 75.97%
Epoch 14/100 - Train Loss: 0.4354, Test Loss: 0.5247, Accuracy: 75.32%
Epoch 15/100 - 

In [None]:
# Final evaluation: collect predictions and ground-truth labels over the test loader
model.eval()
y_pred, y_true = [], []
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # torch.max over dim 1 returns (values, indices); the indices are the classes
        predicted = torch.max(outputs.data, 1)[1]
        y_pred.extend(predicted.numpy())
        y_true.extend(labels.numpy())

# Final accuracy: percentage of predictions equal to the true label
final_accuracy = 100 * (sum(int(p == t) for p, t in zip(y_pred, y_true)) / len(y_true))
print(f'\nFinal Test Accuracy: {final_accuracy:.2f}%')

# Sauvegarde du modèle (optionnel)
# torch.save(model.state_dict(), 'diabetes_model.pth')


Final Test Accuracy: 74.68%


# Méthode II : Utilisation de Keras

In [None]:
import os

# The backend must be chosen BEFORE Keras is imported for the first time: Keras reads
# KERAS_BACKEND at import time, so setting it afterwards has no effect. If Keras was
# already imported in this kernel session, restart the kernel for this to apply.
os.environ["KERAS_BACKEND"] = "torch"  # Set PyTorch as backend

import keras

# Data: reuse X_train, X_test, y_train and y_test prepared in the cells above.

# Build the model with the Keras API.
# The input shape is declared with an explicit keras.Input instead of passing
# `input_shape=` to the first Dense layer.
model = keras.Sequential([
    keras.Input(shape=(X_train.shape[1],)),
    keras.layers.Dense(64, activation='relu'),
    #keras.layers.Dropout(0.1),
    keras.layers.Dense(64, activation='relu'),
    #keras.layers.Dropout(0.1),
    keras.layers.Dense(2, activation='softmax')
])

# Softmax outputs + integer labels -> sparse categorical cross-entropy.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss=keras.losses.SparseCategoricalCrossentropy(),
    metrics=['accuracy']
)

# Train, stopping early once val_loss has not improved for 10 epochs and restoring
# the best weights.
# NOTE(review): the test split doubles as validation data here, so early stopping
# selects the weights on the same data that is used for the final score below;
# the reported test accuracy is therefore optimistic.
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=16,
    validation_data=(X_test, y_test),
    callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)]
)

# Evaluate
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f'\nTest Accuracy: {test_acc*100:.2f}%')

# Optional: Save model (will save in PyTorch format)
# model.save('diabetes_model.keras')

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.7077 - loss: 0.5787 - val_accuracy: 0.7338 - val_loss: 0.5227
Epoch 2/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7670 - loss: 0.4809 - val_accuracy: 0.7468 - val_loss: 0.5089
Epoch 3/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8036 - loss: 0.4238 - val_accuracy: 0.7338 - val_loss: 0.5164
Epoch 4/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7905 - loss: 0.4621 - val_accuracy: 0.7597 - val_loss: 0.5257
Epoch 5/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7954 - loss: 0.4378 - val_accuracy: 0.7532 - val_loss: 0.5212
Epoch 6/100
[1m39/39[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8006 - loss: 0.4178 - val_accuracy: 0.7468 - val_loss: 0.5255
Epoch 7/100
[1m39/39[0m [32m━━━━━━━━━━━━━━