In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from sklearn.metrics import confusion_matrix, accuracy_score, recall_score, f1_score, classification_report

import pandas as pnd


<h1 style="color: red;">Section 1: Data</h1>

<h2>1) Préparation des données</h2>

In [None]:
# Load the diabetes table from disk
dataset = pnd.read_csv('diabetes.csv')

# Features: every column except 'Outcome'; target: the 'Outcome' column itself
X = np.array(dataset.drop('Outcome', axis=1))
y = np.array(dataset.loc[:, 'Outcome'])

# Split data: 80% train / 20% test, seeded so the partition is repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=23,
)


<h1 style="color: red;">Section 2: Neural network avec TensorFlow</h1>

In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense

from tensorflow.keras.optimizers import Adam

<h2>2) Modèle de réseau de neurones</h2>

In [None]:
# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling give the same result on every
# Restart & Run All (same seed value as the train/test split).
tf.keras.utils.set_random_seed(23)

model_nn = Sequential()
# 2-a-1 & 2-a-2 & 2-a-3: Based on the dataset nature (binary classification)
# - input shape: one value per feature column, i.e. X_train.shape[1]
# - output: a single neuron
# - activation: sigmoid, so the output can be thresholded into two classes
output_layer = Dense(1, input_shape=(X_train.shape[1],), activation='sigmoid')
model_nn.add(output_layer)

# 2-c: Compile the model (Adam optimiser, binary cross-entropy loss, accuracy metric)
opt = Adam(learning_rate=0.001)
model_nn.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])

# 2-d: Train the model; 20% of the training data is held out for validation,
# which is what produces the 'val_*' entries in history.history
history = model_nn.fit(X_train, y_train, epochs=1000, batch_size=32, verbose=1, validation_split=0.2)


<h2>3) Prédiction en utilisant le modèle</h2>

In [None]:
# Raw sigmoid outputs of the trained network for the held-out test set
yhat_nn = model_nn.predict(x=X_test)

In [None]:
# Collapse the model output to 1-D, then apply a 0.5 cut-off to get 0/1 labels
yhat_nn = yhat_nn.flatten()
yhat_nn_class = np.where(yhat_nn > 0.5, 1, 0)

In [None]:
# Trained parameters of the model's first (and only) layer: kernel, then bias
dense_layer = model_nn.layers[0]
W_nn, bias_nn = dense_layer.get_weights()
# Manual prediction using sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The naive formula evaluates exp(-z), which overflows (and emits a
    RuntimeWarning) for large negative z. This version only ever evaluates
    exp(-|z|), which lies in (0, 1], and picks the algebraically equivalent
    expression for each sign of z.

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    A NumPy scalar for scalar input, otherwise an ndarray with the shape of z.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never larger than 1, so it cannot overflow
    # z >= 0: 1 / (1 + e^-z)          z < 0: e^z / (1 + e^z)
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

# Forward pass by hand: affine map with the trained weights, then the sigmoid
manual_logits = X_test @ W_nn + bias_nn
manual_predictions = sigmoid(manual_logits).flatten()
# Threshold at 0.5 to turn the outputs into 0/1 class labels
manual_predictions_class = np.where(manual_predictions > 0.5, 1, 0)

# 4-a & 4-b: Evaluate model
# Class labels for the training set, obtained with a 0.5 cut-off
train_predictions = model_nn.predict(X_train).flatten()
train_predictions_class = (train_predictions > 0.5).astype(int)

# One entry per split to report: (heading, underline, true labels, predicted labels)
evaluation_splits = [
    ("Model Evaluation on Training Set:", "---------------------------------",
     y_train, train_predictions_class),
    ("\nModel Evaluation on Test Set:", "-------------------------------",
     y_test, yhat_nn_class),
]

# Same sequence of metrics for each split, training set first
for heading, underline, true_labels, predicted_labels in evaluation_splits:
    print(heading)
    print(underline)
    print(f"Accuracy: {accuracy_score(true_labels, predicted_labels):.4f}")
    print(f"Recall: {recall_score(true_labels, predicted_labels):.4f}")
    print(f"F1 Score: {f1_score(true_labels, predicted_labels):.4f}")
    print("\nConfusion Matrix:")
    print(confusion_matrix(true_labels, predicted_labels))
    print("\nClassification Report:")
    print(classification_report(true_labels, predicted_labels))

# Plot training history: loss on the left, accuracy on the right,
# each with its training and validation curve
fig, (loss_ax, accuracy_ax) = plt.subplots(1, 2, figsize=(12, 5))

# (axes, history key, validation key, train label, validation label, title, y-label)
history_panels = (
    (loss_ax, 'loss', 'val_loss',
     'Training Loss', 'Validation Loss', 'Loss During Training', 'Loss'),
    (accuracy_ax, 'accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy', 'Accuracy During Training', 'Accuracy'),
)
for ax, train_key, val_key, train_label, val_label, panel_title, y_label in history_panels:
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_title(panel_title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.legend()

fig.tight_layout()
plt.show()

# Plot residuals: true label minus predicted output, against the predicted output
residuals = y_test - yhat_nn

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(yhat_nn, residuals)
ax.axhline(y=0, color='r', linestyle='-')  # zero-error reference line
ax.set_title('Residual Plot')
ax.set_xlabel('Predicted Values')
ax.set_ylabel('Residuals')
ax.grid(True)
plt.show()

In [None]:
# Display the trained weights and bias of the Keras model.
# On a top-to-bottom run the names `W` and `bias` do not exist at this point;
# the parameters read from the layer are stored in W_nn and bias_nn.
W_nn, bias_nn

<h2>4) Évaluation du modèle</h2>

In [None]:
# A single one-neuron Dense layer has one weight per input feature plus one bias
num_features = X_train.shape[1]
num_params = num_features + 1
print(f"Number of parameters (manual calculation): {num_params}")

# Display model details (Keras' own layer/parameter table, for comparison)
model_nn.summary()

# 2-h: Visualize the neural network
# Schematic of the single-layer model: one blue dot per input feature on the
# left, the output neuron in red on the right, and one line per trained weight.
plt.figure(figsize=(12, 8))
plt.title('Neural Network Architecture for Diabetes Classification')

# The output neuron is drawn at the vertical middle of the input column
output_y = num_features // 2

# Input layer
for i in range(num_features):
    plt.scatter(0, i, s=100, color='blue')
    plt.text(0.1, i, f'Input {i+1}', fontsize=10)

# Output layer
plt.scatter(1, output_y, s=100, color='red')
plt.text(1.1, output_y, 'Output (Sigmoid)', fontsize=10)

# Weights
W_nn, bias_nn = model_nn.layers[0].get_weights()
max_weight = np.max(np.abs(W_nn))
# Guard the normalisation below: if every weight were exactly zero, dividing
# by max_weight would yield NaN line widths.
weight_scale = max_weight if max_weight > 0 else 1.0

for i in range(num_features):
    # Line thickness proportional to weight magnitude; colour encodes the sign
    weight = W_nn[i][0]
    line_width = 0.5 + 3 * abs(weight) / weight_scale
    line_color = 'green' if weight > 0 else 'red'
    plt.plot([0, 1], [i, output_y], linewidth=line_width, color=line_color, alpha=0.6)
    # Weight value written at the midpoint of its connection
    plt.text(0.5, (i + output_y) / 2, f'{weight:.3f}', fontsize=8)

plt.text(1.1, output_y - 0.5, f'Bias: {bias_nn[0]:.3f}', fontsize=10)

# Add labels (feature count taken from the data instead of being hard-coded)
plt.text(0, -1, f'Input Layer\n({num_features} features)', ha='center', fontsize=12)
plt.text(1, -1, 'Output Layer\n(1 neuron, sigmoid)', ha='center', fontsize=12)

plt.xlim(-0.5, 1.5)
plt.ylim(-1.5, num_features)
plt.axis('off')
plt.tight_layout()
plt.show()

<h1>From scratch</h1>


<h2>Modèle de régression logistique from scratch avec utilisation des matrices</h2>


In [None]:
# Gradient-descent hyperparameters: step size and number of passes over the training set
learning_rate, epochs = 1e-4, 1000

# Sigmoid function
def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), element-wise.

    The naive formula evaluates exp(-z), which overflows (and emits a
    RuntimeWarning) for large negative z. This version only ever evaluates
    exp(-|z|), which lies in (0, 1], and picks the algebraically equivalent
    expression for each sign of z.

    Parameters
    ----------
    z : scalar or array-like of numbers

    Returns
    -------
    A NumPy scalar for scalar input, otherwise an ndarray with the shape of z.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # never larger than 1, so it cannot overflow
    # z >= 0: 1 / (1 + e^-z)          z < 0: e^z / (1 + e^z)
    result = np.where(z >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

# Parameter initialisation: a zero column vector with one weight per feature,
# and a scalar bias
n, n_features = X_train.shape
W = np.zeros((n_features, 1))
b = 0.0

# Reshape y_train into a column so it lines up with the (n, 1) predictions
y_train = y_train.reshape(-1, 1)

# Loss history for plotting
loss_history = []

# Floor applied inside the logarithms so that log(0) is never evaluated
log_floor = 1e-15

# Training (vectorised gradient descent over the whole training set)
for epoch in range(epochs):
    # Forward pass
    z = X_train @ W + b
    y_pred = sigmoid(z)

    # Binary cross-entropy, averaged over the samples
    log_positive = np.log(np.maximum(y_pred, log_floor))
    log_negative = np.log(np.maximum(1 - y_pred, log_floor))
    loss = -np.mean(y_train * log_positive + (1 - y_train) * log_negative)
    loss_history.append(loss)

    # Prediction error, shape (n, 1)
    error = y_pred - y_train

    # Gradients of the loss with respect to the weights and the bias
    dW = (1/n) * (X_train.T @ error)
    db = (1/n) * error.sum()

    # Parameter update
    W -= learning_rate * dW
    b -= learning_rate * db

    # Print progress every 100 epochs
    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss:.4f}")

# Results: fitted parameters
print("Paramètres ajustés:")
print(f"W = \n{W}")
print(f"b = {b:.4f}")

# Make predictions on test set (same forward pass as in training, 0.5 cut-off)
z_test = X_test @ W + b
y_pred_scratch = sigmoid(z_test)
y_pred_classes = np.where(y_pred_scratch > 0.5, 1, 0)

# Evaluate the model
print("\nModel Evaluation (Logistic Regression from Scratch):")
print("-------------------------------------------------")
# Scalar metrics share one print format, so they are reported from a small table
scalar_metrics = (
    ("Accuracy", accuracy_score),
    ("Recall", recall_score),
    ("F1 Score", f1_score),
)
for metric_name, metric_fn in scalar_metrics:
    print(f"{metric_name}: {metric_fn(y_test, y_pred_classes):.4f}")
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred_classes))
print("\nClassification Report:")
print(classification_report(y_test, y_pred_classes))

# Plot training loss: one point per gradient-descent epoch
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(loss_history)
ax.set_title('Loss During Training (Logistic Regression from Scratch)')
ax.set_xlabel('Epochs')
ax.set_ylabel('Binary Cross-Entropy Loss')
ax.grid(True)
plt.show()

# Compare with neural network model: confusion matrices on the top row,
# accuracy and F1 bars on the bottom row
cm1 = confusion_matrix(y_test, yhat_nn_class)
cm2 = confusion_matrix(y_test, y_pred_classes)

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Top row: one annotated heat-map per model
confusion_panels = (
    (axes[0, 0], cm1, 'Neural Network Confusion Matrix'),
    (axes[0, 1], cm2, 'Logistic Regression (Scratch) Confusion Matrix'),
)
for ax, matrix, panel_title in confusion_panels:
    image = ax.imshow(matrix, cmap='Blues')
    ax.set_title(panel_title)
    fig.colorbar(image, ax=ax)
    # Write each count in its cell (x is the column, y is the row)
    for (row, col), count in np.ndenumerate(matrix):
        ax.text(col, row, count, ha='center', va='center')
    ax.set_ylabel('True Label')
    ax.set_xlabel('Predicted Label')

# Bottom row: the same metric for both models, side by side
score_panels = (
    (axes[1, 0], accuracy_score, 'Accuracy Comparison'),
    (axes[1, 1], f1_score, 'F1 Score Comparison'),
)
for ax, metric_fn, panel_title in score_panels:
    ax.bar(['NN', 'LR'], [metric_fn(y_test, yhat_nn_class), metric_fn(y_test, y_pred_classes)])
    ax.set_title(panel_title)
    ax.set_ylim(0, 1)

fig.tight_layout()
plt.show()