In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torch.utils.data import DataLoader
from torchvision import transforms
import matplotlib.pyplot as plt
from torchvision.datasets import MNIST

In [2]:
class CNN_Model(nn.Module):
    def __init__(self):
        super(CNN_Model, self).__init__()

        # Convolution Layer 1
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)

        # Convolution Layer 2
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)

        # Fully Connected Layer
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)  # Output 10 classes (digits 0-9)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))
        x = x.view(-1, 64 * 7 * 7)  # Flatten the tensor
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [3]:
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))])

train_data = MNIST(root='./data', train=True, download=True, transform=transform)
test_data = MNIST(root='./data', train=False, download=True, transform=transform)

trainloader = DataLoader(train_data, batch_size=64, shuffle=True)
testloader = DataLoader(test_data, batch_size=64, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:02<00:00, 4.56MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 135kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:01<00:00, 1.27MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 10.5MB/s]

Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw






In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Initialize the model
model = CNN_Model().to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [5]:
import time

# Total training time
total_start_time = time.time()

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for data in trainloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    # Only print the loss value for each epoch
    print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader):.4f}")

# Calculate total training time after all epochs
total_training_time = time.time() - total_start_time
print(f"Total Training Time: {total_training_time:.2f}s")


Epoch 1, Loss: 0.1572
Epoch 2, Loss: 0.0464
Epoch 3, Loss: 0.0324
Epoch 4, Loss: 0.0248
Epoch 5, Loss: 0.0186
Epoch 6, Loss: 0.0140
Epoch 7, Loss: 0.0114
Epoch 8, Loss: 0.0102
Epoch 9, Loss: 0.0086
Epoch 10, Loss: 0.0050
Total Training Time: 175.26s


In [6]:
from sklearn.metrics import f1_score

# Evaluation on test set
model.eval()  # Set the model to evaluation mode
test_loss = 0.0
correct = 0
total = 0
all_labels = []
all_preds = []

with torch.no_grad():  # Disable gradient calculation during evaluation
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Collect predictions and labels for F1 score calculation
        all_labels.extend(labels.cpu().numpy())
        all_preds.extend(predicted.cpu().numpy())

# Calculate test metrics
test_accuracy = 100 * correct / total
test_f1 = f1_score(all_labels, all_preds, average='weighted')
test_loss = test_loss / len(testloader)

# Print each test metric on a separate line
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.2f}%")
print(f"Test F1 Score: {test_f1:.4f}")
print(f"Total Training Time: {total_training_time:.2f}s")



Test Loss: 0.0664
Test Accuracy: 98.25%
Test F1 Score: 0.9826
Total Training Time: 175.26s


In [7]:
import torch
import torchvision
from torchvision.transforms import Compose, ToTensor, Normalize
from torch.utils.data import DataLoader
from torch import nn
from torch.optim import Adam
import time  # Importing the time module to measure training time

# Hyperparamètres
BATCH_SIZE = 64
EPOCHS = 10
LEARNING_RATE = 0.001
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# Préparer le dataset MNIST
transform = Compose([
    ToTensor(),
    Normalize((0.5,), (0.5,))  # Normalisation pour le MNIST
])

train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [8]:

# Définition du modèle RCNN pour classification
class RCNNClassifier(nn.Module):
    def __init__(self, num_classes=10):
        super(RCNNClassifier, self).__init__()

        # Backbone convolutionnel (feature extractor)
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),  # Sortie: 32x28x28
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Sortie: 32x14x14

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # Sortie: 64x14x14
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # Sortie: 64x7x7
        )

        # Region Proposal Network (simplifié pour classification uniquement)
        self.rpn = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),  # Sortie: 128x7x7
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)  # Sortie: 128x3x3
        )

        # Fully connected layers
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 3 * 3, 256),
            nn.ReLU(),
            nn.Dropout(0.5),  # Régularisation
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)  # Extraction de caractéristiques
        x = self.rpn(x)          # RPN (simplifié ici pour extraire plus de caractéristiques)
        x = self.fc_layers(x)    # Classification finale
        return x

# Initialisation du modèle
model = RCNNClassifier(num_classes=10).to(DEVICE)

# Optimiseur et fonction de perte
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)

# Fonction d'entraînement avec calcul du temps
def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()
    start_time = time.time()  # Enregistrer le début du temps d'entraînement
    for epoch in range(epochs):
        total_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            # Zéro du gradient
            optimizer.zero_grad()

            # Passe avant
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Rétropropagation
            loss.backward()
            optimizer.step()

            # Suivi de la perte
            total_loss += loss.item()

        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_loader):.4f}")

    end_time = time.time()  # Enregistrer le temps à la fin de l'entraînement
    training_time = end_time - start_time  # Calculer le temps total d'entraînement
    print(f"Total training time: {training_time:.2f} seconds")

In [9]:
from sklearn.metrics import f1_score  # Pour calculer la F1 score

# Fonction d'évaluation mise à jour
def evaluate_model(model, test_loader, criterion):
    model.eval()
    correct = 0
    total = 0
    total_loss = 0.0
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)

            # Passe avant
            outputs = model(images)
            loss = criterion(outputs, labels)
            total_loss += loss.item()

            # Prédictions
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            # Stocker les étiquettes et prédictions pour F1 score
            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    # Calcul des métriques
    accuracy = 100 * correct / total
    avg_loss = total_loss / len(test_loader)
    f1 = f1_score(all_labels, all_predictions, average='weighted')  # F1 pondérée pour toutes les classes

    print(f"Accuracy: {accuracy:.2f}%")
    print(f"Average Loss: {avg_loss:.4f}")
    print(f"F1 Score: {f1:.4f}")

# Entraîner et évaluer avec affichage complet
train_model(model, train_loader, criterion, optimizer, EPOCHS)
evaluate_model(model, test_loader, criterion)


Epoch [1/10], Loss: 0.1985
Epoch [2/10], Loss: 0.0589
Epoch [3/10], Loss: 0.0443
Epoch [4/10], Loss: 0.0346
Epoch [5/10], Loss: 0.0273
Epoch [6/10], Loss: 0.0242
Epoch [7/10], Loss: 0.0211
Epoch [8/10], Loss: 0.0194
Epoch [9/10], Loss: 0.0178
Epoch [10/10], Loss: 0.0146
Total training time: 176.52 seconds
Accuracy: 99.22%
Average Loss: 0.0315
F1 Score: 0.9922



Performance (Accuracy, F1 Score, Loss) :

Le RCNN surpasse le CNN sur toutes les métriques de performance. La complexité ajoutée par l'utilisation des blocs RPN dans le RCNN permet d'extraire des caractéristiques plus discriminatives, ce qui conduit à une meilleure classification.

Training Time ::

Les deux modèles ont des temps d'entraînement similaires (environ 175-176 secondes), ce qui montre que le RCNN n'ajoute pas une surcharge significative malgré son architecture plus complexe.


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.utils.data import DataLoader
from torch.optim import Adam
from torchvision import models
from sklearn.metrics import f1_score
import time

# Hyperparameters
BATCH_SIZE = 64
LEARNING_RATE = 0.001
EPOCHS = 10
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Transformations: Resize to 224x224 and convert to RGB (3 channels)
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=3),  # Convert grayscale to RGB
    transforms.Resize((224, 224)),  # Resize to 224x224 for VGG16
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalization for MNIST
])

# Load MNIST dataset with transformations
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Initialize the pretrained VGG16 model and move it to the correct device (GPU or CPU)
vgg16 = models.vgg16(pretrained=True).to(DEVICE)

# Freeze the pretrained layers of VGG16
for param in vgg16.parameters():
    param.requires_grad = False

# Modify the classifier to match MNIST (10 classes)
vgg16.classifier[6] = nn.Linear(4096, 10).to(DEVICE)  # Ensure the last layer is on the same device

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = Adam(vgg16.classifier.parameters(), lr=LEARNING_RATE)

# Training function
def train_model(model, train_loader, criterion, optimizer, epochs):
    model.train()  # Set model to training mode
    total_train_time = 0
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        all_preds = []
        all_labels = []
        start_time = time.time()  # Track time at the start of the epoch

        for images, labels in train_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)  # Move data to the same device as the model

            optimizer.zero_grad()

            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and optimization
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

        epoch_time = time.time() - start_time  # Calculate time for this epoch
        total_train_time += epoch_time

        # Calculate accuracy and F1 score
        accuracy = 100 * correct / total
        f1 = f1_score(all_labels, all_preds, average='weighted')
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(train_loader):.4f}, '
              f'Accuracy: {accuracy:.2f}%, F1 Score: {f1:.4f}, Time: {epoch_time:.2f}s')

    return total_train_time

# Evaluation function
def evaluate_model(model, test_loader, criterion):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    test_loss = 0.0
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(DEVICE), labels.to(DEVICE)  # Move data to the same device as the model

            outputs = model(images)
            loss = criterion(outputs, labels)
            test_loss += loss.item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    accuracy = 100 * correct / total
    f1 = f1_score(all_labels, all_preds, average='weighted')
    print(f'Test Loss: {test_loss/len(test_loader):.4f}, Test Accuracy: {accuracy:.2f}%, '
          f'Test F1 Score: {f1:.4f}')

# Train and evaluate the model
train_time = train_model(vgg16, train_loader, criterion, optimizer, EPOCHS)
evaluate_model(vgg16, test_loader, criterion)

print(f'Total Training Time: {train_time:.2f} seconds')


Epoch [1/10], Loss: 0.5198, Accuracy: 83.54%, F1 Score: 0.8353, Time: 363.24s
Epoch [2/10], Loss: 0.3874, Accuracy: 87.13%, F1 Score: 0.8713, Time: 365.66s
Epoch [3/10], Loss: 0.3758, Accuracy: 87.52%, F1 Score: 0.8752, Time: 367.65s
Epoch [4/10], Loss: 0.3674, Accuracy: 87.93%, F1 Score: 0.8793, Time: 365.57s
Epoch [5/10], Loss: 0.3614, Accuracy: 88.05%, F1 Score: 0.8805, Time: 364.62s
