```text
Non Linear Network
```

In [None]:
# We import pytorch and torchvision
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
# Import numpy
import numpy as np
import time

# CIFAR-10 images are RGB, so Normalize is given one mean/std per channel rather
# than relying on a single value being broadcast over the three channels.
# The transform is applied when a sample is read from the dataset.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the CIFAR-10 train and test splits (downloaded into data/ when missing)
train_dataset = torchvision.datasets.CIFAR10(root='data/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='data/', train=False, transform=transform, download=True)

# Build a DataLoader with the requested batch size
def get_data_loader(dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True):
    """Wrap `dataset` in a `torch.utils.data.DataLoader`.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per batch.
        shuffle: Forwarded to the DataLoader. Defaults to True, the value that
            was previously hard-coded.
        num_workers: Forwarded to the DataLoader. Defaults to 4.
        pin_memory: Forwarded to the DataLoader. Defaults to True.

    Returns:
        The configured DataLoader.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

def get_batch_format(data_loader):
    """Print the shapes of the images and labels of the first batch of `data_loader`."""
    first_batch = next(iter(data_loader))
    images_t, labels_t = first_batch
    for caption, tensor in (('images.shape:', images_t), ('labels.shape:', labels_t)):
        print(caption, tensor.shape)

class Network(nn.Module):
    """Fully connected classifier: three hidden ReLU layers and a linear output layer.

    Args:
        input_dim: Number of features per sample once the input is flattened.
        hidden_dim1: Width shared by the three hidden layers.
        output_dim: Number of output logits.
    """

    def __init__(self, input_dim, hidden_dim1 = 32, output_dim=10):
        super(Network, self).__init__()
        self.a = nn.Linear(input_dim, hidden_dim1)
        self.b = nn.Linear(hidden_dim1, hidden_dim1)
        self.c = nn.Linear(hidden_dim1, hidden_dim1)
        self.d = nn.Linear(hidden_dim1, output_dim)

        # Non-linearity applied after each hidden layer
        self.act = nn.ReLU()

    def forward(self, x):
        """Return logits of shape (batch, output_dim).

        Accepts already-flattened input (batch, input_dim) as well as image
        batches (batch, C, H, W): every axis after the batch axis is flattened.
        """
        # The linear layers need `input_dim` features on the last axis. Flattening
        # works for 2-D and 4-D input, whereas a 4-axis permute fails on flattened
        # batches and leaves a size-3 last axis on image batches.
        x = torch.flatten(x, start_dim=1)
        x1 = self.act(self.a(x))
        x2 = self.act(self.b(x1))
        x3 = self.act(self.c(x2))
        x4 = self.d(x3)
        return x4

# Accuracy: fraction of samples whose highest-scoring class matches the label
def accuracy(predictions, labels):
    """Return the fraction of rows of `predictions` whose arg-max over dim 1 equals `labels`."""
    predicted_labels = predictions.max(dim=1).indices
    num_correct = predicted_labels.eq(labels).sum().item()
    return num_correct / len(labels)

# Wrap the CIFAR-10 training split in a DataLoader (mini-batches of 128)
train_loader = get_data_loader(train_dataset, batch_size=128)
# The test split is requested as one batch of 10000 samples
test_loader = get_data_loader(test_dataset, batch_size=10000)
# Print the format of the batch
get_batch_format(train_loader)

# Pick the compute device: CUDA when available, CPU otherwise
deviceGPU = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the fully connected network (3*32*32 inputs, 32 hidden units, 10 outputs)
# and move it to the selected device
modelNet = Network(3 * 32 * 32, 32, 10).to(deviceGPU)

# Loss function and learning rate
lossFn = F.cross_entropy
learningRate = 1e-4

# Adam optimizer with weight decay
opt = torch.optim.Adam(params=modelNet.parameters(), lr=learningRate, weight_decay=1e-4)

def off_load_on_gpu(train_loader, test_loader, device):
    """Iterate both loaders once and return their batches as lists held on `device`.

    Each image batch is flattened to (batch, features) after being moved.
    Returns a `(train_batches, test_batches)` tuple of lists of `(x, y)` tuples.
    """
    def _materialize(loader):
        cached = []
        for images, labels in loader:
            flat_images = images.to(device).view(images.shape[0], -1)
            cached.append((flat_images, labels.to(device)))
        return cached

    return _materialize(train_loader), _materialize(test_loader)

def training_cycle(model, train_loader, device):
    """Run one optimization pass over every batch of `train_loader`.

    Uses the notebook-level `lossFn` and `opt`. `device` is accepted but not
    used here; batches are fed to the model as they come.
    """
    model.train()
    for inputs, targets in train_loader:
        batch_loss = lossFn(model(inputs), targets)
        # Back-propagate, apply the update, then clear gradients for the next batch
        batch_loss.backward()
        opt.step()
        opt.zero_grad()
    
def validation_cycle(model, test_loader, epoch, num_epochs, device):
    """Evaluate `model` on `test_loader` and print the mean loss and accuracy.

    The model is switched to eval mode and gradients are disabled. Loss and
    accuracy are averaged per batch. `device` is accepted but not used here.
    """
    model.eval()
    loss_sum = 0
    accuracy_sum = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss_sum += lossFn(outputs, targets).item()
            accuracy_sum += accuracy(outputs, targets)

    num_batches = len(test_loader)
    avg_loss = loss_sum / num_batches
    avg_acc = accuracy_sum / num_batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%")

def fit_one_cycle(model, train_loader, test_loader, epoch, num_epochs, device):
    """Train `model` for one epoch, then print its validation metrics."""
    training_cycle(model=model, train_loader=train_loader, device=device)
    validation_cycle(
        model=model,
        test_loader=test_loader,
        epoch=epoch,
        num_epochs=num_epochs,
        device=device,
    )
    
# Training loop
num_epochs = 50

# Move every batch to the device BEFORE starting the clock, so the measurement
# covers the epochs only, as in the other experiments of this notebook.
train_loader_gpu, test_loader_gpu = off_load_on_gpu(train_loader, test_loader, deviceGPU)

# Time to train (training + per-epoch validation)
time_to_train = time.time()

for epoch in range(num_epochs):
    fit_one_cycle(modelNet, train_loader_gpu, test_loader_gpu, epoch, num_epochs, deviceGPU)

time_to_train = time.time() - time_to_train
print(f"Time to train: {time_to_train:.2f} seconds")

```text
Convolutional Network
```

In [None]:
# We import pytorch and torchvision
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
# Import numpy
import numpy as np
import time

# CIFAR-10 images are RGB, so Normalize is given one mean/std per channel rather
# than relying on a single value being broadcast over the three channels.
# The transform is applied when a sample is read from the dataset.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the CIFAR-10 train and test splits (downloaded into data/ when missing)
train_dataset = torchvision.datasets.CIFAR10(root='data/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='data/', train=False, transform=transform, download=True)

# Build a DataLoader with the requested batch size
def get_data_loader(dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True):
    """Wrap `dataset` in a `torch.utils.data.DataLoader`.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per batch.
        shuffle: Forwarded to the DataLoader. Defaults to True, the value that
            was previously hard-coded.
        num_workers: Forwarded to the DataLoader. Defaults to 4.
        pin_memory: Forwarded to the DataLoader. Defaults to True.

    Returns:
        The configured DataLoader.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

def get_batch_format(data_loader):
    """Print the shapes of the images and labels of the first batch of `data_loader`."""
    first_batch = next(iter(data_loader))
    images_t, labels_t = first_batch
    for caption, tensor in (('images.shape:', images_t), ('labels.shape:', labels_t)):
        print(caption, tensor.shape)

# Accuracy: fraction of samples whose highest-scoring class matches the label
def accuracy(predictions, labels):
    """Return the fraction of rows of `predictions` whose arg-max over dim 1 equals `labels`."""
    predicted_labels = predictions.max(dim=1).indices
    num_correct = predicted_labels.eq(labels).sum().item()
    return num_correct / len(labels)

# Wrap the CIFAR-10 training split in a DataLoader (mini-batches of 128)
train_loader = get_data_loader(train_dataset, batch_size=128)
# The test split is requested as one batch of 10000 samples
test_loader = get_data_loader(test_dataset, batch_size=10000)
# Print the format of the batch
get_batch_format(train_loader)

# Pick the compute device: CUDA when available, CPU otherwise
deviceGPU = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loss function and learning rate
lossFn = F.cross_entropy
learningRate = 1e-4

class ConvNet(nn.Module):
    """Three conv(3x3) + ReLU + max-pool(2x2) stages followed by two linear layers.

    Each pooling halves height and width, and `fc1` expects 128 * 4 * 4 features,
    so the network is sized for 3-channel 32x32 inputs (32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)
        self.act = nn.ReLU()

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 3, 32, 32) input."""
        x = self.pool(self.act(self.conv1(x)))
        x = self.pool(self.act(self.conv2(x)))
        x = self.pool(self.act(self.conv3(x)))
        # Flatten per sample and keep the batch axis. A view(-1, 128 * 4 * 4)
        # would silently fold spatial positions into the batch axis for inputs of
        # another size; here such inputs fail at fc1 with a clear shape error.
        x = torch.flatten(x, start_dim=1)
        x = self.act(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the ConvNet, then move it to the selected device
modelNet = ConvNet()
modelNet = modelNet.to(deviceGPU)

# Adam optimizer (with weight decay) over the ConvNet parameters
opt = torch.optim.Adam(params=modelNet.parameters(), lr=learningRate, weight_decay=1e-4)

def off_load_on_gpu(train_loader, test_loader, device):
    """Iterate both loaders once and return their batches as lists held on `device`.

    Returns a `(train_batches, test_batches)` tuple of lists of `(x, y)` tuples.
    """
    def _to_device(loader):
        moved = []
        for images, labels in loader:
            moved.append((images.to(device), labels.to(device)))
        return moved

    return _to_device(train_loader), _to_device(test_loader)

def training_cycle(model, train_loader, device):
    """Run one optimization pass over every batch of `train_loader`.

    Uses the notebook-level `lossFn` and `opt`. `device` is accepted but not
    used here; batches are fed to the model as they come.
    """
    model.train()
    for inputs, targets in train_loader:
        batch_loss = lossFn(model(inputs), targets)
        # Back-propagate, apply the update, then clear gradients for the next batch
        batch_loss.backward()
        opt.step()
        opt.zero_grad()
    
def validation_cycle(model, test_loader, epoch, num_epochs, device):
    """Evaluate `model` on `test_loader` and print the mean loss and accuracy.

    The model is switched to eval mode and gradients are disabled. Loss and
    accuracy are averaged per batch. `device` is accepted but not used here.
    """
    model.eval()
    loss_sum = 0
    accuracy_sum = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss_sum += lossFn(outputs, targets).item()
            accuracy_sum += accuracy(outputs, targets)

    num_batches = len(test_loader)
    avg_loss = loss_sum / num_batches
    avg_acc = accuracy_sum / num_batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%")

def fit_one_cycle(model, train_loader, test_loader, epoch, num_epochs, device):
    """Train `model` for one epoch, then print its validation metrics."""
    training_cycle(model=model, train_loader=train_loader, device=device)
    validation_cycle(
        model=model,
        test_loader=test_loader,
        epoch=epoch,
        num_epochs=num_epochs,
        device=device,
    )

# Materialize every batch on the target device once, before training starts
train_loader_gpu, test_loader_gpu = off_load_on_gpu(
    train_loader=train_loader, test_loader=test_loader, device=deviceGPU
)

# Run the epochs and measure the elapsed wall-clock time
num_epochs = 50
time_to_train = time.time()

for epoch in range(num_epochs):
    fit_one_cycle(
        model=modelNet,
        train_loader=train_loader_gpu,
        test_loader=test_loader_gpu,
        epoch=epoch,
        num_epochs=num_epochs,
        device=deviceGPU,
    )

time_to_train = time.time() - time_to_train
print(f"Time to train: {time_to_train:.2f} seconds")

```text
Strided Convolutions
```

In [None]:
# We import pytorch and torchvision
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
# Import numpy
import numpy as np
import time

# CIFAR-10 images are RGB, so Normalize is given one mean/std per channel rather
# than relying on a single value being broadcast over the three channels.
# The transform is applied when a sample is read from the dataset.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the CIFAR-10 train and test splits (downloaded into data/ when missing)
train_dataset = torchvision.datasets.CIFAR10(root='data/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='data/', train=False, transform=transform, download=True)

# Build a DataLoader with the requested batch size
def get_data_loader(dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True):
    """Wrap `dataset` in a `torch.utils.data.DataLoader`.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per batch.
        shuffle: Forwarded to the DataLoader. Defaults to True, the value that
            was previously hard-coded.
        num_workers: Forwarded to the DataLoader. Defaults to 4.
        pin_memory: Forwarded to the DataLoader. Defaults to True.

    Returns:
        The configured DataLoader.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

def get_batch_format(data_loader):
    """Print the shapes of the images and labels of the first batch of `data_loader`."""
    first_batch = next(iter(data_loader))
    images_t, labels_t = first_batch
    for caption, tensor in (('images.shape:', images_t), ('labels.shape:', labels_t)):
        print(caption, tensor.shape)

# Accuracy: fraction of samples whose highest-scoring class matches the label
def accuracy(predictions, labels):
    """Return the fraction of rows of `predictions` whose arg-max over dim 1 equals `labels`."""
    predicted_labels = predictions.max(dim=1).indices
    num_correct = predicted_labels.eq(labels).sum().item()
    return num_correct / len(labels)

# Wrap the CIFAR-10 training split in a DataLoader (mini-batches of 128)
train_loader = get_data_loader(train_dataset, batch_size=128)
# The test split is requested as one batch of 10000 samples
test_loader = get_data_loader(test_dataset, batch_size=10000)
# Print the format of the batch
get_batch_format(train_loader)

# Pick the compute device: CUDA when available, CPU otherwise
deviceGPU = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loss function and learning rate
lossFn = F.cross_entropy
learningRate = 1e-4
  
class ConvNet(nn.Module):
    """Three stride-2 conv(3x3) + ReLU stages followed by two linear layers.

    Down-sampling is done by the convolutions themselves (no pooling). `fc1`
    expects 128 * 4 * 4 features, so the network is sized for 3-channel 32x32
    inputs (32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=2, padding=1)  # Stride 2
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=2, padding=1)  # Stride 2
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=2, padding=1)  # Stride 2
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)
        self.act = nn.ReLU()

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 3, 32, 32) input."""
        x = self.act(self.conv1(x))
        x = self.act(self.conv2(x))
        x = self.act(self.conv3(x))
        # Flatten per sample and keep the batch axis. A view(-1, 128 * 4 * 4)
        # would silently fold spatial positions into the batch axis for inputs of
        # another size; here such inputs fail at fc1 with a clear shape error.
        x = torch.flatten(x, start_dim=1)
        x = self.act(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the strided ConvNet, then move it to the selected device
modelNet = ConvNet()
modelNet = modelNet.to(deviceGPU)

# Adam optimizer (with weight decay) over the ConvNet parameters
opt = torch.optim.Adam(params=modelNet.parameters(), lr=learningRate, weight_decay=1e-4)

def off_load_on_gpu(train_loader, test_loader, device):
    """Iterate both loaders once and return their batches as lists held on `device`.

    Returns a `(train_batches, test_batches)` tuple of lists of `(x, y)` tuples.
    """
    def _to_device(loader):
        moved = []
        for images, labels in loader:
            moved.append((images.to(device), labels.to(device)))
        return moved

    return _to_device(train_loader), _to_device(test_loader)

def training_cycle(model, train_loader, device):
    """Run one optimization pass over every batch of `train_loader`.

    Uses the notebook-level `lossFn` and `opt`. `device` is accepted but not
    used here; batches are fed to the model as they come.
    """
    model.train()
    for inputs, targets in train_loader:
        batch_loss = lossFn(model(inputs), targets)
        # Back-propagate, apply the update, then clear gradients for the next batch
        batch_loss.backward()
        opt.step()
        opt.zero_grad()
    
def validation_cycle(model, test_loader, epoch, num_epochs, device):
    """Evaluate `model` on `test_loader` and print the mean loss and accuracy.

    The model is switched to eval mode and gradients are disabled. Loss and
    accuracy are averaged per batch. `device` is accepted but not used here.
    """
    model.eval()
    loss_sum = 0
    accuracy_sum = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss_sum += lossFn(outputs, targets).item()
            accuracy_sum += accuracy(outputs, targets)

    num_batches = len(test_loader)
    avg_loss = loss_sum / num_batches
    avg_acc = accuracy_sum / num_batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%")

def fit_one_cycle(model, train_loader, test_loader, epoch, num_epochs, device):
    """Train `model` for one epoch, then print its validation metrics."""
    training_cycle(model=model, train_loader=train_loader, device=device)
    validation_cycle(
        model=model,
        test_loader=test_loader,
        epoch=epoch,
        num_epochs=num_epochs,
        device=device,
    )

# Materialize every batch on the target device once, before training starts
train_loader_gpu, test_loader_gpu = off_load_on_gpu(
    train_loader=train_loader, test_loader=test_loader, device=deviceGPU
)

# Run the epochs and measure the elapsed wall-clock time
num_epochs = 50
time_to_train = time.time()

for epoch in range(num_epochs):
    fit_one_cycle(
        model=modelNet,
        train_loader=train_loader_gpu,
        test_loader=test_loader_gpu,
        epoch=epoch,
        num_epochs=num_epochs,
        device=deviceGPU,
    )

time_to_train = time.time() - time_to_train
print(f"Time to train: {time_to_train:.2f} seconds")

```text
Convolutional Network (With Residual Connections)
```

### Pourquoi n'aurait-on pas pu le faire avec une convolution à stride ?

Une convolution avec un stride supérieur à 1 réduit la taille spatiale (hauteur et largeur) de sa sortie par rapport à son entrée. La sortie de la convolution n'a donc plus la même taille que le tenseur d'entrée, ce qui rend impossible leur addition directe dans une connexion résiduelle : celle-ci exige que les deux tenseurs aient exactement les mêmes dimensions. Il faudrait alors appliquer le même stride à la branche résiduelle (par exemple dans sa convolution 1x1) pour rétablir la compatibilité.

En utilisant `MaxPool` avec stride, nous réduisons la taille spatiale de manière contrôlée après avoir ajouté la connexion résiduelle, ce qui permet de maintenir la compatibilité des dimensions pour l'addition.

Similar code found with 2 license types
```

In [None]:
# We import pytorch and torchvision
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
# Import numpy
import numpy as np
import time

# CIFAR-10 images are RGB, so Normalize is given one mean/std per channel rather
# than relying on a single value being broadcast over the three channels.
# The transform is applied when a sample is read from the dataset.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load the CIFAR-10 train and test splits (downloaded into data/ when missing)
train_dataset = torchvision.datasets.CIFAR10(root='data/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='data/', train=False, transform=transform, download=True)

# Build a DataLoader with the requested batch size
def get_data_loader(dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True):
    """Wrap `dataset` in a `torch.utils.data.DataLoader`.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per batch.
        shuffle: Forwarded to the DataLoader. Defaults to True, the value that
            was previously hard-coded.
        num_workers: Forwarded to the DataLoader. Defaults to 4.
        pin_memory: Forwarded to the DataLoader. Defaults to True.

    Returns:
        The configured DataLoader.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

def get_batch_format(data_loader):
    """Print the shapes of the images and labels of the first batch of `data_loader`."""
    first_batch = next(iter(data_loader))
    images_t, labels_t = first_batch
    for caption, tensor in (('images.shape:', images_t), ('labels.shape:', labels_t)):
        print(caption, tensor.shape)

# Accuracy: fraction of samples whose highest-scoring class matches the label
def accuracy(predictions, labels):
    """Return the fraction of rows of `predictions` whose arg-max over dim 1 equals `labels`."""
    predicted_labels = predictions.max(dim=1).indices
    num_correct = predicted_labels.eq(labels).sum().item()
    return num_correct / len(labels)

# Wrap the CIFAR-10 training split in a DataLoader (mini-batches of 128)
train_loader = get_data_loader(train_dataset, batch_size=128)
# The test split is requested as one batch of 10000 samples
test_loader = get_data_loader(test_dataset, batch_size=10000)
# Print the format of the batch
get_batch_format(train_loader)

# Pick the compute device: CUDA when available, CPU otherwise
deviceGPU = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loss function and learning rate
lossFn = F.cross_entropy
learningRate = 1e-4

class ConvNetResidual(nn.Module):
    """ConvNet with a 1x1-convolution shortcut around each 3x3 convolution stage.

    Each stage computes `ReLU(conv3x3(x)) + conv1x1(x)` and then max-pools by 2.
    The 1x1 convolution brings the shortcut to the stage's output channel count
    so the two tensors can be added. `fc1` expects 128 * 4 * 4 features, so the
    network is sized for 3-channel 32x32 inputs (32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super(ConvNetResidual, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)
        self.act = nn.ReLU(inplace=False)  # Out-of-place activation

        # 1x1 convolutions for residuals
        self.res_conv1 = nn.Conv2d(3, 32, kernel_size=1, stride=1, padding=0)
        self.res_conv2 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0)
        self.res_conv3 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 3, 32, 32) input."""
        # First convolution + residual
        residual = self.res_conv1(x)
        x = self.act(self.conv1(x))
        x = x + residual  # Out-of-place addition
        x = self.pool(x)

        # Second convolution + residual
        residual = self.res_conv2(x)
        x = self.act(self.conv2(x))
        x = x + residual  # Out-of-place addition
        x = self.pool(x)

        # Third convolution + residual
        residual = self.res_conv3(x)
        x = self.act(self.conv3(x))
        x = x + residual  # Out-of-place addition
        x = self.pool(x)

        # Flatten per sample and keep the batch axis. A view(-1, 128 * 4 * 4)
        # would silently fold spatial positions into the batch axis for inputs of
        # another size; here such inputs fail at fc1 with a clear shape error.
        x = torch.flatten(x, start_dim=1)
        x = self.act(self.fc1(x))
        x = self.fc2(x)
        return x

# Replace the previous model with the residual one
modelNet = ConvNetResidual().to(deviceGPU)

# NOTE: autograd anomaly detection is deliberately NOT enabled here. It is a
# debugging aid that slows down every backward pass and, being a global switch,
# it would also distort the training-time measurements taken in this notebook.

# Create a new optimizer for the ConvNet model
opt = torch.optim.Adam(modelNet.parameters(), lr=learningRate, weight_decay=0.0001)

def off_load_on_gpu(train_loader, test_loader, device):
    """Iterate both loaders once and return their batches as lists held on `device`.

    Returns a `(train_batches, test_batches)` tuple of lists of `(x, y)` tuples.
    """
    def _to_device(loader):
        moved = []
        for images, labels in loader:
            moved.append((images.to(device), labels.to(device)))
        return moved

    return _to_device(train_loader), _to_device(test_loader)

def training_cycle(model, train_loader, device):
    """Run one optimization pass over every batch of `train_loader`.

    Uses the notebook-level `lossFn` and `opt`. `device` is accepted but not
    used here; batches are fed to the model as they come.
    """
    model.train()
    for inputs, targets in train_loader:
        batch_loss = lossFn(model(inputs), targets)
        # Back-propagate, apply the update, then clear gradients for the next batch
        batch_loss.backward()
        opt.step()
        opt.zero_grad()
    
def validation_cycle(model, test_loader, epoch, num_epochs, device):
    """Evaluate `model` on `test_loader` and print the mean loss and accuracy.

    The model is switched to eval mode and gradients are disabled. Loss and
    accuracy are averaged per batch. `device` is accepted but not used here.
    """
    model.eval()
    loss_sum = 0
    accuracy_sum = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss_sum += lossFn(outputs, targets).item()
            accuracy_sum += accuracy(outputs, targets)

    num_batches = len(test_loader)
    avg_loss = loss_sum / num_batches
    avg_acc = accuracy_sum / num_batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%")

def fit_one_cycle(model, train_loader, test_loader, epoch, num_epochs, device):
    """Train `model` for one epoch, then print its validation metrics."""
    training_cycle(model=model, train_loader=train_loader, device=device)
    validation_cycle(
        model=model,
        test_loader=test_loader,
        epoch=epoch,
        num_epochs=num_epochs,
        device=device,
    )

# Materialize every batch on the target device once, before training starts
train_loader_gpu, test_loader_gpu = off_load_on_gpu(
    train_loader=train_loader, test_loader=test_loader, device=deviceGPU
)

# Run the epochs and measure the elapsed wall-clock time
num_epochs = 50
time_to_train = time.time()

for epoch in range(num_epochs):
    fit_one_cycle(
        model=modelNet,
        train_loader=train_loader_gpu,
        test_loader=test_loader_gpu,
        epoch=epoch,
        num_epochs=num_epochs,
        device=deviceGPU,
    )

time_to_train = time.time() - time_to_train
print(f"Time to train: {time_to_train:.2f} seconds")

```txt
Convolutional Network (With Batch Normalization)
```

In [None]:
# Import libraries
import torchvision, torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import time

# Define the transform.
# CIFAR-10 images are RGB, so Normalize is given one mean/std per channel rather
# than relying on a single value being broadcast over the three channels.
# The transform is applied when a sample is read from the dataset.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Load CIFAR-10 dataset (downloaded into data/ when missing)
train_dataset = torchvision.datasets.CIFAR10(root='data/', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(root='data/', train=False, transform=transform, download=True)

# Function to create DataLoader
def get_data_loader(dataset, batch_size, shuffle=True, num_workers=4, pin_memory=True):
    """Wrap `dataset` in a `torch.utils.data.DataLoader`.

    Args:
        dataset: Dataset to draw samples from.
        batch_size: Number of samples per batch.
        shuffle: Forwarded to the DataLoader. Defaults to True, the value that
            was previously hard-coded.
        num_workers: Forwarded to the DataLoader. Defaults to 4.
        pin_memory: Forwarded to the DataLoader. Defaults to True.

    Returns:
        The configured DataLoader.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )

# Display batch format
def get_batch_format(data_loader):
    """Print the shapes of the images and labels of the first batch of `data_loader`."""
    first_batch = next(iter(data_loader))
    images_t, labels_t = first_batch
    for caption, tensor in (('images.shape:', images_t), ('labels.shape:', labels_t)):
        print(caption, tensor.shape)

# Accuracy function
def accuracy(predictions, labels):
    """Return the fraction of rows of `predictions` whose arg-max over dim 1 equals `labels`."""
    predicted_labels = predictions.max(dim=1).indices
    num_correct = predicted_labels.eq(labels).sum().item()
    return num_correct / len(labels)

# Build the loaders: mini-batches of 128 for training, one batch of 10000 for testing
train_loader = get_data_loader(train_dataset, batch_size=128)
test_loader = get_data_loader(test_dataset, batch_size=10000)
get_batch_format(train_loader)

# Pick the compute device: CUDA when available, CPU otherwise
deviceGPU = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Loss function and learning rate
lossFn = F.cross_entropy
learningRate = 1e-4

# Define the custom MyConv class
class MyConv(nn.Module):
    """Conv2d -> BatchNorm2d -> activation block.

    Args:
        in_channels: Number of input channels of the convolution.
        out_channels: Number of output channels (also the BatchNorm feature count).
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.
        activation: Module applied after batch normalization. When None
            (the default), a fresh `nn.ReLU(inplace=False)` is created for this
            block.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, activation=None):
        super(MyConv, self).__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        self.bn = nn.BatchNorm2d(out_channels)
        # A module used as a default argument is built once at definition time and
        # would be shared by every MyConv; build the default per instance instead.
        self.act = nn.ReLU(inplace=False) if activation is None else activation

    def forward(self, x):
        """Apply convolution, batch normalization and the activation, in that order."""
        x = self.conv(x)
        x = self.bn(x)
        x = self.act(x)
        return x

# Define the ConvNetResidual model using MyConv
class ConvNetResidual(nn.Module):
    """Residual ConvNet whose 3x3 stages are `MyConv` blocks.

    Each stage computes `MyConv(x) + conv1x1(x)` and then max-pools by 2. The
    1x1 convolution brings the shortcut to the stage's output channel count so
    the two tensors can be added. `fc1` expects 128 * 4 * 4 features, so the
    network is sized for 3-channel 32x32 inputs (32 -> 16 -> 8 -> 4).
    """

    def __init__(self):
        super(ConvNetResidual, self).__init__()
        self.conv1 = MyConv(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = MyConv(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = MyConv(64, 128, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(128 * 4 * 4, 256)
        self.fc2 = nn.Linear(256, 10)

        # Residual connections
        self.res_conv1 = nn.Conv2d(3, 32, kernel_size=1, stride=1, padding=0)
        self.res_conv2 = nn.Conv2d(32, 64, kernel_size=1, stride=1, padding=0)
        self.res_conv3 = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0)

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a (batch, 3, 32, 32) input."""
        residual = self.res_conv1(x)
        x = self.conv1(x)
        x = x + residual
        x = self.pool(x)

        residual = self.res_conv2(x)
        x = self.conv2(x)
        x = x + residual
        x = self.pool(x)

        residual = self.res_conv3(x)
        x = self.conv3(x)
        x = x + residual
        x = self.pool(x)

        # Flatten per sample and keep the batch axis. A view(-1, 128 * 4 * 4)
        # would silently fold spatial positions into the batch axis for inputs of
        # another size; here such inputs fail at fc1 with a clear shape error.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Instantiate the batch-normalized residual network, then move it to the device
modelNet = ConvNetResidual()
modelNet = modelNet.to(deviceGPU)

# Adam optimizer (with weight decay) over the model parameters
opt = torch.optim.Adam(params=modelNet.parameters(), lr=learningRate, weight_decay=1e-4)

# Offload data to GPU
def off_load_on_gpu(train_loader, test_loader, device):
    """Iterate both loaders once and return their batches as lists held on `device`.

    Returns a `(train_batches, test_batches)` tuple of lists of `(x, y)` tuples.
    """
    def _to_device(loader):
        moved = []
        for images, labels in loader:
            moved.append((images.to(device), labels.to(device)))
        return moved

    return _to_device(train_loader), _to_device(test_loader)

# Training cycle
def training_cycle(model, train_loader, device):
    """Run one optimization pass over every batch of `train_loader`.

    Uses the notebook-level `lossFn` and `opt`. `device` is accepted but not
    used here; batches are fed to the model as they come.
    """
    model.train()
    for inputs, targets in train_loader:
        batch_loss = lossFn(model(inputs), targets)
        # Back-propagate, apply the update, then clear gradients for the next batch
        batch_loss.backward()
        opt.step()
        opt.zero_grad()

# Validation cycle
def validation_cycle(model, test_loader, epoch, num_epochs, device):
    """Evaluate `model` on `test_loader` and print the mean loss and accuracy.

    The model is switched to eval mode and gradients are disabled. Loss and
    accuracy are averaged per batch. `device` is accepted but not used here.
    """
    model.eval()
    loss_sum = 0
    accuracy_sum = 0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss_sum += lossFn(outputs, targets).item()
            accuracy_sum += accuracy(outputs, targets)

    num_batches = len(test_loader)
    avg_loss = loss_sum / num_batches
    avg_acc = accuracy_sum / num_batches
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}, Accuracy: {avg_acc * 100:.2f}%")

# Training and validation for one epoch
def fit_one_cycle(model, train_loader, test_loader, epoch, num_epochs, device):
    """Train `model` for one epoch, then print its validation metrics."""
    training_cycle(model=model, train_loader=train_loader, device=device)
    validation_cycle(
        model=model,
        test_loader=test_loader,
        epoch=epoch,
        num_epochs=num_epochs,
        device=device,
    )

# Materialize every batch on the target device once, before training starts
train_loader_gpu, test_loader_gpu = off_load_on_gpu(
    train_loader=train_loader, test_loader=test_loader, device=deviceGPU
)

# Run the epochs and measure the elapsed wall-clock time
num_epochs = 50
time_to_train = time.time()

for epoch in range(num_epochs):
    fit_one_cycle(
        model=modelNet,
        train_loader=train_loader_gpu,
        test_loader=test_loader_gpu,
        epoch=epoch,
        num_epochs=num_epochs,
        device=deviceGPU,
    )

time_to_train = time.time() - time_to_train
print(f"Time to train: {time_to_train:.2f} seconds")


Files already downloaded and verified
Files already downloaded and verified
images.shape: torch.Size([128, 3, 32, 32])
labels.shape: torch.Size([128])
Epoch 1/50, Loss: 1.1635, Accuracy: 59.01%
Epoch 2/50, Loss: 1.0136, Accuracy: 64.29%
Epoch 3/50, Loss: 0.9374, Accuracy: 66.94%
Epoch 4/50, Loss: 0.8843, Accuracy: 68.80%
Epoch 5/50, Loss: 0.8491, Accuracy: 70.33%
Epoch 6/50, Loss: 0.8289, Accuracy: 71.14%
Epoch 7/50, Loss: 0.8164, Accuracy: 71.71%
Epoch 8/50, Loss: 0.8094, Accuracy: 72.16%
Epoch 9/50, Loss: 0.8060, Accuracy: 72.40%
Epoch 10/50, Loss: 0.8111, Accuracy: 72.35%
Epoch 11/50, Loss: 0.8177, Accuracy: 72.35%
Epoch 12/50, Loss: 0.8271, Accuracy: 72.19%
Epoch 13/50, Loss: 0.8421, Accuracy: 72.12%
Epoch 14/50, Loss: 0.8529, Accuracy: 72.21%
Epoch 15/50, Loss: 0.8715, Accuracy: 71.96%
Epoch 16/50, Loss: 0.8835, Accuracy: 72.16%
Epoch 17/50, Loss: 0.8991, Accuracy: 72.11%
Epoch 18/50, Loss: 0.9135, Accuracy: 72.37%
Epoch 19/50, Loss: 0.9331, Accuracy: 72.55%
Epoch 20/50, Loss: 0.9