In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import torch 
import torch.nn as nn
import torchvision
import plotly.express as px
import torchvision.transforms as transforms
import torch.optim as optim
from torch.utils.data import random_split, ConcatDataset, Dataset
import torch.nn.functional as F
import optuna
import plotly.subplots as sp


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Load the dataset

In [2]:
# Load the training dataset
training_set = torchvision.datasets.CIFAR10( root = "./",train=True,
                                             download=True,transform = transforms.ToTensor() )

# Load the test dataset
test_set = torchvision.datasets.CIFAR10( root  = "./", train=False,
                                         download=True, transform = transforms.ToTensor())

# Split the test set to validation and test set
test_set, validation_set = random_split(test_set, [5000, 5000])


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:03<00:00, 44.1MB/s]


Extracting ./cifar-10-python.tar.gz to ./
Files already downloaded and verified


## Visualization

In [3]:
training_set

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./
    Split: Train
    StandardTransform
Transform: ToTensor()

In [4]:
print (type(training_set[0]))
print (len(training_set[0]))
training_set[0]


<class 'tuple'>
2


(tensor([[[0.2314, 0.1686, 0.1961,  ..., 0.6196, 0.5961, 0.5804],
          [0.0627, 0.0000, 0.0706,  ..., 0.4824, 0.4667, 0.4784],
          [0.0980, 0.0627, 0.1922,  ..., 0.4627, 0.4706, 0.4275],
          ...,
          [0.8157, 0.7882, 0.7765,  ..., 0.6275, 0.2196, 0.2078],
          [0.7059, 0.6784, 0.7294,  ..., 0.7216, 0.3804, 0.3255],
          [0.6941, 0.6588, 0.7020,  ..., 0.8471, 0.5922, 0.4824]],
 
         [[0.2431, 0.1804, 0.1882,  ..., 0.5176, 0.4902, 0.4863],
          [0.0784, 0.0000, 0.0314,  ..., 0.3451, 0.3255, 0.3412],
          [0.0941, 0.0275, 0.1059,  ..., 0.3294, 0.3294, 0.2863],
          ...,
          [0.6667, 0.6000, 0.6314,  ..., 0.5216, 0.1216, 0.1333],
          [0.5451, 0.4824, 0.5647,  ..., 0.5804, 0.2431, 0.2078],
          [0.5647, 0.5059, 0.5569,  ..., 0.7216, 0.4627, 0.3608]],
 
         [[0.2471, 0.1765, 0.1686,  ..., 0.4235, 0.4000, 0.4039],
          [0.0784, 0.0000, 0.0000,  ..., 0.2157, 0.1961, 0.2235],
          [0.0824, 0.0000, 0.0314,  ...,

Each row of the dataset is a tuple of length 2 containing the image and its label

In [5]:
#  List of class names corresponding to the CIFAR-10 dataset categories
class_name = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

In [6]:
# Create a 2x2 grid of subplots
fig = sp.make_subplots(rows=3, cols=3,
                       subplot_titles=[class_name[training_set[i][1]] for i in range(9)])

for i in range(3):
    for j in range(3):
        image_tensor = training_set[i*3+j][0]
        # transpose it to (height, width, channels) instead of (channels, height, width)
        image_np = image_tensor.numpy().transpose(1, 2, 0)
        
        # Add each image to the subplots
        fig.add_trace(px.imshow(image_np).data[0], row=i+1, col=j+1)


fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False)

# Show the plot
fig.show(renderer = "iframe")

## Create Model

In [7]:
class Model(nn.Module):
    def __init__(self,  conv1_out=32, conv2_out=64, conv3_out=128, kernel_size=3, 
                 pool_kernel=2, pool_stride=2, fc1_out=256, fc2_out=128, dropout_rate=0.1):
        super(Model, self).__init__()
        
        # Model parameters
        self.conv1_out = conv1_out
        self.conv2_out = conv2_out
        self.conv3_out = conv3_out
        self.kernel_size = kernel_size
        self.pool_kernel = pool_kernel
        self.pool_stride = pool_stride
        self.fc1_out = fc1_out
        self.fc2_out = fc2_out
        self.dropout_rate = dropout_rate
        
        # Define layers
        self.conv1 = nn.Conv2d(3, self.conv1_out, self.kernel_size)
        self.conv2 = nn.Conv2d(self.conv1_out, self.conv2_out, self.kernel_size)
        self.conv3 = nn.Conv2d(self.conv2_out, self.conv3_out, self.kernel_size)
        
        self.pool = nn.MaxPool2d(self.pool_kernel, self.pool_stride)
        
        # Adjusted input size after convolutions and pooling
        self.fc1_in = self.conv3_out * 2 * 2
        
        self.fc1 = nn.Linear(self.fc1_in, self.fc1_out)
        self.fc2 = nn.Linear(self.fc1_out, self.fc2_out)
        self.fc3 = nn.Linear(self.fc2_out, 10)
        
        self.dropout = nn.Dropout(self.dropout_rate)
    
    def forward(self, x):
        
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
    
        x = x.view(-1, self.fc1_in)  
    
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  
    
        x = F.relu(self.fc2(x))
        x = self.dropout(x)  
    
        x = self.fc3(x)
        
        return x


## Some helpful Functions

In [8]:
def calculate_loss(model, data):
    total_loss = 0.0
    total_samples = 0
    
    model.eval()  # Set the model to evaluation mode
    
    with torch.no_grad():  # Disable gradient calculation
        for batch in data:
            images, labels = batch
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)  # Forward pass
            loss = criterion(outputs, labels)  # Compute the loss
            total_loss += loss.item() * labels.size(0)  # Accumulate the loss
            total_samples += labels.size(0)  # Accumulate the number of samples
    
    model.train()  # Set the model back to training mode
    return round(total_loss / total_samples, 4)  # Return the average loss

In [9]:
def calculate_accuracy(model, data):
    
    correct = 0
    total = 0
    
    model.eval()  # Set the model to evaluation mode
    
    with torch.no_grad():# Disable gradient calculation
        for batch in data:
            
            images, labels = batch
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            # Convert to probabilities
            # probabilities = F.softmax(outputs, dim=1)
            
            # Get the predicted class by selecting the class with the highest score
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            
            # Update the number of correctly predicted samples
            correct += (predicted == labels).sum().item()
    model.train()
    return round(correct / total, 4) 

In [10]:
def train_model(model,optimizer, train_data, valid_data, epochs):

    # Lists to store losses and accuracies
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []
    
    for epoch in range(epochs):  # loop over the dataset multiple times
        for  batch in train_data:
            # get the inputs
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)
    
            # zero the parameter gradients
            optimizer.zero_grad()
    
            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
        # Calculate training and validation loss and accuracy
        train_loss = calculate_loss(model, train_data)
        train_accuracy = calculate_accuracy(model, train_data)
        val_loss = calculate_loss(model, valid_data)
        val_accuracy = calculate_accuracy(model, valid_data)
    
        # Append the values to the lists
        train_losses.append(train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss)
        val_accuracies.append(val_accuracy)
    
        # print statistics
        print(f"Epoch [{epoch + 1}/{epochs}]")
        print(f"Training Loss = {train_loss}, Training Accuracy = {train_accuracy}")
        print(f"Validation Loss = {val_loss}, Validation Accuracy = {val_accuracy}")
        print("\n")
    print("Done!!!")
    return  train_losses, train_accuracies, val_losses, val_accuracies 

In [11]:
def eff_train_model(model,optimizer, train_data,  epochs):

    for epoch in range(epochs):  # loop over the dataset multiple times
        for  batch in train_data:
            # get the inputs
            inputs, labels = batch
            inputs, labels = inputs.to(device), labels.to(device)
    
            # zero the parameter gradients
            optimizer.zero_grad()
    
            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

## Frist time training (with out data augmentation)

In [12]:
batch_size = 16

train_loader = torch.utils.data.DataLoader(training_set, batch_size = batch_size)

test_loader = torch.utils.data.DataLoader(test_set, batch_size = batch_size)

valid_loader = torch.utils.data.DataLoader(validation_set, batch_size = batch_size)

In [13]:
model1 = Model()
criterion = nn.CrossEntropyLoss()
optimizer1 = optim.Adam(model1.parameters(), weight_decay=1e-4)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model1.to(device)

Model(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=512, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [14]:
%%time
train_losses, train_accuracies, val_losses, val_accuracies = train_model(
                                model1, optimizer1, train_loader, valid_loader, 10 )

Epoch [1/10]
Training Loss = 1.3098, Training Accuracy = 0.5136
Validation Loss = 1.3409, Validation Accuracy = 0.4942


Epoch [2/10]
Training Loss = 1.098, Training Accuracy = 0.6085
Validation Loss = 1.1656, Validation Accuracy = 0.5862


Epoch [3/10]
Training Loss = 0.924, Training Accuracy = 0.6722
Validation Loss = 1.0207, Validation Accuracy = 0.6472


Epoch [4/10]
Training Loss = 0.8463, Training Accuracy = 0.702
Validation Loss = 0.9777, Validation Accuracy = 0.6634


Epoch [5/10]
Training Loss = 0.7766, Training Accuracy = 0.7266
Validation Loss = 0.9444, Validation Accuracy = 0.675


Epoch [6/10]
Training Loss = 0.7105, Training Accuracy = 0.7524
Validation Loss = 0.9112, Validation Accuracy = 0.693


Epoch [7/10]
Training Loss = 0.6777, Training Accuracy = 0.7637
Validation Loss = 0.9086, Validation Accuracy = 0.6894


Epoch [8/10]
Training Loss = 0.6507, Training Accuracy = 0.771
Validation Loss = 0.9066, Validation Accuracy = 0.692


Epoch [9/10]
Training Loss = 0.6954, Tr

In [15]:
epochs = list(range(1, len(train_losses) + 1))  # Epoch numbers
fig_loss = px.line(x=epochs, y=[train_losses, val_losses], 
                   title='Training and Validation Loss',
                   labels={'x': 'Epoch', 'y': 'Loss',
                           "train_losses": "Training Loss" , "val_losses":"Validation Loss" })

fig_loss.show(renderer = "iframe")

In [16]:
fig_accuracy = px.line(x=epochs, y=[train_accuracies, val_accuracies], 
                       title='Training and Validation Accuracy',
                       labels={'x': 'Epoch', 'y': 'Accuracy'})
fig_accuracy.show(renderer = "iframe")

## Data Augmentation

In [17]:
horizontal_flip = transforms.Compose([
    transforms.RandomHorizontalFlip(1),  # Randomly flip the image horizontally
    transforms.ToTensor(),              # Convert the image to a PyTorch tensor
])

In [18]:
# Define the transformations
color_jitter = transforms.Compose([
    transforms.ColorJitter(
        brightness=0.3,  
        contrast=0.3,   
        saturation=0.3, 
        hue=0.2   
    ),  # Randomly change the brightness, contrast, saturation, and hue
    
    transforms.ToTensor(),              # Convert the image to a PyTorch tensor
])

In [19]:
# Load the dataset with transformations
horizontally_flipped = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=horizontal_flip)
color_jittered = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=color_jitter)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:03<00:00, 49.2MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


In [20]:
fig = sp.make_subplots(rows=4, cols=3, subplot_titles=["Original", "Flipped", "Color-Jittered"])

for i in range(4):
    original_image = training_set[i][0]
    flipped_image = horizontally_flipped[i][0]
    color_jittered_image = color_jittered[i][0]
    
    # transpose it to (height, width, channels) instead of (channels, height, width)
    original_image = original_image.numpy().transpose(1, 2, 0)
    flipped_image = flipped_image.numpy().transpose(1, 2, 0)
    color_jittered_image = color_jittered_image.numpy().transpose(1, 2, 0)
    

    
    # Add each image to the subplots
    fig.add_trace(px.imshow(original_image).data[0], row=i+1, col=1)
    fig.add_trace(px.imshow(flipped_image).data[0], row=i+1, col=2)
    fig.add_trace(px.imshow(color_jittered_image).data[0], row=i+1, col=3)



fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False)


fig.show(renderer = "iframe")

In [21]:
combined_dataset = ConcatDataset([training_set, horizontally_flipped, color_jittered])

augmented_loader = torch.utils.data.DataLoader(combined_dataset, batch_size = batch_size, shuffle = True)

## Second Time Training (with data augmentation)

In [22]:
model2 = Model()
criterion = nn.CrossEntropyLoss()
optimizer2 = optim.Adam(model2.parameters(),weight_decay=1e-4)

model2.to(device)

Model(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=512, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=10, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [23]:
%%time
train_losses, train_accuracies, val_losses, val_accuracies = train_model(
    model2, optimizer2, augmented_loader, valid_loader, 10)

Epoch [1/10]
Training Loss = 1.0273, Training Accuracy = 0.6409
Validation Loss = 1.0649, Validation Accuracy = 0.6262


Epoch [2/10]
Training Loss = 0.8185, Training Accuracy = 0.7136
Validation Loss = 0.8954, Validation Accuracy = 0.6924


Epoch [3/10]
Training Loss = 0.7612, Training Accuracy = 0.7357
Validation Loss = 0.8689, Validation Accuracy = 0.6992


Epoch [4/10]
Training Loss = 0.6793, Training Accuracy = 0.7639
Validation Loss = 0.8258, Validation Accuracy = 0.7176


Epoch [5/10]
Training Loss = 0.6784, Training Accuracy = 0.7651
Validation Loss = 0.8255, Validation Accuracy = 0.7162


Epoch [6/10]
Training Loss = 0.6313, Training Accuracy = 0.776
Validation Loss = 0.8144, Validation Accuracy = 0.7276


Epoch [7/10]
Training Loss = 0.5667, Training Accuracy = 0.8031
Validation Loss = 0.7655, Validation Accuracy = 0.7452


Epoch [8/10]
Training Loss = 0.5847, Training Accuracy = 0.7958
Validation Loss = 0.7888, Validation Accuracy = 0.7418


Epoch [9/10]
Training Loss = 0.52

In [24]:
epochs = list(range(1, len(train_losses) + 1))  # Epoch numbers
fig_loss = px.line(x=epochs, y=[train_losses, val_losses], 
                   title='Training and Validation Loss',
                   labels={'x': 'Epoch', 'y': 'Loss',
                           "train_losses": "Training Loss" , "val_losses":"Validation Loss" })

fig_loss.show(renderer = "iframe")

In [25]:
fig_accuracy = px.line(x = epochs, y = [train_accuracies, val_accuracies], 
                       title = 'Training and Validation Accuracy',
                       labels = {'x': 'Epoch', 'y': 'Accuracy'})
fig_accuracy.show(renderer = "iframe")

## Hyperparameters Tuning

In [26]:
def objective(trial):
    # conv1_out = trial.suggest_int("conv1_out", 16, 64, step=16)
    # conv2_out = trial.suggest_int("conv2_out", 32, 128, step=32)
    # conv3_out = trial.suggest_int("conv3_out", 64, 256, step=64)
    # kernel_size = trial.suggest_int("kernel_size", 3, 5, step=2)
    # pool_kernel = trial.suggest_int("pool_kernel", 2, 3)
    # pool_stride = trial.suggest_int("pool_stride", 1, 2)
    
    # Suggest hyperparameters
    fc1_out = trial.suggest_categorical("fc1_out", [128, 256, 512, 1024])
    fc2_out = trial.suggest_categorical("fc2_out", [128, 256, 512, 1024])
    dropout_rate = trial.suggest_float("dropout_rate", 0.1, 0.4, step = 0.1)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log = True)
    weight_decay = trial.suggest_float("weight_decay", 1e-5, 1e-2, log = True)



    # Create the model
    model = Model( fc1_out = fc1_out, fc2_out = fc2_out, dropout_rate = dropout_rate)
    model.to(device)
    
    # Define optimizer and loss function
    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.CrossEntropyLoss()

    # Start with 3 epochs
    epochs = 3
    if trial.number > 25:  # After 15 trials, increase epochs for better evaluation
        epochs = 7

    eff_train_model( model, optimizer, train_loader, epochs ) # this is wrong, You should tune on the aug data

    
    # Evaluate the model
    return calculate_accuracy(model, valid_loader)  # Optuna will try to maximize this




In [27]:
%%time
# Run the Optuna study
study = optuna.create_study(direction = "maximize")
study.optimize(objective, n_trials = 75 )

[I 2025-02-20 08:18:04,078] A new study created in memory with name: no-name-759b2e01-431e-482e-a8aa-8dd0a01df892
[I 2025-02-20 08:18:47,897] Trial 0 finished with value: 0.0964 and parameters: {'fc1_out': 128, 'fc2_out': 256, 'dropout_rate': 0.4, 'lr': 0.0012373151124907616, 'weight_decay': 0.002431337136840648}. Best is trial 0 with value: 0.0964.
[I 2025-02-20 08:19:31,452] Trial 1 finished with value: 0.5462 and parameters: {'fc1_out': 512, 'fc2_out': 128, 'dropout_rate': 0.30000000000000004, 'lr': 0.00017868280372648662, 'weight_decay': 0.0005733935543312864}. Best is trial 1 with value: 0.5462.
[I 2025-02-20 08:20:15,140] Trial 2 finished with value: 0.5172 and parameters: {'fc1_out': 256, 'fc2_out': 512, 'dropout_rate': 0.1, 'lr': 0.0003466387522339117, 'weight_decay': 0.003688688799851092}. Best is trial 1 with value: 0.5462.
[I 2025-02-20 08:20:58,425] Trial 3 finished with value: 0.0964 and parameters: {'fc1_out': 128, 'fc2_out': 512, 'dropout_rate': 0.4, 'lr': 0.000799241645

CPU times: user 1h 38min 20s, sys: 46 s, total: 1h 39min 6s
Wall time: 1h 40min 15s


In [28]:
# Print the best parameters
best_params = study.best_params
print("Best hyperparameters:", best_params)

Best hyperparameters: {'fc1_out': 256, 'fc2_out': 128, 'dropout_rate': 0.2, 'lr': 0.000657688200046267, 'weight_decay': 0.00011255457175417183}


## Last Time Training 

In [29]:
%%time
# Create a new model using the best hyperparameters
model3 = Model(
    fc1_out = best_params["fc1_out"],
    fc2_out = best_params["fc2_out"],
    dropout_rate = best_params["dropout_rate"]
)
model3.to(device)

# Define optimizer and loss function
optimizer3 = optim.Adam(model3.parameters(), 
                            lr=best_params["lr"], 
                            weight_decay=best_params["weight_decay"])

# Train the new model
train_model(model3, optimizer3, augmented_loader, valid_loader, 20)


Epoch [1/20]
Training Loss = 1.0532, Training Accuracy = 0.6206
Validation Loss = 1.0756, Validation Accuracy = 0.6108


Epoch [2/20]
Training Loss = 0.8336, Training Accuracy = 0.7067
Validation Loss = 0.9062, Validation Accuracy = 0.6798


Epoch [3/20]
Training Loss = 0.7507, Training Accuracy = 0.7403
Validation Loss = 0.845, Validation Accuracy = 0.7072


Epoch [4/20]
Training Loss = 0.6747, Training Accuracy = 0.7643
Validation Loss = 0.7977, Validation Accuracy = 0.7286


Epoch [5/20]
Training Loss = 0.5888, Training Accuracy = 0.7967
Validation Loss = 0.7481, Validation Accuracy = 0.743


Epoch [6/20]
Training Loss = 0.5542, Training Accuracy = 0.8076
Validation Loss = 0.7198, Validation Accuracy = 0.7556


Epoch [7/20]
Training Loss = 0.5543, Training Accuracy = 0.8064
Validation Loss = 0.7308, Validation Accuracy = 0.7548


Epoch [8/20]
Training Loss = 0.5365, Training Accuracy = 0.8118
Validation Loss = 0.7449, Validation Accuracy = 0.7444


Epoch [9/20]
Training Loss = 0.488

([1.0532,
  0.8336,
  0.7507,
  0.6747,
  0.5888,
  0.5542,
  0.5543,
  0.5365,
  0.4888,
  0.4882,
  0.4653,
  0.4458,
  0.4225,
  0.4383,
  0.4328,
  0.3955,
  0.4479,
  0.4143,
  0.3561,
  0.3837],
 [0.6206,
  0.7067,
  0.7403,
  0.7643,
  0.7967,
  0.8076,
  0.8064,
  0.8118,
  0.8295,
  0.8299,
  0.8366,
  0.8466,
  0.8532,
  0.8483,
  0.8481,
  0.8609,
  0.8457,
  0.8539,
  0.8765,
  0.8671],
 [1.0756,
  0.9062,
  0.845,
  0.7977,
  0.7481,
  0.7198,
  0.7308,
  0.7449,
  0.7011,
  0.7129,
  0.7067,
  0.691,
  0.6946,
  0.7253,
  0.7197,
  0.7116,
  0.7553,
  0.7582,
  0.7103,
  0.7343],
 [0.6108,
  0.6798,
  0.7072,
  0.7286,
  0.743,
  0.7556,
  0.7548,
  0.7444,
  0.7648,
  0.7654,
  0.7674,
  0.7706,
  0.7618,
  0.7604,
  0.765,
  0.7682,
  0.7558,
  0.7682,
  0.7808,
  0.7664])

In [30]:
epochs = list(range(1, len(train_losses) + 1))  # Epoch numbers
fig_loss = px.line(x=epochs, y=[train_losses, val_losses], 
                   title='Training and Validation Loss',
                   labels={'x': 'Epoch', 'y': 'Loss',
                           "train_losses": "Training Loss" , "val_losses":"Validation Loss" })

fig_loss.show(renderer = "iframe")

In [31]:
fig_accuracy = px.line(x = epochs, y = [train_accuracies, val_accuracies], 
                       title = 'Training and Validation Accuracy',
                       labels = {'x': 'Epoch', 'y': 'Accuracy'})
fig_accuracy.show(renderer = "iframe")

## Test set evaluation

In [32]:
calculate_accuracy(model3, test_loader)

0.7698