In [22]:
import torch
import os

def save_pytorch_model(model, filepath, epoch=None):
    """
    Save a PyTorch model checkpoint to disk.

    The checkpoint is a dictionary with three entries:
    'model_state_dict' (the result of ``model.state_dict()``),
    'model_args' (the object stored on ``model.args``) and 'epoch'.
    The model class itself is not stored, so the same class must be
    available when the checkpoint is loaded again.

    Args:
        model: PyTorch model (nn.Module) that exposes an ``args`` attribute
        filepath: Path to save the model; missing parent directories are created
        epoch: Optional epoch number to include in the save (stored as None if omitted)

    Raises:
        AttributeError: If ``model`` has no ``args`` attribute
    """
    # A bare file name has an empty dirname; fall back to the current directory
    target_dir = os.path.dirname(filepath) or '.'
    os.makedirs(target_dir, exist_ok=True)

    # Prepare the save dictionary
    save_dict = {
        'model_state_dict': model.state_dict(),
        'model_args': model.args,  # Needed to rebuild the model before loading the weights
        'epoch': epoch,
    }

    # Save the model
    torch.save(save_dict, filepath)
    print(f"Model saved successfully to {filepath}")

def load_pytorch_model(filepath, num_classes=None, map_location='cpu'):
    """
    Load a PyTorch model from a checkpoint written by save_pytorch_model.

    Args:
        filepath: Path to the saved model
        num_classes: Size of the classification head. When None, it is taken
            from the checkpoint ('num_classes' entry if present, otherwise the
            number of rows of 'fc.weight' in the state dict), falling back to
            1000 if neither is available.
        map_location: Forwarded to torch.load. Defaults to 'cpu' so that a
            checkpoint saved from a GPU can be loaded on a CPU-only machine;
            move the returned model with ``model.to(device)`` afterwards.

    Returns:
        model: Loaded PyTorch model
        epoch: Epoch number when the model was saved (if available)

    Raises:
        RuntimeError: If the state dict does not match the rebuilt model
            (raised by ``load_state_dict``).
    """
    # SECURITY: weights_only=False runs the full pickle machinery, which is
    # needed because 'model_args' holds a project object rather than plain
    # tensors. Only load checkpoints from a trusted source.
    save_dict = torch.load(filepath, map_location=map_location, weights_only=False)
    state_dict = save_dict['model_state_dict']

    # Recover the head size from the checkpoint instead of hard-coding it:
    # a mismatching num_classes makes load_state_dict fail on the fc layer.
    if num_classes is None:
        num_classes = save_dict.get('num_classes')
    if num_classes is None:
        if 'fc.weight' in state_dict:
            num_classes = state_dict['fc.weight'].shape[0]
        else:
            num_classes = 1000  # previous hard-coded default

    # Create a new model instance with the saved arguments
    model = ImageMamba(args=save_dict['model_args'], num_classes=num_classes)

    # Load the state dictionary
    model.load_state_dict(state_dict)

    return model, save_dict.get('epoch')

# Example usage:
# Saving the model
# save_pytorch_model(complex_model, '25epoch_complex_model.pt', epoch=25)

# Loading the model
# loaded_model, epoch = load_pytorch_model('25epoch_complex_model.pt')

# New dataset
Find new dataset

In [1]:
!python --version

Python 3.9.20


In [2]:
!pip install tensorflow==2.18.0



In [7]:
!pip uninstall keras tensorflow
!pip install keras tensorflow

^C


In [1]:
!pip install keras==3.6



# Imports

In [8]:
import numpy as np
from tensorflow import keras
from keras.datasets import cifar10
from __future__ import print_function
from keras.models import Sequential
from keras.models import save_model, load_model
from keras.layers import Dense, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D

import keras.backend as K
# NOTE(review): presumably resets Keras' global state; in the visible cells Keras is
# only needed for `cifar10.load_data()` — confirm this call is still required.
K.clear_session()

# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
# from model import Mamba, ModelArgs  # Import your custom Mamba implementation
# Assuming the model classes are defined in `model.py`
from model import ImageMamba, ModelArgs

In [2]:
# Fetch the CIFAR-10 train/test splits through Keras
(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

# Move the channel axis forward (N, H, W, C) -> (N, C, H, W) for PyTorch,
# then cast to float32 and scale the pixel values into [0, 1]
X_train = X_train.transpose(0, 3, 1, 2)
X_train = X_train.astype('float32') / 255.0
X_test = X_test.transpose(0, 3, 1, 2)
X_test = X_test.astype('float32') / 255.0

# Wrap images (float32) and labels (int64) as tensors, one split at a time
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.long)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.long)

# Datasets for both splits; only the training split gets a shuffled loader here
train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)

In [17]:
# Human-readable CIFAR-10 class names; the list position is used as the label index
class_names = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]


# Defining the model

In [3]:
# Hyperparameters of the baseline model
num_classes = 10  # CIFAR-10 has 10 classes
d_model, n_layer = 64, 4

# Pick the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Bundle the architecture settings (vocab_size is passed as 0; the original note says it is unused here)
model_args = ModelArgs(d_model=d_model, n_layer=n_layer, vocab_size=0)

# Build the ImageMamba classifier from those settings
model = ImageMamba(model_args, num_classes=num_classes)

# Move the model to the chosen device; as the last expression this also displays the module
model.to(device)

ImageMamba(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (layers): ModuleList(
    (0-3): 4 x ImageResidualBlock(
      (conv): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
  )
  (norm_f): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=128, out_features=10, bias=True)
)

## Training MAMBA on CIFAR10

In [4]:
print(torch.cuda.is_available())

True


In [18]:
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Thu_Sep_12_02:55:00_Pacific_Daylight_Time_2024
Cuda compilation tools, release 12.6, V12.6.77
Build cuda_12.6.r12.6/compiler.34841621_0


In [None]:
#!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124

^C


# Loss, optimizer and training loop

In [5]:
# Loss and optimizer for the baseline model
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

# Train for a fixed number of epochs and report the mean batch loss after each one
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    batch_losses = []

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        # Collapse the labels to shape [batch_size] before computing the loss
        labels = labels.to(device).view(-1)

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # The model returns (logits, probabilities); only the logits feed the loss
        logits, probabilities = model(inputs)
        loss = criterion(logits, labels)

        # Backpropagate and update the weights
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    running_loss = sum(batch_losses)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')

Epoch [1/10], Loss: 1.9929
Epoch [2/10], Loss: 1.8012
Epoch [3/10], Loss: 1.7019


KeyboardInterrupt: 

## Evaluating the model + testing inference

In [None]:
# Evaluate in mini-batches. Iterating a TensorDataset directly yields one
# sample at a time without a batch dimension, i.e. one forward pass per image.
EVAL_BATCH_SIZE = 256

# Switch to evaluation mode
model.eval()
y_pred = []
y_true = []
y_prob = []  # List to store probabilities

# Test the model on the test dataset (shuffle=False keeps the dataset order)
with torch.no_grad():
    for inputs, labels in DataLoader(test_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size], as the training loop does
        logits, probabilities = model(inputs)  # The model returns both logits and probabilities
        predicted = logits.argmax(dim=1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.numpy())
        y_prob.extend(probabilities.cpu().numpy())  # One row of class probabilities per sample

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy on the test set: {accuracy:.4f}')

# Convert probabilities to percentages
y_prob_percentages = np.array(y_prob) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages):")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_percentages[i][class_index]:.2f}%")

# Repeat the same evaluation on the training data
model.eval()
y_pred_train = []
y_true_train = []
y_prob_train = []  # List to store probabilities

with torch.no_grad():
    for inputs, labels in DataLoader(train_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size]
        logits, probabilities = model(inputs)
        predicted = logits.argmax(dim=1)
        y_pred_train.extend(predicted.cpu().numpy())
        y_true_train.extend(labels.numpy())
        y_prob_train.extend(probabilities.cpu().numpy())

# Calculate accuracy on training data
accuracy_train = accuracy_score(y_true_train, y_pred_train)
print(f'Accuracy on the training set: {accuracy_train:.4f}')

# Convert probabilities to percentages
y_prob_train_percentages = np.array(y_prob_train) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages) for training data:")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true_train[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_train_percentages[i][class_index]:.2f}%")

In [None]:
# Mean probability the model assigns to the true class, test split
correct_class_probs_test = [probs[label] for probs, label in zip(y_prob, y_true)]
average_prob_correct_class_test = np.mean(correct_class_probs_test)
print(f'Average probability for the correct class on the test data: {average_prob_correct_class_test:.4f}')

# Same statistic for the training split
correct_class_probs_train = [probs[label] for probs, label in zip(y_prob_train, y_true_train)]
average_prob_correct_class_train = np.mean(correct_class_probs_train)
print(f'Average probability for the correct class on the training data: {average_prob_correct_class_train:.4f}')

# Try 2, more epochs
As we can see, there is not much difference between the probabilities assigned to the true class on the training-set instances and on the test-set instances.

This might indicate that the dataset was too hard to learn in 10 epochs, so let's add 15 more epochs to get to a total of 25 epochs.

In [None]:
# Continue training the same model (reuses `criterion` and `optimizer` from the earlier cell)
num_epochs = 15
for epoch in range(num_epochs):
    model.train()
    batch_losses = []

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        # Collapse the labels to shape [batch_size] before computing the loss
        labels = labels.to(device).view(-1)

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # The model returns (logits, probabilities); only the logits feed the loss
        logits, probabilities = model(inputs)
        loss = criterion(logits, labels)

        # Backpropagate and update the weights
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    running_loss = sum(batch_losses)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')

In [None]:
# Evaluate in mini-batches. Iterating a TensorDataset directly yields one
# sample at a time without a batch dimension, i.e. one forward pass per image.
EVAL_BATCH_SIZE = 256

# Switch to evaluation mode
model.eval()
y_pred = []
y_true = []
y_prob = []  # List to store probabilities

# Test the model on the test dataset (shuffle=False keeps the dataset order)
with torch.no_grad():
    for inputs, labels in DataLoader(test_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size], as the training loop does
        logits, probabilities = model(inputs)  # The model returns both logits and probabilities
        predicted = logits.argmax(dim=1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.numpy())
        y_prob.extend(probabilities.cpu().numpy())  # One row of class probabilities per sample

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy on the test set: {accuracy:.4f}')

# Convert probabilities to percentages
y_prob_percentages = np.array(y_prob) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages):")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_percentages[i][class_index]:.2f}%")

# Repeat the same evaluation on the training data
model.eval()
y_pred_train = []
y_true_train = []
y_prob_train = []  # List to store probabilities

with torch.no_grad():
    for inputs, labels in DataLoader(train_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size]
        logits, probabilities = model(inputs)
        predicted = logits.argmax(dim=1)
        y_pred_train.extend(predicted.cpu().numpy())
        y_true_train.extend(labels.numpy())
        y_prob_train.extend(probabilities.cpu().numpy())

# Calculate accuracy on training data
accuracy_train = accuracy_score(y_true_train, y_pred_train)
print(f'Accuracy on the training set: {accuracy_train:.4f}')

# Convert probabilities to percentages
y_prob_train_percentages = np.array(y_prob_train) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages) for training data:")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true_train[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_train_percentages[i][class_index]:.2f}%")

In [None]:
# Mean probability the model assigns to the true class, test split
correct_class_probs_test = [probs[label] for probs, label in zip(y_prob, y_true)]
average_prob_correct_class_test = np.mean(correct_class_probs_test)
print(f'Average probability for the correct class on the test data: {average_prob_correct_class_test:.4f}')

# Same statistic for the training split
correct_class_probs_train = [probs[label] for probs, label in zip(y_prob_train, y_true_train)]
average_prob_correct_class_train = np.mean(correct_class_probs_train)
print(f'Average probability for the correct class on the training data: {average_prob_correct_class_train:.4f}')

As we can see, there is still no significant difference. Let's build a stronger model and train it for more epochs to reach a loss close to 0, and then try inference again:

# Try 3, complex model
To get a much lower loss, we will both increase the number of epochs and make the model more complex.

In [None]:
# Hyperparameters of the larger model
num_classes = 10  # CIFAR-10 has 10 classes
d_model, n_layer = 128, 8

# Pick the GPU when one is available, otherwise stay on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Bundle the architecture settings (vocab_size is passed as 0; the original note says it is unused here)
model_args = ModelArgs(d_model=d_model, n_layer=n_layer, vocab_size=0)

# Build the larger ImageMamba classifier from those settings
complex_model = ImageMamba(model_args, num_classes=num_classes)

# Move the model to the chosen device; as the last expression this also displays the module
complex_model.to(device)

ImageMamba(
  (conv1): Conv2d(3, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (layers): ModuleList(
    (0-7): 8 x ImageResidualBlock(
      (conv): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    )
  )
  (norm_f): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): AdaptiveAvgPool2d(output_size=(1, 1))
  (fc): Linear(in_features=256, out_features=10, bias=True)
)

In [7]:
# Loss and a fresh optimizer bound to the larger model's parameters
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(complex_model.parameters(), lr=1e-4, weight_decay=1e-5)

# Train for a fixed number of epochs and report the mean batch loss after each one
num_epochs = 25
for epoch in range(num_epochs):
    complex_model.train()
    batch_losses = []

    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        # Collapse the labels to shape [batch_size] before computing the loss
        labels = labels.to(device).view(-1)

        # Clear the gradients left over from the previous step
        optimizer.zero_grad()

        # The model returns (logits, probabilities); only the logits feed the loss
        logits, probabilities = complex_model(inputs)
        loss = criterion(logits, labels)

        # Backpropagate and update the weights
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    running_loss = sum(batch_losses)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')

Epoch [1/25], Loss: 1.9134
Epoch [2/25], Loss: 1.6960
Epoch [3/25], Loss: 1.5853
Epoch [4/25], Loss: 1.4995
Epoch [5/25], Loss: 1.4248
Epoch [6/25], Loss: 1.3690
Epoch [7/25], Loss: 1.3213
Epoch [8/25], Loss: 1.2828
Epoch [9/25], Loss: 1.2517
Epoch [10/25], Loss: 1.2271
Epoch [11/25], Loss: 1.2041
Epoch [12/25], Loss: 1.1814
Epoch [13/25], Loss: 1.1629
Epoch [14/25], Loss: 1.1459
Epoch [15/25], Loss: 1.1294
Epoch [16/25], Loss: 1.1153
Epoch [17/25], Loss: 1.0992
Epoch [18/25], Loss: 1.1103
Epoch [19/25], Loss: 1.1682
Epoch [20/25], Loss: 1.1348
Epoch [21/25], Loss: 1.1272
Epoch [22/25], Loss: 1.1174
Epoch [23/25], Loss: 1.0885
Epoch [24/25], Loss: 1.0898
Epoch [25/25], Loss: 1.0860


In [25]:
# Save the model after 25 epochs
save_pytorch_model(complex_model, '../trained_models/25epoch_complex_model.pt', epoch=25)

# Later, to load the model (use the same path it was saved to):
# loaded_model, epoch = load_pytorch_model('../trained_models/25epoch_complex_model.pt')

Model saved successfully to ../trained_models/25epoch_complex_model.pt


In [18]:
# Evaluate in mini-batches. Iterating a TensorDataset directly yields one
# sample at a time without a batch dimension, i.e. one forward pass per image.
EVAL_BATCH_SIZE = 256

# Switch to evaluation mode
complex_model.eval()
y_pred = []
y_true = []
y_prob = []  # List to store probabilities

# Test the model on the test dataset (shuffle=False keeps the dataset order)
with torch.no_grad():
    for inputs, labels in DataLoader(test_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size], as the training loop does
        logits, probabilities = complex_model(inputs)  # The model returns both logits and probabilities
        predicted = logits.argmax(dim=1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.numpy())
        y_prob.extend(probabilities.cpu().numpy())  # One row of class probabilities per sample

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy on the test set: {accuracy:.4f}')

# Convert probabilities to percentages
y_prob_percentages = np.array(y_prob) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages):")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_percentages[i][class_index]:.2f}%")

# Repeat the same evaluation on the training data
complex_model.eval()
y_pred_train = []
y_true_train = []
y_prob_train = []  # List to store probabilities

with torch.no_grad():
    for inputs, labels in DataLoader(train_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size]
        logits, probabilities = complex_model(inputs)
        predicted = logits.argmax(dim=1)
        y_pred_train.extend(predicted.cpu().numpy())
        y_true_train.extend(labels.numpy())
        y_prob_train.extend(probabilities.cpu().numpy())

# Calculate accuracy on training data
accuracy_train = accuracy_score(y_true_train, y_pred_train)
print(f'Accuracy on the training set: {accuracy_train:.4f}')

# Convert probabilities to percentages
y_prob_train_percentages = np.array(y_prob_train) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages) for training data:")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true_train[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_train_percentages[i][class_index]:.2f}%")

Accuracy on the test set: 0.6134
First few predicted class names and their probabilities (in percentages):
Instance 1:
  Correct Class: cat
  Class: airplane, Probability: 0.82%
  Class: automobile, Probability: 1.33%
  Class: bird, Probability: 6.28%
  Class: cat, Probability: 47.06%
  Class: deer, Probability: 2.20%
  Class: dog, Probability: 10.97%
  Class: frog, Probability: 25.96%
  Class: horse, Probability: 0.13%
  Class: ship, Probability: 4.24%
  Class: truck, Probability: 1.03%
Instance 2:
  Correct Class: ship
  Class: airplane, Probability: 4.24%
  Class: automobile, Probability: 31.70%
  Class: bird, Probability: 0.07%
  Class: cat, Probability: 0.04%
  Class: deer, Probability: 0.01%
  Class: dog, Probability: 0.01%
  Class: frog, Probability: 0.01%
  Class: horse, Probability: 0.01%
  Class: ship, Probability: 52.40%
  Class: truck, Probability: 11.52%
Instance 3:
  Correct Class: ship
  Class: airplane, Probability: 8.50%
  Class: automobile, Probability: 3.96%
  Class:

In [19]:
# Mean probability the model assigns to the true class, test split
correct_class_probs_test = [probs[label] for probs, label in zip(y_prob, y_true)]
average_prob_correct_class_test = np.mean(correct_class_probs_test)
print(f'Average probability for the correct class on the test data: {average_prob_correct_class_test:.4f}')

# Same statistic for the training split
correct_class_probs_train = [probs[label] for probs, label in zip(y_prob_train, y_true_train)]
average_prob_correct_class_train = np.mean(correct_class_probs_train)
print(f'Average probability for the correct class on the training data: {average_prob_correct_class_train:.4f}')

Average probability for the correct class on the test data: 0.4554
Average probability for the correct class on the training data: 0.4649


In [21]:
# Train/test gap in mean true-class probability, relative to the training value, in percent
relative_gap_percent = (
    (average_prob_correct_class_train - average_prob_correct_class_test)
    / average_prob_correct_class_train
    * 100
)
print("Difference in percentage is: ", relative_gap_percent)

Difference in percentage is:  2.050626650452614


I think the model started to overfit, which is why the loss stopped decreasing. Next, we will try a different optimiser together with a learning-rate scheduler.

In [None]:
from torch.optim.lr_scheduler import CosineAnnealingLR
import torch.optim as optim

# Number of epochs for this run. It must be set BEFORE the scheduler is built:
# the scheduler reads T_max once, so a stale `num_epochs` from an earlier cell
# would give the cosine schedule the wrong period.
num_epochs = 50  # More epochs to ensure convergence

# Define optimizer with AdamW
optimizer = optim.AdamW(complex_model.parameters(), lr=1e-3, weight_decay=1e-4)

# Cosine annealing over exactly the epochs trained below
scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)

# Training loop with scheduler (reuses `criterion` from the earlier training cell)
for epoch in range(num_epochs):
    complex_model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass; the model returns (logits, probabilities)
        logits, probabilities = complex_model(inputs)

        # Flatten the labels to [batch_size]
        labels = labels.view(-1)

        # Compute loss on the logits
        loss = criterion(logits, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    # Step the scheduler once per epoch, after the optimizer updates
    scheduler.step()

    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(train_loader):.4f}')


In [None]:
# `save_model` is the Keras helper imported from keras.models, while `complex_model`
# is a PyTorch module, so the checkpoint is written with the notebook's own
# `save_pytorch_model` instead, next to the 25-epoch checkpoint.
# epoch=50 counts only this training run; it continued from the 25-epoch model.
save_pytorch_model(complex_model, '../trained_models/50epoch_complex_model.pt', epoch=50)

In [None]:
# Evaluate in mini-batches. Iterating a TensorDataset directly yields one
# sample at a time without a batch dimension, i.e. one forward pass per image.
EVAL_BATCH_SIZE = 256

# Switch to evaluation mode
complex_model.eval()
y_pred = []
y_true = []
y_prob = []  # List to store probabilities

# Test the model on the test dataset (shuffle=False keeps the dataset order)
with torch.no_grad():
    for inputs, labels in DataLoader(test_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size], as the training loop does
        logits, probabilities = complex_model(inputs)  # The model returns both logits and probabilities
        predicted = logits.argmax(dim=1)
        y_pred.extend(predicted.cpu().numpy())
        y_true.extend(labels.numpy())
        y_prob.extend(probabilities.cpu().numpy())  # One row of class probabilities per sample

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy on the test set: {accuracy:.4f}')

# Convert probabilities to percentages
y_prob_percentages = np.array(y_prob) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages):")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_percentages[i][class_index]:.2f}%")

# Repeat the same evaluation on the training data
complex_model.eval()
y_pred_train = []
y_true_train = []
y_prob_train = []  # List to store probabilities

with torch.no_grad():
    for inputs, labels in DataLoader(train_dataset, batch_size=EVAL_BATCH_SIZE, shuffle=False):
        inputs = inputs.to(device)
        labels = labels.view(-1)  # Flatten to [batch_size]
        logits, probabilities = complex_model(inputs)
        predicted = logits.argmax(dim=1)
        y_pred_train.extend(predicted.cpu().numpy())
        y_true_train.extend(labels.numpy())
        y_prob_train.extend(probabilities.cpu().numpy())

# Calculate accuracy on training data
accuracy_train = accuracy_score(y_true_train, y_pred_train)
print(f'Accuracy on the training set: {accuracy_train:.4f}')

# Convert probabilities to percentages
y_prob_train_percentages = np.array(y_prob_train) * 100

# Display the first few predicted class names along with their probabilities and the correct class
print("First few predicted class names and their probabilities (in percentages) for training data:")
for i in range(5):
    print(f"Instance {i+1}:")
    print(f"  Correct Class: {class_names[y_true_train[i]]}")
    for class_index, class_name in enumerate(class_names):
        print(f"  Class: {class_name}, Probability: {y_prob_train_percentages[i][class_index]:.2f}%")

In [None]:
# Mean probability the model assigns to the true class, test split
correct_class_probs_test = [probs[label] for probs, label in zip(y_prob, y_true)]
average_prob_correct_class_test = np.mean(correct_class_probs_test)
print(f'Average probability for the correct class on the test data: {average_prob_correct_class_test:.4f}')

# Same statistic for the training split
correct_class_probs_train = [probs[label] for probs, label in zip(y_prob_train, y_true_train)]
average_prob_correct_class_train = np.mean(correct_class_probs_train)
print(f'Average probability for the correct class on the training data: {average_prob_correct_class_train:.4f}')