# Resnet model 


This file contains the implementation of the Transfer Learning Reznet model

In [1]:
# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet50
import os
import warnings


warnings.filterwarnings('always') 


EPOCHS = 5
LEARNING_RATE = 0.0001
BATCH_SIZE = 32

In [2]:
# Set GPU/MPS growth for the model training
device = torch.device('mps')

### Load data 

In [3]:
# Path to the train and evaluation folders
train_data_folder = '/Users/stamatiosorphanos/Documents/MultiModal-Deep-Learning/MultiModal/train'
test_data_folder = '/Users/stamatiosorphanos/Documents/MultiModal-Deep-Learning/MultiModal/test'

# Number of classes is equal to the number of videos we have as data
num_classes = len(os.listdir(train_data_folder))     



# Load and transform the training dataset
data_transform = {
    'train': transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
    transforms.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]),

    'test': transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
}


# Load and transform the training and evaluation dataset
train_dataset = torchvision.datasets.ImageFolder(root=train_data_folder, transform=data_transform['train'])
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

eval_dataset = torchvision.datasets.ImageFolder(root=test_data_folder, transform=data_transform['test'])
eval_loader = torch.utils.data.DataLoader(eval_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [4]:
# Create a training results file
training_file = open('train.txt', 'a')

# Load the pre-trained ResNet-50 model
model = resnet50(pretrained=True)

# Freeze the initial layers
for param in model.parameters():
    param.requires_grad = False

# Replace the last fully connected layer to match the number of classes
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)

# Move the model to the device (GPU/CPU)
model = model.to(device)


# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

# Train the model
total_step = len(train_loader)


for epoch in range(EPOCHS):
    correct = 0
    total_samples = 0

    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print training progress
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{EPOCHS}], Step [{i+1}/{total_step}], Loss: {loss.item():.4f}')


        # Compute predictions
        with torch.no_grad():
            outputs = model(images)
            _, predicted = torch.topk(outputs, k=10, dim=1)
            total_samples += labels.size(0)
            correct += (predicted == labels.view(-1, 1)).sum().item()

    # Calculate top-5 accuracy
    accuracy = correct / total_samples

    # Print precision and recall per epoch
    print(f'Epoch [{epoch+1}/{EPOCHS}] - Top-5 Accuracy: {accuracy:.4f}')
    training_file.writelines(f'Epoch [{epoch+1}/{EPOCHS}] - Top-10 Accuracy: {accuracy:.4f}\n')


training_file.close()
print('Training finished.')

# Save the trained model
os.makedirs('saved_models', exist_ok=True)
torch.save(model, 'saved_models/resnet50_trained.pt')



Epoch [1/5], Step [100/691], Loss: 4.3598
Epoch [1/5], Step [200/691], Loss: 4.1650
Epoch [1/5], Step [300/691], Loss: 3.8337
Epoch [1/5], Step [400/691], Loss: 3.7135
Epoch [1/5], Step [500/691], Loss: 3.1672
Epoch [1/5], Step [600/691], Loss: 3.4129
Epoch [1/5] - Top-5 Accuracy: 0.5568
Epoch [2/5], Step [100/691], Loss: 3.0012
Epoch [2/5], Step [200/691], Loss: 3.1153
Epoch [2/5], Step [300/691], Loss: 3.0218
Epoch [2/5], Step [400/691], Loss: 3.0317
Epoch [2/5], Step [500/691], Loss: 2.8681
Epoch [2/5], Step [600/691], Loss: 2.7558
Epoch [2/5] - Top-5 Accuracy: 0.8064
Epoch [3/5], Step [100/691], Loss: 2.9717
Epoch [3/5], Step [200/691], Loss: 2.2536
Epoch [3/5], Step [300/691], Loss: 2.3389
Epoch [3/5], Step [400/691], Loss: 2.3445
Epoch [3/5], Step [500/691], Loss: 2.4903
Epoch [3/5], Step [600/691], Loss: 2.6867
Epoch [3/5] - Top-5 Accuracy: 0.8579
Epoch [4/5], Step [100/691], Loss: 2.1421
Epoch [4/5], Step [200/691], Loss: 2.3611
Epoch [4/5], Step [300/691], Loss: 2.3535
Epoch [

In [5]:
# Switch to evaluation mode
load_model = torch.load('saved_models/resnet50_trained.pt')
load_model.eval()

# Evaluation
total_correct = 0

for images, labels in eval_loader:
    images = images.to(device)
    labels = labels.to(device)

    with torch.no_grad():
        outputs = load_model(images)
        _, predicted = torch.max(outputs.data, 1)
        total_correct += (predicted == labels).sum().item()

accuracy = total_correct / len(eval_dataset)
print(f'Accuracy on evaluation data: {accuracy:.2%}')

Accuracy on evaluation data: 57.50%
