In [2]:
import torch
from torch import nn
# aliter: import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torch.utils.data import Dataset, DataLoader
import math
import torchvision
import torchvision.transforms.v2 as transforms
import torchvision.transforms.functional as TF
import random

import numpy as np
import cv2
from PIL import Image
import os
import sys
import matplotlib.pyplot as plt
from collections import namedtuple
from torchvision import models

# device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

from dataset import SRdatasets
from model import FSRCNN
from utils import PSNR, predict, display_random, display_particular
from vgg import Vgg16, PerceptualLoss



# Initialize VGG-16 feature extractor (requires_grad=False for fixed weights)
vgg_model = Vgg16(requires_grad=False).to(device)

# Initialize Perceptual Loss
perceptual_loss_fn = PerceptualLoss(vgg_model).to(device)

# instantiate dataset object
dataset = SRdatasets(dataset_path = 'C:/Users/athar/MLprojects/dataloader_task/Datasets')

print("dataset class, vgg model, and perceptual loss function initialised successfully")



dataset class, vgg model, and perceptual loss function initialised successfully


In [None]:

train_size = 512*2 # 512*2 = 1024, *2 becuase of deterministic transform (horizontal flip)
# val_size = 17*2
# test_size = 17*2
test_size = 17*4

num_epochs = 5
learning_rate = 1e-3
batch_size = 16


model = FSRCNN().to(device)
pixel_loss = nn.MSELoss()
optimizer = torch.optim.SGD(
    [
        {'params': model.first_part.parameters()},
        {'params': model.mid_part.parameters()},
        {'params': model.last_part.parameters(), 'lr': learning_rate * 0.1}
    ],
    lr = learning_rate
) # "low learning rate for last layer gives promising results" ~authors of SRCNN paper

batch_loss_agg = 0 # batch loss aggregator (aggregates the loss accross given batch)
batch_loss_list = []

batch_acc_agg = 0
batch_acc_list = []

epoch_loss = 0
epoch_loss_list = []
epoch_acc_list = []


# variables for validation accuracy
batch_val_agg = 0
batch_val_list = []
epoch_val_list = []

prev_epochs = 0

checkpnt_flag = 1
# transfering previous checkpoint
PATH = "fsrcnn_phase1.pt"
try:
#{
    checkpoint = torch.load(PATH, weights_only = True)
    print('checkpoint loaded successfully')
    transfer = int(input("transfer previous model? 1/0: "))
    if(transfer == 1):
        model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        epoch_loss_list = checkpoint['epoch_loss_list']
        epoch_acc_list = checkpoint['epoch_acc_list']
        prev_epochs = checkpoint['epoch']
        print('model transfered successfully')
    
    # this utility is throwing error - can't pickle the train_set and test_set instances. 
    # For now, I have fixed the seed value to ensure that always the same partition is being done
    # plan to work it out later
    transfer_sets = int(input('transfer the previous train, val, and test sets? (1/0): '))
    if(transfer_sets == 1):
    #{
        try:
            train_set = checkpoint['train_set']
            # val_set = checkpoint['val_set']
            test_set = checkpoint['test_set']
            print('partitioned datasets loaded successfully')
        except:
            print('couldn\'t load partitioned datasets. gotta partition them freshly')
            # torch.manual_seed(5)
            # train_set, val_set, test_set = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
            train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])
    #}
    else:
        # train_set, val_set, test_set = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])
        train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])
    
#}
except Exception as e:
    print('Exception occured, running without loading checkpoint')
    checkpnt_flag = 0
    print(e)

if checkpnt_flag == 0:
    train_set, test_set = torch.utils.data.random_split(dataset, [train_size, test_size])

# initialising train and test loaders
train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle = True)
# val_loader = torch.utils.data.DataLoader(val_set, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size = batch_size, shuffle = True)

num_of_batches = len(train_loader)

# training
print("starting training")
gradient_accumulation_steps = 2 # effectivec batch size = 64
for epoch in range(num_epochs):
    for i, (inputs, targets) in enumerate(train_loader):
        inputs = inputs.to(device)
        targets = targets.to(device)

        outputs = model(inputs)
        outputs = outputs.to(device)
        perceptual_loss_fn(outputs, targets)
        loss = pixel_loss(outputs, targets) + 0.01* perceptual_loss

        loss.backward() # computes and accumulates gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)

        if( (i+1) % gradient_accumulation_steps == 0 ):
            optimizer.step() # updates parameters after 4 batches (64 images) are analysed
            optimizer.zero_grad() # resets gradients to 0
            
        
        batch_loss_agg = batch_loss_agg + loss.item()/num_of_batches
        # batch_loss_list.append(loss.item())

        step_train_accuracy = PSNR().batch(outputs, targets)
        psnr_input = PSNR().batch(inputs, targets)
        # batch_acc_list.append(step_train_accuracy)
        batch_acc_agg = batch_acc_agg + step_train_accuracy/num_of_batches

        
        if (i + 1) % 4 == 0:
            print(f'Epoch [{epoch + 1}/{num_epochs}], Step [{i + 1}/{len(train_loader)}], Loss: {loss.item():.4f}, PSNR: {step_train_accuracy:.4f}, diff(pred - input): {(step_train_accuracy - psnr_input):.4f} ')

        # for freeing up gpu space
        # del inputs, targets, outputs, loss
        # torch.cuda.empty_cache()
        
    epoch_acc_list.append(batch_acc_agg)
    epoch_loss_list.append(batch_loss_agg)
    
    batch_loss_agg = 0
    batch_acc_agg = 0

# plotting quality metrics
plt.plot(range(1, prev_epochs + num_epochs+1), epoch_loss_list)
plt.title('loss vs epochs')
plt.show()
print('epoch_loss_list: ', epoch_loss_list)
plt.plot(range(1, prev_epochs + num_epochs+1), epoch_acc_list)
plt.title('PSNR vs epochs')
print('Training finished')
plt.show()


# displaying some randomly picked examples
display_random(test_loader, model, batch_size)

# for observing a particular sample (particular image)
see_part = int(input('want to see particular image?(1/0): '))
if see_part:
    i = int(input("Enter the number of example to display: "))
    display_particular(i, model)

# saving model
PATH = "fsrcnn_phase1.pt"
torch.save({
            'epoch_loss_list': epoch_loss_list,
            'epoch_acc_list': epoch_acc_list,
            'epoch': prev_epochs + num_epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # 'train_set': train_set,
            # 'test_set': test_set,
            }, PATH)

print(f"model saved as '{PATH}'")
# stores at: C:\Users\athar\AppData\Roaming\Python\Python312\site-packages
  

checkpoint loaded successfully


transfer previous model? 1/0:  1


Exception occured, running without loading checkpoint
loaded state dict has a different number of parameter groups
starting training
Epoch [1/5], Step [4/64], Loss: 5876.8013, PSNR: 10.4394, diff(pred - input): -11.9229 
Epoch [1/5], Step [8/64], Loss: 6432.2822, PSNR: 10.0472, diff(pred - input): -12.8443 
Epoch [1/5], Step [12/64], Loss: 5599.4639, PSNR: 10.6493, diff(pred - input): -13.8554 
Epoch [1/5], Step [16/64], Loss: 6345.1768, PSNR: 10.1064, diff(pred - input): -13.2940 
Epoch [1/5], Step [20/64], Loss: 6232.0923, PSNR: 10.1845, diff(pred - input): -13.6307 
Epoch [1/5], Step [24/64], Loss: 5891.6167, PSNR: 10.4285, diff(pred - input): -12.4405 
Epoch [1/5], Step [28/64], Loss: 4936.6641, PSNR: 11.1965, diff(pred - input): -13.5136 
Epoch [1/5], Step [32/64], Loss: 5217.3931, PSNR: 10.9563, diff(pred - input): -13.2347 
Epoch [1/5], Step [36/64], Loss: 5630.3184, PSNR: 10.6255, diff(pred - input): -10.7867 
Epoch [1/5], Step [40/64], Loss: 4906.7656, PSNR: 11.2229, diff(pred

In [7]:
from torch.utils.data import DataLoader

# Assuming SRdatasets() is your dataset class
sample_set = SRdatasets()

# Use a DataLoader for batching
batch_size = 16  # Example batch size, adjust as needed
train_loader = DataLoader(sample_set, batch_size=16, shuffle=True)
learning_rate = 1e-3
pixel_loss = nn.MSELoss()


optimizer = torch.optim.Adam(model.parameters())  # Assuming you're using Adam optimizer
model.to(device)  # Move model to the device (GPU/CPU)
model.train()     # Set model to training mode

# Training loop
for epoch in range(2):  # Loop over epochs
    for input_tensor, target in train_loader:
        # Move input and target to the correct device (GPU/CPU)
        input_tensor = input_tensor.to(device)
        target = target.to(device)
        print(input_tensor)
        print(f'input_tensor_size = {input_tensor.shape}')
        # Forward pass
        prediction = model(input_tensor)
        print(f'prediction shape = {prediction.shape}')
        print(prediction)
        # Compute the loss
        loss = pixel_loss(prediction, target)
        
        # Backward pass and optimization
        optimizer.zero_grad()  # Reset gradients
        loss.backward()        # Backpropagate
        optimizer.step()       # Update weights
        
        # Print loss for each batch
        print(f'Epoch [{epoch + 1}/5], Loss: {loss.item():.4f}')


tensor([[[[ 66., 109., 180.,  ..., 167., 168., 168.],
          [115., 148., 201.,  ..., 192., 191., 191.],
          [195., 211., 236.,  ..., 231., 230., 230.],
          ...,
          [ 70.,  69.,  69.,  ...,  80.,  80.,  79.],
          [ 69.,  64.,  56.,  ...,  79.,  79.,  78.],
          [ 69.,  61.,  48.,  ...,  78.,  78.,  78.]],

         [[ 64., 108., 180.,  ..., 170., 169., 169.],
          [113., 147., 201.,  ..., 195., 195., 194.],
          [195., 210., 236.,  ..., 238., 237., 237.],
          ...,
          [ 76.,  75.,  74.,  ...,  93.,  92.,  91.],
          [ 74.,  69.,  60.,  ...,  92.,  91.,  91.],
          [ 73.,  65.,  51.,  ...,  91.,  91.,  91.]],

         [[ 49.,  95., 171.,  ..., 160., 160., 160.],
          [103., 138., 195.,  ..., 188., 188., 188.],
          [191., 207., 234.,  ..., 235., 234., 233.],
          ...,
          [ 72.,  70.,  67.,  ...,  86.,  86.,  85.],
          [ 68.,  61.,  50.,  ...,  85.,  85.,  84.],
          [ 65.,  55.,  39.,  ...

In [21]:
# saving model
PATH = "fsrcnn_phase1.pt"
torch.save({
            'epoch_loss_list': epoch_loss_list,
            'epoch_acc_list': epoch_acc_list,
            'epoch': prev_epochs + num_epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # 'train_set': train_set,
            # 'test_set': test_set,
            }, PATH)

print(f"model saved as '{PATH}'")
# stores at: C:\Users\athar\AppData\Roaming\Python\Python312\site-packages

model saved as 'fsrcnn_phase1.pt'
