In [3]:
# Mount Google Drive so that files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

# Path of the zip archive on the mounted Drive.
zip_file_path = '/content/drive/MyDrive/archive.zip'


Mounted at /content/drive


In [None]:
# Extract the archive; no -d destination is given, so files land in the current working directory.
# NOTE(review): later cells read from /content/drive/MyDrive/extracted_files/... — confirm the
# extracted files actually end up there before running the rest of the notebook.
!unzip '/content/drive/MyDrive/archive.zip'

In [1]:
import torch
from torch.utils.data import DataLoader,Dataset
from torchvision import transforms
import torch.optim as optim
from PIL import Image
import pandas as pd
import random

In [2]:
class PaperclipDataset(Dataset):
    """Dataset of paperclip images paired with their clip counts.

    Each item is a tuple ``(image_id, image_tensor, label)``:

    * ``image_id`` is the entry of ``train_image_ids`` at the requested index,
    * ``image_tensor`` is the image converted to RGB, scaled by ``ToTensor``,
      resized to 28x28 and flattened to a 1-D tensor of length 3 * 28 * 28,
    * ``label`` is the ``clip_count`` stored for that id in the CSV (the first
      row wins when an id appears more than once).

    Args:
        train_image_ids: Sequence of image ids this dataset serves.
        train_csv: Path to a CSV file with ``id`` and ``clip_count`` columns.
        images_folder: Folder containing the ``clips-<id>.png`` image files.

    Raises:
        KeyError: from ``__getitem__`` when an id has no row in the CSV.
    """

    # Spatial size every image is resized to before flattening.
    IMAGE_SIZE = (28, 28)

    def __init__(self, train_image_ids, train_csv, images_folder):
        self.train_image_ids = train_image_ids
        self.train_df = pd.read_csv(train_csv)
        self.images_folder = images_folder

        # Build the id -> count lookup once so that __getitem__ does not have to
        # filter the whole DataFrame for every single sample.
        unique_rows = self.train_df.drop_duplicates(subset='id', keep='first')
        self._label_by_id = dict(
            zip(unique_rows['id'].tolist(), unique_rows['clip_count'].values)
        )

    def __len__(self):
        """Return the number of image ids served by this dataset."""
        return len(self.train_image_ids)

    def __getitem__(self, idx):
        """Load, preprocess and return the sample at position ``idx``."""
        image_id = self.train_image_ids[idx]
        try:
            label = self._label_by_id[image_id]
        except KeyError:
            raise KeyError(
                f"image id {image_id!r} has no 'clip_count' entry in the CSV"
            ) from None

        image_path = f"{self.images_folder}/clips-{image_id}.png"
        # The context manager closes the file handle; convert() returns a fully
        # loaded copy, so the pixel data stays valid after the file is closed.
        # Forcing RGB guarantees exactly three channels whatever the PNG mode is
        # (RGBA loses its alpha channel, grayscale/palette images are expanded).
        with Image.open(image_path) as image:
            rgb_image = image.convert("RGB")

        image_tensor = transforms.ToTensor()(rgb_image)
        image_tensor = transforms.Resize(self.IMAGE_SIZE)(image_tensor)

        # NOTE: random rotation / horizontal-flip augmentation is not applied here.

        # Flatten (3, 28, 28) into the 1-D feature vector the MLP expects.
        return image_id, torch.flatten(image_tensor), label

Dataset paths (inputs for the dataset and data loaders)

In [15]:
# Dataset locations on the mounted Drive; both are passed to PaperclipDataset in later cells.
train_csv = '/content/drive/MyDrive/extracted_files/train.csv'  # Path to train.csv
images_folder = '/content/drive/MyDrive/extracted_files/clips-data-2020/clips'  # Path to the images folder

MLP model definition and printed architecture summary

In [4]:
import torch
import torch.nn as nn
import math
class MLP(nn.Module):
    """Fully connected regressor whose width shrinks by a factor of 8 per layer.

    Starting from ``input_size`` features, ``Linear`` + ``ReLU`` pairs are
    stacked, each reducing the width to ``floor(width / 8)``, for as long as the
    current width is at least 8. A final ``Linear`` layer maps the remaining
    features to a single output value.

    Args:
        input_size: Number of input features (positive integer).

    Attributes:
        n: Number of hidden ``Linear`` + ``ReLU`` pairs, i.e. floor(log8(input_size)).
        layers: Plain Python list of the layer modules, in order.
        model: ``nn.Sequential`` wrapping the same modules; this is what
            registers the parameters and what ``forward`` runs.

    Raises:
        ValueError: if ``input_size`` is smaller than 1.
    """

    def __init__(self, input_size):
        super().__init__()
        if input_size < 1:
            raise ValueError(f"input_size must be a positive integer, got {input_size}")

        self.layers = []
        self.n = 0
        in_features = input_size
        # Integer arithmetic gives floor(log8(input_size)) exactly; computing it
        # as log(x) / log(8) in floating point can be off by one for some sizes.
        while in_features >= 8:
            out_features = in_features // 8
            self.layers.append(nn.Linear(in_features, out_features))
            self.layers.append(nn.ReLU())
            in_features = out_features
            self.n += 1

        # Output head: one regression value per sample.
        self.layers.append(nn.Linear(in_features, 1))
        self.model = nn.Sequential(*self.layers)

    def forward(self, x):
        """Run ``x`` of shape (batch, input_size) through the network; returns (batch, 1)."""
        return self.model(x)

# Flattened input length: 28 x 28 pixels times 3 channels, matching the dataset's output.
input_size = 28*28*3
model = MLP(input_size)

# Printing the model architecture
print(model)


MLP(
  (model): Sequential(
    (0): Linear(in_features=2352, out_features=294, bias=True)
    (1): ReLU()
    (2): Linear(in_features=294, out_features=36, bias=True)
    (3): ReLU()
    (4): Linear(in_features=36, out_features=4, bias=True)
    (5): ReLU()
    (6): Linear(in_features=4, out_features=1, bias=True)
  )
)


Splitting the labelled data into training and validation sets (the test ids are loaded later from test.csv)

In [5]:
from sklearn.model_selection import train_test_split
import numpy as np
# Load the image ids listed in train.csv.
df = pd.read_csv(train_csv)
img_ids = df['id'].tolist()

# Hold out 20% of the ids for validation; the fixed random_state keeps the split reproducible.
train_ids, val_ids = train_test_split(img_ids, test_size=0.2, random_state=42)

train_dataset = PaperclipDataset(train_ids, train_csv, images_folder)
val_dataset = PaperclipDataset(val_ids, train_csv, images_folder)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
# Validation data is only evaluated, so it is served in a fixed order.
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)
# Backward-compatible alias for the original, misspelled name of the validation loader.
dal_loader = val_loader

In [6]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Build a fresh network and move its parameters onto the selected device.
model = MLP(input_size)
model = model.to(device)

In [7]:
# Sanity check: push a single training batch through the (still untrained) model and
# print a few predictions next to their ground-truth counts.
for _, images, labels in train_loader:
    images, labels = images.to(device), labels.to(device)

    # Inference only, so skip building the autograd graph.
    with torch.no_grad():
        outputs = model(images).cpu().numpy()  # shape: (batch, 1)

    # Show at most 5 samples; the min() guards against a batch smaller than 5.
    for i in range(min(5, len(labels))):
        print("Predicted Count:", outputs[i].item())
        print("Ground Truth Count:", labels[i].item())
    break

# The model summary was already printed above, right after the MLP definition.



Predicted Count: 0.47074243426322937
Ground Truth Count: 69
Predicted Count: 0.4691500961780548
Ground Truth Count: 25
Predicted Count: 0.46871283650398254
Ground Truth Count: 62
Predicted Count: 0.4699503183364868
Ground Truth Count: 35
Predicted Count: 0.4686519205570221
Ground Truth Count: 9


Randomly sampling subsets from the training, validation and test sets

In [8]:
test_csv = '/content/drive/MyDrive/extracted_files/test.csv'
# Kept under its own name so the training DataFrame `df` is not silently replaced.
test_df = pd.read_csv(test_csv)
test_ids = test_df['id'].tolist()

# A dedicated, seeded generator makes the subsets reproducible across re-runs and leaves
# the global `random` state untouched.
SAMPLE_SEED = 42
sampler = random.Random(SAMPLE_SEED)

# sample() draws without replacement and does NOT reorder the source lists, which are
# still referenced by the datasets built from them. min() keeps the original "take up
# to N" behaviour when a list holds fewer than N ids.
train_sample = sampler.sample(train_ids, min(2000, len(train_ids)))
val_sample = sampler.sample(val_ids, min(250, len(val_ids)))
test_sample = sampler.sample(test_ids, min(250, len(test_ids)))

Train loader and validation loader

In [10]:
# Training subset: reshuffled every epoch.
train_dataset = PaperclipDataset(train_sample, train_csv, images_folder)
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)

# Validation subset: served in a fixed order so every epoch is evaluated on identical
# batches (shuffling would change which samples fall into the smaller final batch).
val_dataset = PaperclipDataset(val_sample, train_csv, images_folder)
val_loader = DataLoader(val_dataset, batch_size=8, shuffle=False)

In [9]:
# Adam optimizer over all parameters of `model`, with a learning rate of 0.001.
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [26]:
# Number of passes over the training loader performed by the training loop.
num_epochs = 20
# Make sure the model lives on the selected device before training starts.
model = model.to(device)

In [12]:
# Mean-squared-error loss, used by the training loop to compare model outputs with the labels.
criterion = nn.MSELoss()

In [None]:
# Mean per-batch loss of every epoch, for the training and the validation set.
train_losses = np.zeros(num_epochs)
val_losses = np.zeros(num_epochs)

for epoch in range(num_epochs):

    # Training phase
    model.train()
    train_loss = 0.0

    for _, images, labels in train_loader:
        images = images.to(device)
        # MSELoss needs floating-point targets; the loader yields integer counts.
        labels = labels.to(device).float()

        optimizer.zero_grad()
        outputs = model(images)
        # squeeze(1) removes only the feature axis: (batch, 1) -> (batch,). A bare
        # squeeze() would also collapse a batch of size 1 into a scalar.
        loss = criterion(outputs.squeeze(1), labels)

        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    # Validation phase: no gradient tracking and no parameter updates.
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for _, images, labels in val_loader:
            images = images.to(device)
            # Same float cast as in the training phase, so both losses are computed alike.
            labels = labels.to(device).float()
            outputs = model(images)
            val_loss += criterion(outputs.squeeze(1), labels).item()

    # Average of the per-batch mean losses.
    train_loss /= len(train_loader)
    val_loss /= len(val_loader)

    train_losses[epoch] = train_loss
    val_losses[epoch] = val_loss

    print(f"Epoch: {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

In [None]:
import matplotlib.pyplot as plt

# Plot both loss curves on shared axes. The x range is derived from the recorded losses,
# so the plot stays valid when the number of epochs changes.
epochs = range(len(train_losses))

plt.plot(epochs, train_losses, label='Train Loss')
plt.plot(epochs, val_losses, label='Validation Loss')
plt.xlabel('No of epoch')
# One shared y label plus a legend; separate ylabel() calls would overwrite each other.
plt.ylabel('Loss')
plt.legend()
plt.show()