In [1]:
import torch
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
device

device(type='cuda')

In [2]:
import optuna
from torch.utils.data import Dataset , DataLoader
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [3]:
# Seed PyTorch's global random number generator so runs are reproducible
SEED = 42
torch.manual_seed(SEED)

<torch._C.Generator at 0x1ae7f94f890>

In [4]:
# Read the dataset CSV into a DataFrame and preview its first rows
csv_path = "fmnist_small.csv"
df = pd.read_csv(csv_path)
df.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,9,0,0,0,0,0,0,0,0,0,...,0,7,0,50,205,196,213,165,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,1,0,0,0,...,142,142,142,21,0,3,0,0,0,0
3,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,8,0,0,0,0,0,0,0,0,0,...,213,203,174,151,188,10,0,0,0,0


### Workflow:
--- 
- We want to use VGG16 on this dataset, but VGG16 only accepts 3-channel 224x224 images, so every sample has to be converted first.

1. Reshape each sample from a flat 1D vector of 784 pixels to a 2D array of size (28, 28)
2. Cast the array to np.uint8, because the conversion to a PIL image requires np.uint8 data
3. Convert the image from 1 channel to 3 channels by stacking the same array three times
4. Convert the array to a PIL image (3 channels, 28 x 28)
5. Resize the PIL image to 256 x 256 (3 channels)
6. Center-crop it to 224 x 224 (3 channels)
7. Convert it into a tensor
8. Scale the tensor (done by the same `ToTensor` step)
9. Normalize the tensor with the mean and std that VGG16 expects

In [11]:
# Separate the target column from the input features
y = df["label"]
X = df.drop(columns="label")

In [6]:
# Hold out 20% of the rows for testing; random_state pins the split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Convert every split from a pandas object to a NumPy array
X_train, X_test, y_train, y_test = (
    split.to_numpy() for split in (X_train, X_test, y_train, y_test)
)

In [7]:
# transformations[resize,center-crop,tensor scale,normalize]
from torchvision.transforms import transforms

# Per-channel mean and std used for normalization (the values given for VGG16)
vgg16_mean = [0.485, 0.456, 0.406]
vgg16_std = [0.229, 0.224, 0.225]

preprocessing_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    # Converts the PIL image into a tensor and scales its values
    transforms.ToTensor(),
    transforms.Normalize(mean=vgg16_mean, std=vgg16_std),
]
custom_transform = transforms.Compose(preprocessing_steps)

In [8]:
from PIL import Image
import numpy as np 

In [9]:
class CustomDataset(Dataset):
    """Serve flattened 28x28 single-channel rows as 3-channel images with labels.

    Args:
        features: Indexable collection where ``features[index]`` is a NumPy
            array holding the 784 pixel values of one image.
        labels: Integer class labels, one per row of ``features``; stored as
            a ``torch.long`` tensor.
        transform: Optional callable applied to the 3-channel PIL image
            (for example a torchvision ``Compose``). When ``None``, the sample
            is returned as a float tensor of shape (3, 28, 28) scaled to
            [0, 1] instead of failing on the missing callable.
    """

    def __init__(self , features , labels , transform = None):
        self.features = features
        self.labels = torch.tensor(labels, dtype=torch.long)
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.features)

    def __getitem__(self , index):
        """Return ``(image, label)`` for the sample at ``index``."""
        feature , label = self.features[index] , self.labels[index]

        # flat pixel vector -> (28, 28) image
        feature = feature.reshape(28 , 28)
        # PIL conversion below needs uint8 data
        feature = feature.astype(np.uint8)
        # replicate the single channel three times -> (28, 28, 3)
        feature = np.stack([feature] * 3 , axis = -1)

        if self.transform is None:
            # No pipeline supplied: return a channels-first float tensor in
            # [0, 1] rather than calling None (the previous behaviour raised
            # TypeError for the default argument).
            feature = torch.from_numpy(feature).permute(2 , 0 , 1).float().div(255)
            return feature , label

        # convert array to PIL image so image transforms can be applied
        feature = Image.fromarray(feature)
        feature = self.transform(feature)

        return feature , label

In [13]:
# Wrap both splits in CustomDataset, using the same preprocessing pipeline for each
train_dataset = CustomDataset(features=X_train, labels=y_train, transform=custom_transform)
test_dataset = CustomDataset(features=X_test, labels=y_test, transform=custom_transform)

In [14]:
# Pinned host memory only speeds up host-to-GPU copies. The notebook also
# supports a CPU fallback, where pin_memory=True is useless and makes the
# DataLoader emit a warning, so enable it only when CUDA is available.
use_pinned_memory = torch.cuda.is_available()

# Training batches are reshuffled every epoch; evaluation order stays fixed.
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True, pin_memory = use_pinned_memory)
test_loader = DataLoader(test_dataset, batch_size = 32, pin_memory = use_pinned_memory)

In [17]:
# import VGG16 
from torchvision.models import vgg16 , VGG16_Weights

In [20]:
# Build VGG16 initialised with the default pretrained weights
vgg16_model = vgg16(weights=VGG16_Weights.DEFAULT, progress=True)

In [21]:
# Display the full module tree of the loaded model
vgg16_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [22]:
# Display only the convolutional feature extractor
vgg16_model.features

Sequential(
  (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (1): ReLU(inplace=True)
  (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (3): ReLU(inplace=True)
  (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (6): ReLU(inplace=True)
  (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (8): ReLU(inplace=True)
  (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (11): ReLU(inplace=True)
  (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (13): ReLU(inplace=True)
  (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (15): ReLU(inplace=True)
  (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (17): Conv2d(256, 512, kernel_si

In [23]:
# Display the original fully connected head (its last layer has 1000 outputs)
vgg16_model.classifier

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)

In [24]:
# Keep the pretrained convolutional weights fixed: no gradients for the feature extractor
for conv_param in vgg16_model.features.parameters():
    conv_param.requires_grad = False

In [26]:
# Swap the original fully connected head for a smaller one ending in 10 outputs.
# The first layer keeps the 25088 input features of the original head.
head_layers = [
    nn.Linear(25088, 1024),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(1024, 512),
    nn.ReLU(),
    nn.Dropout(0.5),
    nn.Linear(512, 10),
]
vgg16_model.classifier = nn.Sequential(*head_layers)

In [27]:
# Display the model again to inspect it after the classifier replacement
vgg16_model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [28]:
# Move the model to the selected device (CUDA when available, otherwise CPU)
vgg16_model.to(device)

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [29]:
# Training hyperparameters: optimizer learning rate and number of epochs
learning_rate, epochs = 1e-4, 10

In [30]:
# Cross-entropy loss; Adam is given only the classifier head's parameters
criterion = nn.CrossEntropyLoss()
head_parameters = vgg16_model.classifier.parameters()
optimizer = optim.Adam(head_parameters, lr=learning_rate)

In [33]:
# Train the classifier head for `epochs` passes over the training set and
# report the mean per-batch loss after each epoch.
for epoch in range(epochs):
    # Put the model in training mode explicitly so dropout is active even if
    # this cell is re-run after the model was switched to eval mode.
    vgg16_model.train()

    total_epoch_loss = 0
    for batch_features , batch_labels in train_loader:
        # move the features and labels to the selected device
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)

        # forward pass
        outputs = vgg16_model(batch_features)
        # calculate loss
        loss = criterion(outputs , batch_labels)
        total_epoch_loss += loss.item()

        # clear gradients left over from the previous step
        optimizer.zero_grad()
        # backward pass
        loss.backward()
        # update weights and biases
        optimizer.step()

    # average of the per-batch losses for this epoch
    avg_epoch_loss = total_epoch_loss / len(train_loader)
    print(f"Epoch: {epoch + 1} | Loss: {avg_epoch_loss}")

Epoch: 1 | Loss: 0.33181832775473596
Epoch: 2 | Loss: 0.21286680191755294
Epoch: 3 | Loss: 0.1483735544482867
Epoch: 4 | Loss: 0.09803010982150832
Epoch: 5 | Loss: 0.06411681519200405
Epoch: 6 | Loss: 0.03482172496694451
Epoch: 7 | Loss: 0.030814834049281974
Epoch: 8 | Loss: 0.020970261377903324
Epoch: 9 | Loss: 0.01908998082779969
Epoch: 10 | Loss: 0.011157091001514345


In [34]:
# Switch the model to evaluation mode before measuring accuracy
vgg16_model.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [35]:
# Accuracy on the training split.
# Set evaluation mode here rather than relying on a separate cell having been
# run first; otherwise dropout would still be active during the measurement.
vgg16_model.eval()

total = 0
correct = 0

# no gradients are needed for evaluation
with torch.no_grad():
    for batch_features , batch_labels in train_loader:
        # move the features and labels to the selected device
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)
        # forward pass
        outputs = vgg16_model(batch_features)

        # predicted class = index of the largest output along dim 1
        _ , predicted = torch.max(outputs , 1)
        total += batch_labels.shape[0]
        correct += (predicted == batch_labels).sum().item()

print(f"Accuracy on Train Data: {correct/total}")

Accuracy on Train Data: 1.0


In [36]:
# Accuracy on the held-out test split.
# Set evaluation mode here rather than relying on a separate cell having been
# run first; otherwise dropout would still be active during the measurement.
vgg16_model.eval()

total = 0
correct = 0

# no gradients are needed for evaluation
with torch.no_grad():
    for batch_features , batch_labels in test_loader:
        # move the features and labels to the selected device
        batch_features = batch_features.to(device)
        batch_labels = batch_labels.to(device)
        # forward pass
        outputs = vgg16_model(batch_features)

        # predicted class = index of the largest output along dim 1
        _ , predicted = torch.max(outputs , 1)
        total += batch_labels.shape[0]
        correct += (predicted == batch_labels).sum().item()

print(f"Accuracy on Test Data: {correct/total}")

Accuracy on Test Data: 0.8925
