# HW3 Image Classification


# Check GPU Type

In [1]:
!nvidia-smi

Wed Sep 27 01:04:47 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.161.03   Driver Version: 470.161.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   33C    P8    11W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla T4            Off  | 00000000:00:05.0 Off |                    0 |
| N/A   34C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|       

# Download Data


In [5]:
!pip install gdown --upgrade


Downloading...
From (uriginal): https://drive.google.com/uc?id=19ZlT0qm-3rdMRe60ya25xiN-ELOohr8M
From (redirected): https://drive.google.com/uc?id=19ZlT0qm-3rdMRe60ya25xiN-ELOohr8M&confirm=t&uuid=9db31c2c-6d0c-4e49-a381-96c0f7b9c5a9
To: /kaggle/working/data.zip
100%|███████████████████████████████████████| 1.03G/1.03G [00:08<00:00, 115MB/s]


In [6]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google.colab'

In [2]:
!pip install gdown --upgrade


Collecting gdown
  Downloading gdown-4.7.1-py3-none-any.whl (15 kB)
Installing collected packages: gdown
Successfully installed gdown-4.7.1
Downloading...
From (uriginal): https://drive.google.com/uc?id=12sSrmByG2-QzkQn-JgnNEpkrAYsBPchD
From (redirected): https://drive.google.com/uc?id=12sSrmByG2-QzkQn-JgnNEpkrAYsBPchD&confirm=t&uuid=bdfbdf2b-e1c8-4d5a-887a-de7642753130
To: /kaggle/working/data.zip
100%|███████████████████████████████████████| 1.03G/1.03G [00:04<00:00, 223MB/s]


In [3]:
! unzip data.zip

Archive:  data.zip
   creating: data/
  inflating: __MACOSX/._data         
   creating: data/valid/
  inflating: __MACOSX/data/._valid   
  inflating: data/.DS_Store          
  inflating: __MACOSX/data/._.DS_Store  
   creating: data/test/
  inflating: __MACOSX/data/._test    
   creating: data/train/
  inflating: __MACOSX/data/._train   
  inflating: data/valid/5_14.jpg     
  inflating: data/valid/5_209.jpg    
  inflating: data/valid/0_358.jpg    
  inflating: data/valid/5_28.jpg     
  inflating: data/valid/5_235.jpg    
  inflating: data/valid/8_202.jpg    
  inflating: data/valid/2_21.jpg     
  inflating: data/valid/10_6.jpg     
  inflating: data/valid/2_35.jpg     
  inflating: data/valid/8_216.jpg    
  inflating: data/valid/5_221.jpg    
  inflating: data/valid/3_258.jpg    
  inflating: data/valid/3_264.jpg    
  inflating: data/valid/3_270.jpg    
  inflating: data/valid/10_112.jpg   
  inflating: data/valid/8_54.jpg     
  inflating: data/valid/6_79.jpg     
  inflating

# Import Packages

In [4]:
_exp_name = "sample"

In [5]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
import torch

import random

In [6]:
def same_seeds(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

seed = 7777
same_seeds(seed)

# Transforms
Torchvision provides lots of useful utilities for image preprocessing, data *wrapping* as well as data augmentation.

Please refer to PyTorch official website for details about different transforms.

In [8]:


# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    transforms.RandomRotation(30),  # Random rotation within [-15, 15] degrees
    transforms.RandomHorizontalFlip(.5),  # Randomly flip horizontally
    transforms.RandomVerticalFlip(.5),
    transforms.ColorJitter(brightness=0.6, contrast=0.6, saturation=0.6, hue=0.5),  # Adjust brightness, contrast, saturation, and hue
    transforms.RandomAffine(degrees=30, translate=(0.1, 0.1),scale=(0.8, 1.2), shear=15),  # Random affine transformation
  # Random affine transformation
    transforms.RandomResizedCrop(96, scale=(0.8, 1.0)),  # Random resized crop to 128x128
    transforms.GaussianBlur(kernel_size=5),

    # Cutout regularization (randomly mask out a portion of the image)
#     transforms.RandomErasing(p=0.5, scale=(0.02, 0.2), ratio=(0.3, 3.3)),

    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.

    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])


# Datasets
The data is labelled by the name, so we load images and label while calling '__getitem__'

In [9]:

class FoodDataset(Dataset):

    def __init__(self,path,tfm=test_tfm,files = None):
        super(FoodDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files != None:
            self.files = files

        self.transform = tfm

    def __len__(self):
        return len(self.files)

    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname)
        im = self.transform(im)

        try:
            label = int(fname.split("/")[-1].split("_")[0])
        except:
            label = -1 # test has no label

        return im,label

# Model

In [18]:
import torch
import torch.nn as nn

class ConvNet(nn.Module):
    def __init__(self, num_classes):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.25)
        )
        self.fc1 = nn.Linear(128 * 8 * 8, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.fc2(out)
        out = self.fc3(out)
        return out

# Create the model
num_classes =11  # Replace with the number of classes in your dataset
model = ConvNet(num_classes)

# Configurations

In [21]:
from torch.optim.lr_scheduler import CosineAnnealingLR
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
model = ConvNet(num_classes).to(device)

# The number of batch size.
batch_size = 32

# The number of training epochs.
n_epochs = 13

# If no improvement in 'patience' epochs, early stop.
patience = 4

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=.0001)
scheduler = CosineAnnealingLR(optimizer, T_max=10)

# Dataloader

In [22]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = FoodDataset("./data/train", tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset("./data/valid", tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)

# Start Training

In [23]:
import matplotlib.pyplot as plt

# Lists to store training and validation loss and accuracy values
train_losses = []
train_accuracies = []
valid_losses = []
valid_accuracies = []

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)

    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # update logs
    if valid_acc > best_acc:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best")
    else:
        with open(f"./{_exp_name}_log.txt","a"):
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

 # Append loss and accuracy values for plotting
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)
    valid_losses.append(valid_loss)
    valid_accuracies.append(valid_acc)

    scheduler.step()

# Plotting the training and validation loss
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Train Loss', marker='o')
plt.plot(range(1, len(valid_losses) + 1), valid_losses, label='Valid Loss', marker='o')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.title('Training and Validation Loss')

# Display the plots
plt.tight_layout()
plt.show()

  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 001/013 ] loss = 2.30068, acc = 0.15119


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 001/013 ] loss = 2.28543, acc = 0.14786
[ Valid | 001/013 ] loss = 2.28543, acc = 0.14786 -> best
Best model found at epoch 0, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 002/013 ] loss = 2.28248, acc = 0.16891


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 002/013 ] loss = 2.26657, acc = 0.19223
[ Valid | 002/013 ] loss = 2.26657, acc = 0.19223 -> best
Best model found at epoch 1, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 003/013 ] loss = 2.27562, acc = 0.18046


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 003/013 ] loss = 2.24083, acc = 0.19628
[ Valid | 003/013 ] loss = 2.24083, acc = 0.19628 -> best
Best model found at epoch 2, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 004/013 ] loss = 2.26815, acc = 0.17787


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 004/013 ] loss = 2.23710, acc = 0.19676
[ Valid | 004/013 ] loss = 2.23710, acc = 0.19676 -> best
Best model found at epoch 3, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 005/013 ] loss = 2.25418, acc = 0.18451


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 005/013 ] loss = 2.23291, acc = 0.20930
[ Valid | 005/013 ] loss = 2.23291, acc = 0.20930 -> best
Best model found at epoch 4, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 006/013 ] loss = 2.22842, acc = 0.20601


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 006/013 ] loss = 2.17458, acc = 0.22155
[ Valid | 006/013 ] loss = 2.17458, acc = 0.22155 -> best
Best model found at epoch 5, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 007/013 ] loss = 2.21101, acc = 0.21157


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 007/013 ] loss = 2.14837, acc = 0.24142
[ Valid | 007/013 ] loss = 2.14837, acc = 0.24142 -> best
Best model found at epoch 6, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 008/013 ] loss = 2.18863, acc = 0.21612


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 008/013 ] loss = 2.11916, acc = 0.24875
[ Valid | 008/013 ] loss = 2.11916, acc = 0.24875 -> best
Best model found at epoch 7, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 009/013 ] loss = 2.17839, acc = 0.21911


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 009/013 ] loss = 2.10049, acc = 0.24981
[ Valid | 009/013 ] loss = 2.10049, acc = 0.24981 -> best
Best model found at epoch 8, saving model


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 010/013 ] loss = 2.16627, acc = 0.22514


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 010/013 ] loss = 2.09903, acc = 0.24489
[ Valid | 010/013 ] loss = 2.09903, acc = 0.24489


  0%|          | 0/309 [00:00<?, ?it/s]

[ Train | 011/013 ] loss = 2.15898, acc = 0.22799


  0%|          | 0/108 [00:00<?, ?it/s]

[ Valid | 011/013 ] loss = 2.09879, acc = 0.24363
[ Valid | 011/013 ] loss = 2.09879, acc = 0.24363


  0%|          | 0/309 [00:00<?, ?it/s]

KeyboardInterrupt: 

# Dataloader for test

In [24]:
# Construct test datasets.
# The argument "loader" tells how torchvision reads the data.
test_set = FoodDataset("./data/test", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

# Testing and generate prediction CSV

In [25]:
model_best = ConvNet(num_classes).to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in tqdm(test_loader):
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()

  0%|          | 0/47 [00:00<?, ?it/s]

In [28]:
# create test csv
def pad4(i):
    return "0"*(4-len(str(i)))+str(i)
df = pd.DataFrame()
df["Id"] = [pad4(i) for i in range(len(test_set))]
df["Label"] = prediction
df.to_csv("submission.csv",index = False)