In [1]:
# from google.colab import drive
# drive.mount("/content/drive")

In [2]:
#after mounting drive
%cd /content/drive/MyDrive/DA6401/DA6401_A2

/content/drive/MyDrive/DA6401/DA6401_A2


In [2]:
# !echo '/inaturalist_12K/' >> .gitignore

In [3]:
# !unzip /content/drive/MyDrive/DA6401/DA6401_A2/nature_12K.zip -d /content/drive/MyDrive/DA6401/DA6401_A2

In [4]:
import os
from PIL import Image
from collections import Counter

import numpy as np
import matplotlib.pyplot as plt
import random
import torch
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch import nn, optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from torch.utils.data import random_split
from torch.utils.data import SubsetRandomSampler
from torchvision.transforms import ToTensor

In [5]:
# !pip install virtualenv

In [6]:
# !virtualenv /content/drive/MyDrive/.dla2_env

In [7]:
# !bash -c "source /content/drive/MyDrive/.dla2_env/bin/activate && pip install pytorch-lightning"

In [8]:
import sys
sys.path.append("/content/drive/MyDrive/.dla2_env/lib/python3.11/site-packages")

In [9]:
import pytorch_lightning as pl
import torchmetrics
from torchmetrics import Metric

In [10]:
#Block to note how different the image sizes are
# TRAIN_DATA_PATH = "inaturalist_12K/train/"
# image_sizes = set()
# classi = 'Aves'
# base = TRAIN_DATA_PATH+'/'+ classi
# for image_path in os.listdir(base):
#   im = Image.open(os.path.join(base,image_path))
#   width, height = im.size
#   image_sizes.add((width,height))
# len(Counter(image_sizes))

In [11]:
class CustomImageDataset(torch.utils.data.Dataset):
    def __init__(self, dir, transform=None):
        self.data_dir = dir
        self.images = []
        self.labels = []
        self.label_names = []
        i = 0
        for c in os.listdir(dir):
          if c[0] == '.':
            continue
          ims = os.listdir(os.path.join(dir,c))
          self.images.extend(ims)
          self.labels.extend([i]*len(ims))
          self.label_names.append(c)
          i += 1
        self.ohlabels = F.one_hot(torch.tensor(self.labels)).float()
        self.transform = transform

    # Defining the length of the dataset
    def __len__(self):
        return len(self.images)

    # Defining the method to get an item from the dataset
    def __getitem__(self, index):
        label_name = self.label_names[self.labels[index]]
        image_path = os.path.join(
            os.path.join(self.data_dir, label_name),
            self.images[index])
        image = Image.open(image_path)
        image = image.convert("RGB")
        label = self.ohlabels[index]
        # Applying the transform
        if self.transform:
            image = self.transform(image)
        image = np.array(image)

        return image, label

In [12]:
def split_validation(label_array, valid_size = 0.1, seed = 42):
    '''
    This functions splits the data into train and validation stratifiedly
    '''
    train_idx, test_idx = [],[]
    for i,ci in enumerate(np.unique(label_array)):
        indices = np.where(label_array==ci)[0]
        train_len = int(indices.shape[0]*(1-valid_size))
        shuffled = np.random.RandomState(seed+i).permutation(indices)
        train_idx.extend(shuffled[:train_len])
        test_idx.extend(shuffled[train_len:])
    return train_idx,test_idx

In [13]:
# TRAIN_DATA_PATH = "inaturalist_12K/train/"
# transform = transforms.Compose([
#   transforms.Resize((300, 300)),    # Resize feature images to 128x128 pixels
#   transforms.ToTensor()         # Convert the image to a PyTorch tensor
#   # transforms.Normalize(mean=[0.5, 0.5], std=[0.5, 0.5])  # Normalize with mean and std dev
# ])

# dataset = CustomImageDataset(TRAIN_DATA_PATH, transform=transform)
# dataset_length = len(dataset)

# print('Number of training examples:',dataset_length)
# random_index = random.randint(0, dataset_length - 1)
# print(dataset.label_names)
# print(dataset[random_index][1])
# plt.imshow(dataset[random_index][0].transpose(1,2,0))
# plt.show()

In [14]:
# from collections import Counter
# dl = np.array(dataset.labels)
# print("Original distribution")
# print(Counter(dl))
# tri, tei = split_validation(dataset.labels,valid_size=0.2)
# print("Train distribution")
# print(Counter(dl[tri]))
# print("Test distribution")
# print(Counter(dl[tei]))

In [15]:
# from torch.utils.data import DataLoader
# TRAIN_DATA_PATH = "inaturalist_12K/train/"
# TEST_DATA_PATH = "inaturalist_12K/val/"
# training_data = CustomImageDataset(TRAIN_DATA_PATH,transform=transform)
# test_data = CustomImageDataset(TEST_DATA_PATH,transform=transform)
# transform = transforms.Compose([
#   transforms.Resize((300, 300)),    # Resize feature images to 128x128 pixels
#   transforms.ToTensor()             # Convert the image to a PyTorch tensor
# ])
# train_dataloader = DataLoader(training_data, batch_size=6, shuffle=True)
# test_dataloader = DataLoader(test_data, batch_size=6, shuffle=True)

In [16]:
# train_features, train_labels = next(iter(train_dataloader))
# print(f"Feature batch shape: {train_features.size()}")
# print(f"Labels batch shape: {train_labels.size()}")
# img = train_features[0].squeeze()
# label = train_labels[0]
# plt.imshow(np.array(img).transpose(1,2,0))
# plt.show()
# print(f"Label: {label}")

In [17]:
class iNaturalistDataModule(pl.LightningDataModule):
    def __init__(self, train_dir, test_dir, batch_size, num_workers=1):
        super().__init__()
        self.train_dir = train_dir
        self.test_dir = test_dir
        self.train_sampler = None
        self.valid_sampler = None
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.transform = transforms.Compose([
          transforms.Resize((300, 300)),    # Resize feature images to 128x128 pixels
          transforms.ToTensor()             # Convert the image to a PyTorch tensor
        ])

    def setup(self, stage):
        if stage == "fit" or stage is None:
          training_data = CustomImageDataset(
              self.train_dir,transform=self.transform)
          train_idx, val_idx = split_validation(training_data.labels, valid_size = 0.2)
          self.train_sampler, self.val_sampler = SubsetRandomSampler(train_idx), SubsetRandomSampler(val_idx)
          self.train_ds = training_data
        elif stage == "test":
          test_data = CustomImageDataset(
              self.test_dir,transform=self.transform)
          self.test_ds = test_data

    def train_dataloader(self):
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            sampler=self.train_sampler
        )

    def val_dataloader(self):
        return DataLoader(
            self.train_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            sampler=self.val_sampler
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_ds,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
        )


SyntaxError: expected ':' (<ipython-input-17-35f5029e30d8>, line 22)

In [None]:
class CNN(pl.LightningModule):
    def __init__(self, input_size, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 10, 16)
        self.conv2 = nn.Conv2d(10, 10, 16)
        self.conv3 = nn.Conv2d(10, 10, 16)
        self.conv4 = nn.Conv2d(10, 10, 16)
        self.conv5 = nn.Conv2d(10, 10, 16)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        with torch.no_grad():
          dummy_input = torch.zeros(1, 3, *input_size)
          out = self._forward_convs(dummy_input)
          flatten_size = out.view(1, -1).shape[1]
        self.fc1 = nn.Linear(flatten_size, 120)
        self.fc2 = nn.Linear(120, num_classes)
        self.loss_fn = nn.CrossEntropyLoss()
        self.accuracy = torchmetrics.Accuracy(task="multiclass", num_classes=num_classes)
        self.f1_score = torchmetrics.F1Score(task="multiclass", num_classes=num_classes)

    def _forward_convs(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = self.pool(F.relu(self.conv5(x)))
        return x
    def forward(self, x):
        x = self._forward_convs(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return x

    def training_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        accuracy = self.accuracy(scores, y)
        f1_score = self.f1_score(scores, y)
        self.log_dict({'train_loss': loss, 'train_accuracy': accuracy, 'train_f1_score': f1_score},
                      on_step=False, on_epoch=True, prog_bar=True)
        return {'loss': loss, "scores": scores, "y": y}

    def validation_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log('val_loss', loss)
        return loss

    def test_step(self, batch, batch_idx):
        loss, scores, y = self._common_step(batch, batch_idx)
        self.log('test_loss', loss)
        return loss

    def _common_step(self, batch, batch_idx):
        x, y = batch
        # x = x.reshape(x.size(0), -1)
        scores = self.forward(x)
        loss = self.loss_fn(scores, y)
        return loss, scores, y

    def predict_step(self, batch, batch_idx):
        x, y = batch
        # x = x.reshape(x.size(0), -1)
        scores = self.forward(x)
        preds = torch.argmax(scores, dim=1)
        return preds

    def configure_optimizers(self):
        return optim.Adam(self.parameters(), lr=0.001)



In [None]:
# Set device cuda for GPU if it's available otherwise run on the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

TRAIN_DATA_PATH = "inaturalist_12K/train/"
TEST_DATA_PATH = "inaturalist_12K/val/"

# Hyperparameters
input_size = 300*300
num_classes = 10
learning_rate = 0.001
batch_size = 64
num_epochs = 3

model = CNN(input_size=input_size, num_classes=num_classes)
dm = iNaturalistDataModule(
    train_dir=TRAIN_DATA_PATH, test_dir=TEST_DATA_PATH,
    batch_size=batch_size, num_workers=4)
trainer = pl.Trainer(accelerator="gpu", devices=1, min_epochs=1, max_epochs=3, fast_dev_run=True, precision=16)
trainer.fit(model, dm)
trainer.validate(model, dm)
trainer.test(model, dm)

In [None]:
# username="JG-0212"
# passkey="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"
# repository="DA6401_A2"

In [None]:
# !git config user.email "jpsai6594@gmail.com"
# !git config user.name "Jayagowtham"

In [None]:
# !git clone https://{passkey}@github.com/{username}/{repository}.git

In [7]:
!git reset HEAD~1

Unstaged changes after reset:
M	Assignment_2.ipynb


In [4]:
!git add .

In [6]:
!git status

On branch main
Your branch is ahead of 'origin/main' by 1 commit.
  (use "git push" to publish your local commits)

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   Assignment_2.ipynb[m

no changes added to commit (use "git add" and/or "git commit -a")


In [5]:
!git commit -m "Full training network setup. Working fine"

[main 091c637] Full training network setup. Working fine
 43 files changed, 22 insertions(+), 1 deletion(-)
 rewrite Assignment_2.ipynb (99%)
 create mode 100644 lightning_logs/version_0/events.out.tfevents.1743856933.edb318a7c3f5.231.0
 create mode 100644 lightning_logs/version_0/hparams.yaml
 create mode 100644 lightning_logs/version_1/events.out.tfevents.1743858159.edb318a7c3f5.231.1
 create mode 100644 lightning_logs/version_1/hparams.yaml
 create mode 100644 lightning_logs/version_10/events.out.tfevents.1743859345.edb318a7c3f5.231.10
 create mode 100644 lightning_logs/version_10/hparams.yaml
 create mode 100644 lightning_logs/version_11/events.out.tfevents.1743859375.edb318a7c3f5.231.11
 create mode 100644 lightning_logs/version_11/hparams.yaml
 create mode 100644 lightning_logs/version_12/events.out.tfevents.1743859438.edb318a7c3f5.231.12
 create mode 100644 lightning_logs/version_12/hparams.yaml
 create mode 100644 lightning_logs/version_13/events.out.tfevents.1743859497.edb318a

In [None]:
!git push origin