In [1]:
import cudf
import cupy
import torch
import torch.nn as nn
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import os

In [10]:
!nvidia-smi

Wed Nov  6 12:38:55 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla V100-PCIE-32GB           Off | 00000000:D8:00.0 Off |                    0 |
| N/A   28C    P0              34W / 250W |  17214MiB / 32768MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [3]:

INPUT_IMAGE_CHANNELS = 3
INPUT_IMAGE_DIM = 32

CONV_LAYER1_OUTPUT_CHANNELS = 256
CONV_LAYER1_KERNEL_SIZE = 5
CONV_LAYER1_STRIDE = 1
CONV_LAYER1_PADDING = 0

CONV_MAX_POOL_1_KERNEL_SIZE = 2
CONV_MAX_POOL_1_PADDING_SIZE = 0
CONV_MAX_POOL_1_STRIDE_SIZE = 2

CONV_LAYER2_OUTPUT_CHANNELS = 128
CONV_LAYER2_KERNEL_SIZE = 5
CONV_LAYER2_STRIDE = 1
CONV_LAYER2_PADDING = 2

CONV_MAX_POOL_2_KERNEL_SIZE = 2
CONV_MAX_POOL_2_PADDING_SIZE = 0
CONV_MAX_POOL_2_STRIDE_SIZE = 2

CONV_LAYER3_OUTPUT_CHANNELS = 256
CONV_LAYER3_KERNEL_SIZE = 5
CONV_LAYER3_STRIDE = 1
CONV_LAYER3_PADDING = 1

CONV_MAX_POOL_3_KERNEL_SIZE = 2
CONV_MAX_POOL_3_PADDING_SIZE = 0
CONV_MAX_POOL_3_STRIDE_SIZE = 1

DROP_OUT_RATE = 0.25

LINEAR_LAYER_1_OUTPUT_SIZE = 2048
LINEAR_LAYER_2_OUTPUT_SIZE = 512

# NUMBER OF CLASSES
LINEAR_LAYER_3_OUTPUT_SIZE = 10 # 10 for the amount of classes that are in the dataset



class CNN_Model(nn.Module):
    def __init__(self, num_gpus):
        super(CNN_Model, self).__init__()

        # CONV2D LAYER1 AND CHANGE IN IMAGE DIMENSIONS
        self.conv_layer1 = nn.Conv2d(INPUT_IMAGE_CHANNELS, CONV_LAYER1_OUTPUT_CHANNELS, CONV_LAYER1_KERNEL_SIZE, CONV_LAYER1_STRIDE, CONV_LAYER1_PADDING)
        self.image_dimension = (INPUT_IMAGE_DIM - ((CONV_LAYER1_KERNEL_SIZE) - (2 * CONV_LAYER1_PADDING)))//CONV_LAYER1_STRIDE + 1
        self.image_channel_size = CONV_LAYER1_OUTPUT_CHANNELS
        print(self.image_dimension, self.image_channel_size)


        # MAX POOLING LAYER 1, Change in image dimensions
        self.maxPooling1 = nn.MaxPool2d(CONV_MAX_POOL_1_KERNEL_SIZE, CONV_MAX_POOL_1_STRIDE_SIZE, CONV_MAX_POOL_1_PADDING_SIZE)
        self.dropout1 = nn.Dropout(DROP_OUT_RATE)
        self.image_dimension = (self.image_dimension - ((CONV_MAX_POOL_1_KERNEL_SIZE) - (2 * CONV_MAX_POOL_1_PADDING_SIZE)))//CONV_MAX_POOL_1_STRIDE_SIZE + 1
        print(self.image_dimension, self.image_channel_size)

        
        
        # CONV2D LAYER2 AND CHANGE IN IMAGE DIMENSIONS
        self.conv_layer2 = nn.Conv2d(CONV_LAYER1_OUTPUT_CHANNELS, CONV_LAYER2_OUTPUT_CHANNELS, CONV_LAYER2_KERNEL_SIZE, CONV_LAYER2_STRIDE, CONV_LAYER2_PADDING)
        self.image_dimension = (self.image_dimension - ((CONV_LAYER2_KERNEL_SIZE) - (2 * CONV_LAYER2_PADDING)))//CONV_LAYER2_STRIDE + 1
        self.image_channel_size = CONV_LAYER2_OUTPUT_CHANNELS
        print(self.image_dimension, self.image_channel_size)


        # MAX POOLING LAYER 2 AND CHANGE IN IMAGE DIMENSIONS
        self.maxPooling2 = nn.MaxPool2d(CONV_MAX_POOL_2_KERNEL_SIZE, CONV_MAX_POOL_2_STRIDE_SIZE, CONV_MAX_POOL_2_PADDING_SIZE)
        self.dropout2 = nn.Dropout(DROP_OUT_RATE)
        self.image_dimension = (self.image_dimension - ((CONV_MAX_POOL_2_KERNEL_SIZE) - (2 * CONV_MAX_POOL_2_PADDING_SIZE)))//CONV_MAX_POOL_2_STRIDE_SIZE + 1
        print(self.image_dimension, self.image_channel_size)

        # CONV2D LAYER 3 AND CHANGE IN IMAGE DIMENSIONS
        self.conv_layer3 = nn.Conv2d(CONV_LAYER2_OUTPUT_CHANNELS, CONV_LAYER3_OUTPUT_CHANNELS, CONV_LAYER3_KERNEL_SIZE, CONV_LAYER3_STRIDE, CONV_LAYER3_PADDING)
        self.image_dimension = (self.image_dimension - ((CONV_LAYER3_KERNEL_SIZE) - (2 * CONV_LAYER3_PADDING)))//CONV_LAYER3_STRIDE + 1
        self.image_channel_size = CONV_LAYER3_OUTPUT_CHANNELS
        print(self.image_dimension, self.image_channel_size)
        

        # MAX POOLING LAYER 3 AND CHANGE IN IIMAGE DIMENSIONS
        self.maxPooling3 = nn.MaxPool2d(CONV_MAX_POOL_3_KERNEL_SIZE, CONV_MAX_POOL_3_STRIDE_SIZE, CONV_MAX_POOL_3_PADDING_SIZE)
        self.dropout3 = nn.Dropout(DROP_OUT_RATE)
        self.image_dimension = (self.image_dimension - ((CONV_MAX_POOL_3_KERNEL_SIZE) - (2 * CONV_MAX_POOL_3_PADDING_SIZE)))//CONV_MAX_POOL_3_STRIDE_SIZE + 1
        print(self.image_dimension, self.image_channel_size)



        # Since we flatten the image after the CONV2D Layers, we need to calculate the size of the feature
        # Vector going into the nn.Linear layer
        self.fc1_input_size = self.image_dimension * self.image_dimension * self.image_channel_size
        
        # Fully connected Layers
        self.fc1 = nn.Linear(self.fc1_input_size, LINEAR_LAYER_1_OUTPUT_SIZE)
        self.dropout4 = nn.Dropout(DROP_OUT_RATE)
        print(self.fc1_input_size, LINEAR_LAYER_1_OUTPUT_SIZE)
        
        self.fc2 = nn.Linear(LINEAR_LAYER_1_OUTPUT_SIZE, LINEAR_LAYER_2_OUTPUT_SIZE)
        self.dropout5 = nn.Dropout(DROP_OUT_RATE)
        print(LINEAR_LAYER_1_OUTPUT_SIZE, LINEAR_LAYER_2_OUTPUT_SIZE)

        
        self.fc3 = nn.Linear(LINEAR_LAYER_2_OUTPUT_SIZE, LINEAR_LAYER_3_OUTPUT_SIZE)
        print(LINEAR_LAYER_2_OUTPUT_SIZE, LINEAR_LAYER_3_OUTPUT_SIZE)
        
        pass

    def forward(self, x):
        x = self.conv_layer1(x)
        x = self.maxPooling1(x)
        x = nn.functional.relu(x)
        x = self.dropout1(x)
        
        x = self.conv_layer2(x)
        x = self.maxPooling2(x)
        x = nn.functional.relu(x)
        x = self.dropout2(x)
        
        x = self.conv_layer3(x)
        x = self.maxPooling3(x)
        x = nn.functional.relu(x)
        x = self.dropout3(x)
        
        x = torch.flatten(x, start_dim=1)
        
        x = self.fc1(x)
        x = nn.functional.relu(x)
        x = self.dropout4(x)
        
        x = self.fc2(x)
        x = nn.functional.relu(x)
        x = self.dropout5(x)
        
        x = self.fc3(x)
        x = nn.functional.relu(x)

        return x


In [4]:
# Training Hyperparams
BATCH_SIZE = 10
EPOCHS = 5

# Optimizer Hyperparams
LEARNING_RATE = 10e-3
SGD_MOMENTUM = 0.9 # How much of past velocity to maintain in gradient update

# LR Scheduler Hyperparams
GAMMA = 0.1 # Multiplies previous LR by 0.1
STEP_SIZE = 3000 # Amount of steps before LR is decreased



# if __name__ == "__main__":

device = 'cuda' if torch.cuda.is_available() else 'cpu'
world_size = torch.cuda.device_count()

# In case we have zero cuda GPUs, no way to train the following
if world_size < 1:
    print("No Cuda Devices")
    exit()

dataset_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

# Download dataset if it is not present on the system
DOWNLOAD_DATASET = False
if 'testing' not in os.listdir():
    DOWNLOAD_DATASET = True
    os.mkdir('testing')
if 'dataset' not in os.listdir('testing'):
    DOWNLOAD_DATASET = True
    os.mkdir('testing/dataset')
    
if 'training' not in os.listdir():
    DOWNLOAD_DATASET = True
    os.mkdir('training')
if 'dataset' not in os.listdir('training'):
    DOWNLOAD_DATASET = True
    os.mkdir('training/dataset')
training_data = datasets.CIFAR10(root="training/dataset/", train=True, download=DOWNLOAD_DATASET, transform=dataset_transforms)
testing_data = datasets.CIFAR10(root="training/dataset/", train=False, download=DOWNLOAD_DATASET, transform=dataset_transforms)
datal = DataLoader(training_data, batch_size=BATCH_SIZE)

model = CNN_Model(25)
model.to(device)

# Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=SGD_MOMENTUM, nesterov=True)

# LR Scheduler
learning_rate_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)

# Loss func
criterion = nn.CrossEntropyLoss()

28 256
14 256
14 128
7 128
5 256
4 256
4096 2048
2048 512
512 10


In [5]:
import tqdm
for epoch in range(EPOCHS):
    model.train()
    for datapoint in tqdm.tqdm(datal):
        X = torch.tensor(datapoint[0])
        y = torch.tensor(datapoint[1])
        
        optimizer.zero_grad()
        
        prediction = model(X.to(device))
        loss = criterion(prediction, y.to(device))
        loss.backward()
        optimizer.step()
        
        learning_rate_scheduler.step()

    model.eval()
    test_data_loader = DataLoader(testing_data, batch_size=BATCH_SIZE)
    total_loss = 0
    for point in test_data_loader:
        X = torch.tensor(point[0])
        y = torch.tensor(point[1])
        prediction = model(X.to(device))
        loss = criterion(prediction, y.to(device))
        total_loss += loss

    print(f'epoch {epoch} total loss: {total_loss}')

  X = torch.tensor(datapoint[0])
  y = torch.tensor(datapoint[1])
100%|██████████| 5000/5000 [00:20<00:00, 241.87it/s]
  X = torch.tensor(point[0])
  y = torch.tensor(point[1])


epoch 0 total loss: 1280.2471923828125


100%|██████████| 5000/5000 [00:20<00:00, 244.41it/s]


epoch 1 total loss: 1135.51611328125


100%|██████████| 5000/5000 [00:20<00:00, 243.68it/s]


epoch 2 total loss: 1131.6202392578125


100%|██████████| 5000/5000 [00:20<00:00, 242.43it/s]


epoch 3 total loss: 1131.4256591796875


100%|██████████| 5000/5000 [00:20<00:00, 243.99it/s]


epoch 4 total loss: 1131.42138671875
