In [None]:
# import os
import numpy as np
import torch
import glob
import torch.nn as nn
import torchvision
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.autograd import Variable
import pandas as pd

In [None]:
# Load the CIFAR-10 training split and wrap it in a shuffling mini-batch loader.

# Per-image preprocessing: convert to a tensor, randomly mirror horizontally,
# then normalize every channel with mean 0.5 / std 0.5 (maps [0, 1] to [-1, 1]).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# number of images per mini-batch (mini-batch gradient descent)
batch_size = 8

trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
)

# human-readable names of the 10 classes, indexed by integer label
classes = (
    'plane', 'car', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
)

In [None]:
# load test data (note that the data has been transformed already)
# NOTE: torch.load unpickles the file, so only load files from a trusted source.
# map_location='cpu' keeps the loaded tensors on the CPU -- the model in this
# notebook is never moved to another device -- even if the file was saved from
# a GPU session.
test_images = torch.load('./test_image.pt', map_location='cpu')

In [None]:
import matplotlib.pyplot as plt

# functions to show an image
def imshow(img):
    """Display a normalized, channel-first image tensor with matplotlib.

    Args:
        img: image tensor whose first axis is the channel axis and whose
            values were normalized with mean 0.5 / std 0.5.

    Returns:
        None. The figure is rendered via ``plt.show()``.
    """
    restored = img / 2 + 0.5  # undo the normalization
    # matplotlib expects the channel axis last, so move axis 0 to the end
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# get a random batch of training images
dataiter = iter(trainloader)
images, labels = next(dataiter)

print(images.shape, labels)

# show images
imshow(torchvision.utils.make_grid(images))
# print one class name per image actually present in the batch; iterating over
# the labels themselves (instead of range(batch_size)) stays correct when the
# batch is shorter than batch_size or batch_size was changed after the loader
# was built
print(' '.join(f'{classes[label]:5s}' for label in labels.tolist()))

In [None]:
# import torch.nn as nn
import torch.nn.functional as F

#  set the hyperparameters of CNN
class CNN(nn.Module):
    """Small convolutional classifier: two conv+pool stages, then three linear layers.

    Spatial size after a convolution follows
    ``out = (in + 2 * padding - kernel_size) / stride + 1``.
    With the layers below, a 32x32 input shrinks 32 -> 28 -> 14 -> 10 -> 5,
    which is why the first linear layer takes 16 * 5 * 5 input features.
    The network returns 10 raw scores (logits) per image.
    """

    def __init__(self):
        super().__init__()
        # number of output channels == number of filters learned by the layer
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
        # 2x2 max pooling with stride 2 halves height and width
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)

        # fully connected head: 400 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run a batch of images through the network and return the class logits."""
        # feature extractor: conv -> ReLU -> pool, applied twice
        for conv in (self.conv1, self.conv2):
            x = self.pool(F.relu(conv(x)))
        # collapse everything except the batch dimension
        x = x.flatten(start_dim=1)
        # hidden linear layers use ReLU; the output layer stays linear
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)


model = CNN()

In [None]:
# `optim` is referenced below, so this import must be active; with it commented
# out the cell raises NameError on a fresh kernel.
import torch.optim as optim

# loss: cross-entropy between the network's class scores and the integer labels
criterion = nn.CrossEntropyLoss()
# optimizer: SGD with momentum over all trainable parameters of the model
optimizer = optim.SGD(model.parameters(), lr=0.00211, momentum=0.9)

In [None]:

total_epoch = 3
for epoch in range(total_epoch):  # each iteration is one pass over the training loader
    running_loss = 0.0  # loss summed since the last progress report
    for i, (inputs, labels) in enumerate(trainloader):  # one mini-batch per step
        # gradients accumulate by default, so clear them before each step
        optimizer.zero_grad()

        outputs = model(inputs)            # forward pass
        loss = criterion(outputs, labels)  # loss for this mini-batch
        loss.backward()                    # back propagation
        optimizer.step()                   # one gradient-descent update

        running_loss += loss.item()
        if (i + 1) % 1000 != 0:
            continue
        # every 1000 mini-batches: report the average loss, then restart the window
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 1000:.3f}')
        running_loss = 0.0

print('Finished Training')

In [None]:
# Inference only: gradient tracking is disabled, so no autograd graph is built.
with torch.no_grad():
    # raw class scores, one row per test image
    outputs = model(test_images)
    # the class with the highest score in each row is the prediction;
    # argmax replaces the legacy `torch.max(outputs.data, 1)` idiom
    predicted = outputs.argmax(dim=1)
    # map integer class indices to their names
    predicted = np.array([classes[idx] for idx in predicted.tolist()])

print(predicted)
# show the first four test images
imshow(torchvision.utils.make_grid(test_images[:4]))

# one row per test image; the row index is written out as the `id` column
submission = pd.DataFrame({'label': predicted})
submission.to_csv("submission.csv", index=True, index_label='id')
submission

# Model type: CNN
The meaning of the hyperparameters is as follows:
class CNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels = 3, out_channels = 6, kernel_size = 5) # input channel=3, out_put channels / num of filter=6, kernel/ size of filter=5*5
        self.pool = nn.MaxPool2d(2, 2) # reduce the image size as factor 2, with a stride 2
        self.conv2 = nn.Conv2d(6, 16, 5) # input channel is 6, out put 16, and the filter size is 5*5
       #Output features = [(Input features + 2 * padding - kernel_size) / stride] + 1
        self.fc1 = nn.Linear(16 * 5 * 5, 120) # 16*5*5 is input feautures and 120 is the outputfeaures
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


model = CNN()

# Number of Iterations (Epochs):

There are 3 epochs. The number of epochs determines how many times the entire training dataset is processed by the model during training.
One epoch is one full pass over the training set. It is made up of many mini-batch iterations, each consisting of one forward pass (computing predictions), one backward pass (computing gradients), and one weight update.
I increased the number of epochs from 1 to 3 in this model and the accuracy score increased from 0.49 to 0.57. I also noticed that the loss decreased from one epoch to the next.

Learning rate:
I started with increasing the learning rate from 0.0005 to 0.01 because it was too small and would have resulted in a slow convergence rate. The learning rate of 0.01 was too high and caused the model to overshoot the optimal solution. I then adjusted the learning rate to 0.001 and gradually increased it to 0.002. This balance between overshooting and convergence speed worked well in this case.

Optimization method:
I use Stochastic Gradient Descent (SGD) as the optimization method because it converges quickly.
The optimizer is what updates the weights of the model after each mini-batch.

My Findings:
**A Pipeline for Image Classification**

In my image classification task, I've employed a well-established pipeline to achieve accurate results. Here are the key steps I've followed:

1. Data Preprocessing:
   - First, I load and transform the image data from raw vectors into tensors. This transformation is crucial for compatibility with deep learning models.
   - To achieve this, I utilize data loaders and a composition of transforms. These transforms include operations like conversion to tensors, horizontal flipping, and normalization.
   - For instance, horizontal flipping helps diversify the training data, while normalization ensures the input values are within a consistent range.
   
   
   ```python
   # ToTensor must come first: Normalize operates on tensors, not on PIL images
   transform = transforms.Compose([
       transforms.ToTensor(),
       transforms.RandomHorizontalFlip(),
       transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
   ])
   ```


2. Model Selection and Hyperparameters:
   - Next, I select a Convolutional Neural Network (CNN) model that suits my specific task. I've noticed that increasing the number of output channels in the model architecture often leads to improved performance. (TODO: I need to study these parameters further, including the formula behind them.)
   - Setting hyperparameters like learning rates, batch sizes, and model depth plays a crucial role in model training. Experimentation helps identify the best values.

   model = CNN(num_output_channels=64, ...)
   optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)
 

3. Loss Function and Optimization:
   - I define a loss function. Cross-entropy loss is my choice for image classification tasks.
   - For optimization, I employ the Stochastic Gradient Descent (SGD) method. SGD is known for its speedy convergence and adaptability to large datasets.
  
   criterion = nn.CrossEntropyLoss()
   optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

4. Training:
   - With the setup complete, I specify the number of epochs for training. Increasing the number of epochs typically leads to improved accuracy on the training data, although it's essential to monitor for overfitting.
   
   num_epochs = 3
   for epoch in range(num_epochs):
       train_model(...)

5. Testing:
   - Finally, I employ the trained model to make predictions on the test data, evaluating its performance on unseen samples.

6. Result: the model reaches a prediction accuracy of 0.57.
