In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the input directory, printing its full path.
for root, _, names in os.walk('/kaggle/input'):
    for entry in names:
        full_path = os.path.join(root, entry)
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Importing Required Packages/Libraries

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import ntpath
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset
import matplotlib.image as mpimg
import cv2
from imgaug import augmenters as iaa
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from torchvision.utils import save_image
from torchvision.io import read_image
import os
import random
from datetime import datetime
from torch import device
import torch.optim as optim
import pandas as pd

In [3]:
# Load the example CSV that ships with the competition data and preview its
# first five rows (the same file is re-read later as the submission template).
example_data = pd.read_csv(
    "/kaggle/input/deep-learning-for-msc-coursework-2022/example.csv"
)
print(example_data.iloc[:5])

In [4]:
# Select the compute device: CUDA when requested and available, otherwise CPU.
# Set use_cuda = False to force CPU execution.
# NOTE(review): the cells shown never move the model or the batches to
# `device`, so everything presumably still runs on the CPU - confirm.
use_cuda = True
if use_cuda and torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# Data pipeline: transforms, train/validation split, test set and data loaders.

# Root of the competition data; every dataset path below is derived from it.
path = '../input/deep-learning-for-msc-coursework-2022/'

#Data Augmentation (training only): random rotation, random crop resized to
#32x32 and random flips, then conversion to a tensor.
train_transform = transforms.Compose([transforms.RandomRotation(15),
                                      transforms.RandomResizedCrop(32),
                                      transforms.RandomHorizontalFlip(0.7),
                                      transforms.RandomVerticalFlip(0.7),
                                      transforms.ToTensor()])

# Evaluation must be deterministic, so no random augmentation is applied:
# images are only resized to the 32x32 input size produced by the training
# pipeline and converted to tensors.
test_transform = transforms.Compose([transforms.Resize((32, 32)),
                                     transforms.ToTensor()])

#Splitting train & validation data
temp_data = torchvision.datasets.ImageFolder(path + "train/train", train_transform)

# Hold out a fixed number of images for validation; the training size is
# derived from the dataset length instead of being hard-coded. The seeded
# generator makes the split reproducible across kernel restarts.
validation_size = 200
split_generator = torch.Generator().manual_seed(42)
train_data, validation_data = torch.utils.data.random_split(
    temp_data,
    [len(temp_data) - validation_size, validation_size],
    generator=split_generator,
)

# Point the validation indices at an un-augmented view of the same folder so
# that validation loss/accuracy are measured on clean images.
validation_data = torch.utils.data.Subset(
    torchvision.datasets.ImageFolder(path + "train/train", test_transform),
    validation_data.indices,
)

#Fetching test data
test_data = torchvision.datasets.ImageFolder(path + "test", test_transform)

#Loading all data using a dataloader
# Only the training loader reshuffles every epoch; evaluation order is fixed.
train_loader = torch.utils.data.DataLoader(train_data, batch_size = 32, shuffle = True)
validation_loader = torch.utils.data.DataLoader(validation_data, batch_size = 32)
test_loader = torch.utils.data.DataLoader(test_data, batch_size = 32)

In [6]:
def imageshow(img):
    """Show an image tensor with matplotlib.

    The tensor is converted to a NumPy array and its axes are reordered with
    the permutation (1, 2, 0) before being handed to ``plt.imshow``.
    """
    as_array = img.numpy()
    plt.imshow(as_array.transpose(1, 2, 0))
    plt.show()

# Pull one batch from the training loader and display it as an image grid.
dataiter = iter(train_loader)

# Use the built-in next(): DataLoader iterators implement __next__, and the
# old `.next()` method is not available in current PyTorch releases.
images, labels = next(dataiter)
imageshow(torchvision.utils.make_grid(images))

In [7]:
#CNN 
class CNN(nn.Module):
    """Small convolutional classifier.

    Two convolution + ReLU + max-pool stages are followed by three fully
    connected layers. For a 3x32x32 input the second pooling stage yields a
    16x6x6 feature map, i.e. the 576 features ``fc1`` consumes; ``fc3``
    produces 4 raw logits (no softmax is applied here).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (3x3 kernels, no padding).
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=28, kernel_size=3)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(in_channels=28, out_channels=16, kernel_size=3)
        # NOTE: this layer is registered but forward() never applies it.
        self.dropout = nn.Dropout2d(p=0.2)

        # Fully connected classifier head.
        self.fc1 = nn.Linear(in_features=576, out_features=1024)
        self.fc2 = nn.Linear(in_features=1024, out_features=60)
        self.fc3 = nn.Linear(in_features=60, out_features=4)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        # conv -> ReLU -> pool, once per convolutional layer.
        for conv in (self.conv1, self.conv2):
            x = self.pool(torch.relu(conv(x)))

        # Collapse everything except the batch dimension.
        x = x.flatten(start_dim=1)

        # Hidden linear layers with ReLU; the output layer stays linear.
        for hidden in (self.fc1, self.fc2):
            x = torch.relu(hidden(x))
        return self.fc3(x)

In [8]:
# Build the network and print its layer-by-layer summary.
mymodel = CNN()
print(repr(mymodel))

In [9]:
#Loss function and optimiser
# Cross-entropy over the model's logits, optimised with Adam (with weight decay).
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    params=mymodel.parameters(),
    lr=1e-3,
    weight_decay=1e-2,
)

In [10]:
# Take one batch from the training loader and render it as a single image grid.
train_images, train_labels = next(iter(train_loader))
grid = torchvision.utils.make_grid(train_images)
# Reorder the grid's axes with (1, 2, 0) before plotting, as imshow is given
# the transposed array.
plt.imshow(grid.numpy().transpose(1, 2, 0), interpolation='nearest')

In [11]:
#Training model
# Per-epoch history of the sample-weighted mean losses (used for plotting later).
training_loss = []
validation_loss = []

# Number of full passes over the training set
num_epochs = 25

for epoch in range(num_epochs):
    # Running sums of (batch loss * batch size) for this epoch.
    train_loss = 0.0
    valid_loss = 0.0

    # ---- training pass ----
    mymodel.train()
    for train_image, train_target in train_loader:
        #clearing the gradients left over from the previous step
        optimizer.zero_grad()

        output = mymodel(train_image)

        #calculating the loss by comparing predictions with the labels
        loss = criterion(output, train_target)

        # Backpropagate exactly once per batch: a second backward() call would
        # accumulate the same gradient again and double it before the update.
        loss.backward()

        # parameter update
        optimizer.step()

        # criterion returns the batch mean, so weight it by the batch size to
        # get a correct per-sample average when the last batch is smaller.
        train_loss += loss.item() * train_image.size(0)

    # ---- validation pass ----
    mymodel.eval()
    # No gradients are needed for evaluation; skipping graph construction
    # saves memory and time without changing the loss values.
    with torch.no_grad():
        for val_img, val_target in validation_loader:
            val_output = mymodel(val_img)
            val_loss = criterion(val_output, val_target)

            #update validation loss
            valid_loss += val_loss.item() * val_img.size(0)

    # Average the accumulated losses over the number of samples in each loader.
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(validation_loader.sampler)
    training_loss.append(train_loss)
    validation_loss.append(valid_loss)
    print('Epoch:{:.0f} \t Training Loss:{:.4f}\tValidation Loss:{:.4f}'.format(epoch+1,train_loss,valid_loss))
    

In [12]:
#Plot training and validation loss
# One line per loss history, indexed by epoch position.
for curve, name in ((training_loss, 'Training loss'),
                    (validation_loss, 'Validation loss')):
    plt.plot(curve, label=name)
plt.xlabel('Epochs')
plt.ylabel('Loss at each epoch')
plt.legend()

In [13]:
#Calculating Accuracy
# Fraction of validation images whose arg-max prediction equals the label.
correct = 0   # number of correctly classified validation images
total = 0     # number of validation images seen

# Make sure the model is in inference mode regardless of which cell ran last.
mymodel.eval()
with torch.no_grad():
    for val_images, val_labels in validation_loader:
        val_outputs = mymodel(val_images)
        # Index of the largest logit along the class dimension.
        _, predicted = torch.max(val_outputs, 1)
        # Count the samples of THIS batch; using any other tensor's size gives
        # a wrong denominator whenever the last batch is smaller.
        total += val_labels.size(0)
        correct += (predicted == val_labels).sum().item()

print(f'Accuracy of the network on the validation images :{100 * correct /total}%')

In [14]:
#Saving Model
# Writes only the model's state_dict to the file "CNN.pt" (relative to the
# current working directory), not the model object itself.
# NOTE(review): `saved_model` receives whatever torch.save returns, which is
# presumably None rather than a model or a path - confirm before using it.
saved_model = torch.save(mymodel.state_dict(), "CNN.pt")

In [15]:
# Inference on the test images and creation of the submission CSV.

# Test images are only converted to tensors: no augmentation at prediction time.
sub_dataset = torchvision.datasets.ImageFolder(
    path + 'test/',
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Default DataLoader settings (one image per batch, no shuffling), so the
# predictions come out in the dataset's own order.
sub_dataloader = torch.utils.data.DataLoader(sub_dataset)

#Prediction and Submission part
mymodel.eval()
prediction = []   # predicted class index (plain int) for every test image
with torch.no_grad():
    for image, label in sub_dataloader:
        output = mymodel(image)
        # Store plain Python ints instead of tensors so the column can be
        # built without converting a list of tensors through NumPy.
        prediction.extend(torch.argmax(output, 1).tolist())

#Mapping the label names for numerical targets
# NOTE(review): assumes the training class folders were indexed in this order
# (Cancer, Connective, Immune, Normal) - verify against temp_data.class_to_idx.
CLASS_NAMES = {0: 'Cancer', 1: 'Connective', 2: 'Immune', 3: 'Normal'}


def remap(x):
    """Return the label name for class index ``x`` (None for an unknown index)."""
    return CLASS_NAMES.get(x)


submission_df = pd.read_csv(path + 'example.csv')

# Fail early with a clear message if the template and predictions disagree.
if len(prediction) != len(submission_df):
    raise ValueError(
        f'Got {len(prediction)} predictions for {len(submission_df)} rows in example.csv'
    )

# Replace the template's placeholder 'Type' column with the predicted labels.
# NOTE(review): rows are filled in the dataset's iteration order; presumably
# example.csv lists the test images in that same order - confirm.
submission_df = submission_df.drop(columns='Type')
submission_df['Type'] = [remap(index) for index in prediction]

submission_df.to_csv('test.csv',index=False)
print(submission_df)