### Notebook from: https://www.kaggle.com/code/pulavendranselvaraj/covid-noncovid-ct-classification-using-cnn-pytorch

In [7]:
!pip install torchinfo



In [17]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
import PIL
import glob
import os
from IPython.display import Image

import torch
import torch.nn as nn
from torch.utils.data import DataLoader,Dataset
from torchvision.transforms import transforms
from torchinfo import summary

ImportError: cannot import name 'PILLOW_VERSION' from 'PIL' (/opt/homebrew/Caskroom/miniforge/base/envs/pytorch/lib/python3.11/site-packages/PIL/__init__.py)

In [None]:
# Root folder of the dataset on Kaggle; its directory entries are listed as the class labels.
input_path = "/kaggle/input/sarscov2-ctscan-dataset/"
class_labels = os.listdir(path=input_path)
print(class_labels)

In [None]:
# Collect every file that sits exactly one directory below input_path
# (<input_path>/<sub-directory>/<name>.<ext>).
# glob returns matches in arbitrary, filesystem-dependent order, so the list is sorted:
# the seeded train/test split later on is only reproducible if its input order is stable.
filepaths=sorted(glob.glob(os.path.join(input_path,"*","*.*")))
print(filepaths[:10])

In [None]:
# One row per image file, with its path in the 'filepaths' column.
df = pd.DataFrame({'filepaths': filepaths})
df.head(n=10)

In [None]:
# Preview the label of the first file: the name of the directory that directly contains it.
# Reading it from the path structure (instead of a fixed "/"-split position) keeps this
# correct when input_path has a different number of path components.
os.path.basename(os.path.dirname(df['filepaths'][0]))

In [None]:
# Performing split operation on each filepath to collect class labels
df['labels']=df['filepaths'].apply(lambda x: x.split("/")[4])

df.head(10)

In [None]:
# Show the last five rows of the frame.
df.tail(n=5)

In [None]:
# Lookup table label -> integer id.
# The labels are sorted before numbering so that the encoding is identical on every run,
# instead of depending on the order in which the files happened to be listed.
class_dict={label:idx for idx,label in enumerate(sorted(df['labels'].unique().tolist()))}
print(class_dict)

In [None]:
# Replace each label by its integer id from class_dict.
# Guard: only encode while every value in the column is still a key of class_dict.
# Without it, re-running this cell would map the already-encoded ids through the
# lookup again and silently turn the whole column into NaN.
if df['labels'].isin(list(class_dict)).all():
    df['labels']=df['labels'].map(class_dict)

df.head(10)

In [None]:
# Show the last ten rows of the frame.
df.tail(n=10)

In [None]:
# Hold out 20% of the (path, label) pairs for validation; the split is shuffled with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    df['filepaths'].to_numpy(),
    df['labels'].to_numpy(),
    test_size=0.2,
    random_state=42,
    shuffle=True,
)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

In [None]:
# custom dataset
class C_NC_Dataset(Dataset):
    """Dataset yielding ``(transformed_image, label)`` pairs read from image files.

    Parameters
    ----------
    image_paths : sequence of str
        Paths of the image files; ``__getitem__`` indexes into it by position.
    image_labels : sequence of numbers
        One label per path. Stored as a ``float32`` tensor.
    image_transforms : callable, optional
        Transform applied to each image after it has been opened and converted
        to RGB. When given, it is always used, whatever ``grayscale`` is.
    grayscale : bool, default True
        Only consulted when ``image_transforms`` is not given. Selects the
        default pipeline: ``Grayscale -> Resize(250, 250) -> ToTensor`` when
        true, ``Resize(250, 250) -> ToTensor`` otherwise.
    """
    def __init__(self,image_paths,image_labels,image_transforms=None,grayscale=True):
        self.image_paths=image_paths
        self.image_labels=torch.as_tensor(image_labels,dtype=torch.float32)

        # A caller-supplied transform takes precedence. (Previously it was silently
        # discarded whenever grayscale=False was passed alongside it.)
        if image_transforms is not None:
            self.transform=image_transforms
        elif grayscale:
            self.transform=transforms.Compose([transforms.Grayscale(),
                                                transforms.Resize(size=(250,250)),
                                                transforms.ToTensor()])
        else:
            self.transform=transforms.Compose([transforms.Resize(size=(250,250)),
                                              transforms.ToTensor()])
    def __getitem__(self,index):
        """Return ``(transformed_image, label)`` for the sample at ``index``."""
        # Open inside a context manager so the file handle is released right away;
        # convert() loads the pixel data and returns an independent in-memory image.
        with PIL.Image.open(self.image_paths[index]) as image_file:
            current_image=image_file.convert(mode="RGB")
        transformed_image=self.transform(current_image)
        current_label=self.image_labels[index]
        return transformed_image,current_label
    def __len__(self):
        """Number of samples, i.e. number of image paths."""
        return len(self.image_paths)

In [None]:
# Wrap both splits in the custom dataset, using its default transform (no image_transforms passed).
train_dataset = C_NC_Dataset(X_train, y_train)
val_dataset = C_NC_Dataset(X_test, y_test)

In [None]:
# Sanity check: display the transformed image (first element of the pair) of the first training sample.
train_dataset[0][0]

In [None]:
# Sanity check: display the label (second element of the pair) of the first training sample.
train_dataset[0][1]

In [None]:
# Mini-batch loaders (32 samples per batch).
# Training batches are reshuffled every epoch. Validation batches keep a fixed order:
# shuffling adds nothing to evaluation and only makes runs harder to compare.
train_loader=DataLoader(dataset=train_dataset,batch_size=32,shuffle=True)
val_loader=DataLoader(dataset=val_dataset,batch_size=32,shuffle=False)

In [None]:
# Report the number of batches and of samples behind each loader, one item per line.
print(
    f"number of batches in train_loader: {len(train_loader)}",
    f"number of records in train_loader: {len(train_loader.dataset)}",
    "-"*100,
    f"number of batches in val_loader: {len(val_loader)}",
    f"number of records in val_loader: {len(val_loader.dataset)}",
    sep="\n",
)

In [None]:
# construct model

class Convnet(nn.Module):
    """CNN producing one raw logit per input image.

    ``convnet`` stacks five ``Conv2d(3x3) -> BatchNorm2d -> ReLU -> MaxPool2d(2)``
    stages and flattens the result; ``classifier`` is a four-layer MLP with
    dropout in front of its first two linear layers.

    The first linear layer expects 12800 features, which is what the conv stack
    yields for a (N, 1, 250, 250) input: the spatial size goes
    250 -> 248 -> 124 -> 122 -> 61 -> 59 -> 29 -> 27 -> 13 -> 11 -> 5, and
    512 channels * 5 * 5 = 12800.

    Parameters
    ----------
    dropout : float, default 0.4
        Drop probability of both dropout layers in the classifier.
    """
    def __init__(self,dropout=0.4):
        super().__init__()

        # (in_channels, out_channels) of each convolutional stage, in order.
        channel_plan = [(1, 64), (64, 128), (128, 256), (256, 512), (512, 512)]

        feature_layers = []
        for n_in, n_out in channel_plan:
            feature_layers += [
                nn.Conv2d(n_in, n_out, kernel_size=3),  # no padding: height/width shrink by 2
                nn.BatchNorm2d(n_out),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2),            # halves height/width (floor)
            ]
        feature_layers.append(nn.Flatten())
        self.convnet = nn.Sequential(*feature_layers)

        self.classifier = nn.Sequential(
            nn.Dropout(p=dropout),
            nn.Linear(12800, 512),
            nn.ReLU(),
            nn.Dropout(p=dropout),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
        )

    def forward(self,x):
        """Run the conv stack, then the classifier; returns the logits."""
        features = self.convnet(x)
        return self.classifier(features)

In [None]:
# Instantiate the network (dropout probability 0.4) and print its layer structure.
model = Convnet(0.4)
print(model)

In [None]:
# torchinfo summary for a batch of 32 single-channel 250x250 images.
summary(model, input_size=(32, 1, 250, 250))

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
device

In [None]:
# Binary cross-entropy on raw logits (the sigmoid is applied inside the loss),
# optimised with Adam at a learning rate of 1e-4.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

In [None]:
def _run_epoch(model,loader,device,criterion,optimizer=None):
    """Run one pass over ``loader`` and return ``(mean_batch_loss, accuracy)``.

    The model is updated after every batch when ``optimizer`` is given; otherwise
    the pass is evaluation only. Switching train/eval mode and disabling autograd
    is left to the caller.
    """
    loss_sum=0.0
    n_batches=0
    n_correct=0
    n_seen=0

    for X,y in loader:
        X=X.to(device)
        # BCE-style criteria need float targets with the same shape as the logits.
        y=y.to(device).float().view(-1)

        # forward propagation
        outputs=model(X).view(-1)
        loss=criterion(outputs,y)

        if optimizer is not None:
            optimizer.zero_grad() # reset gradients so they do not accumulate across batches
            loss.backward()
            optimizer.step()

        # logits -> probabilities -> hard 0/1 predictions
        pred=torch.round(torch.sigmoid(outputs.detach()))

        n_correct+=(pred==y).sum().item()
        n_seen+=y.numel()
        loss_sum+=loss.item()
        n_batches+=1

    # max(..., 1) keeps an empty loader from raising ZeroDivisionError (yields 0.0).
    return loss_sum/max(n_batches,1),n_correct/max(n_seen,1)


def model_training(model,train_loader,val_loader,device,optimizer,criterion,batch_size,epochs,
                   save_path="/kaggle/working/ConvolutionalNeuralNetwork_model.pt"):
    """Train a binary classifier and evaluate it after every epoch.

    Parameters
    ----------
    model : nn.Module
        Network returning one logit per sample; moved to ``device``.
    train_loader, val_loader : iterable of ``(X, y)`` batches
        Training and validation batches.
    device : torch.device
        Device on which the model and the batches are placed.
    optimizer : torch.optim.Optimizer
        Optimizer already bound to ``model``'s parameters.
    criterion : callable
        Loss taking ``(logits, targets)``.
    batch_size : int
        Kept for backward compatibility and no longer used: accuracy is computed
        from the actual number of samples seen, so a smaller final batch no
        longer drags the reported accuracy down.
    epochs : int
        Number of passes over ``train_loader``.
    save_path : str or None
        Where the final ``state_dict`` is written; pass ``None`` to skip saving.

    Returns
    -------
    dict
        Keys ``train_accuracy``, ``train_loss``, ``val_accuracy``, ``val_loss``;
        each a list with one value per epoch. Accuracy is correct / total
        samples; loss is the mean of the per-batch losses.
    """
    model=model.to(device)
    history={"train_accuracy":[],"train_loss":[],"val_accuracy":[],"val_loss":[]}

    for epoch in range(epochs):
        model.train()
        train_loss,train_accuracy=_run_epoch(model,train_loader,device,criterion,optimizer)

        model.eval()
        with torch.no_grad():
            val_loss,val_accuracy=_run_epoch(model,val_loader,device,criterion)

        print(f"[{epoch+1:>2d}/{epochs:>2d}], train_accuracy:{train_accuracy:>5f}, train_loss:{train_loss:>5f}, val_accuracy:{val_accuracy:>5f}, val_loss:{val_loss:>5f}")

        history['train_accuracy'].append(train_accuracy)
        history['train_loss'].append(train_loss)
        history['val_accuracy'].append(val_accuracy)
        history['val_loss'].append(val_loss)

    if save_path is not None:
        torch.save(model.state_dict(),save_path)

    return history

In [None]:
# Train for 20 epochs and keep the per-epoch metrics for plotting.
history = model_training(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
    optimizer=optimizer,
    criterion=criterion,
    batch_size=32,
    epochs=20,
)

In [None]:
# One figure per metric (accuracy first, then loss), each comparing the training
# and validation curves over the epochs.
with plt.style.context(style="ggplot"):
    for plot_title, y_label, curves in (
        ('Accuracy plots', 'accuracy', (('train_accuracy', 'train accuracy'), ('val_accuracy', 'val accuracy'))),
        ('Loss plots', 'loss', (('train_loss', 'train loss'), ('val_loss', 'val loss'))),
    ):
        plt.figure(figsize=(18, 8))
        for history_key, curve_label in curves:
            plt.plot(history[history_key], label=curve_label)
        plt.title(label=plot_title)
        plt.xlabel(xlabel='epochs')
        plt.ylabel(ylabel=y_label)
        plt.legend()
        plt.show()