**Download Dataset**

In [0]:
import gdown

# Google Drive link of the zipped assignment dataset.
url = 'https://drive.google.com/a/itu.edu.pk/uc?id=1eytbwaLQBv12psV8I-aMkIli9N3bf8nO&export=download'

# Fetch the archive into the Drive folder, showing download progress.
gdown.download(
    url=url,
    output='/content/drive/My Drive/Assignment 05_P2.zip',
    quiet=False,
)

**Unzip Dataset**

In [0]:
# Extract the downloaded archive; -d sets the directory the files are unpacked into.
!unzip  '/content/drive/My Drive/Assignment 05_P2.zip' -d  '/content/drive/My Drive'

**Import Libraries**

In [0]:
import copy
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import multilabel_confusion_matrix
from sklearn.utils.multiclass import unique_labels
from torch.autograd import Variable
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from tqdm import notebook
from tqdm import tqdm

**Load Data**

In [0]:
data_dir = '/content/drive/My Drive/A_05_Part_02_Dataset'

# Training pipeline: random augmentation first, then tensor conversion and
# per-channel normalisation.
train_transforms = transforms.Compose([
    transforms.RandomRotation(30),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Validation pipeline: deterministic resize + centre crop, same normalisation.
valid_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Folder-per-class datasets; the transform is applied when a sample is loaded.
train_data = datasets.ImageFolder(data_dir + '/Train', transform=train_transforms)
valid_data = datasets.ImageFolder(data_dir + '/Validation', transform=valid_transforms)

# Batched, shuffled loaders with two worker processes and pinned host memory.
trainloader = torch.utils.data.DataLoader(
    train_data, batch_size=32, shuffle=True, num_workers=2, pin_memory=True)
validloader = torch.utils.data.DataLoader(
    valid_data, batch_size=16, shuffle=True, num_workers=2, pin_memory=True)

# Class names as discovered by ImageFolder from the training directory.
class_names = train_data.classes
print("Classes: ")
print(class_names)

**One_Hot_encoding Function**

In [0]:
def one_hot_encode(X):
  """Map integer class labels to 3-element float target vectors.

  Args:
    X: iterable of labels (e.g. a 1-D tensor of class indices).

  Returns:
    torch.FloatTensor with one row per recognised label. Labels other than
    0, 1 or 2 produce no row at all.

  NOTE(review): label 0 maps to [1, 0, 1], i.e. two active entries. This
  looks like a deliberate multi-label target (the notebook trains with a
  per-class BCE loss) rather than a strict one-hot code -- confirm.
  """
  label_vectors = (
      (0, [1., 0., 1.]),
      (1, [0., 1., 0.]),
      (2, [0., 0., 1.]),
  )
  encoded = [
      vector
      for label in X
      for index, vector in label_vectors
      if label == index
  ]
  return torch.FloatTensor(encoded)

**Focal Loss Function**

In [0]:
class FocalLoss(nn.Module):
    """Binary focal loss computed on raw logits.

    Each element's binary cross-entropy is scaled by (1 - pt) ** gamma, where
    pt = exp(-BCE) is the probability assigned to the correct outcome, so
    well-classified elements contribute less to the loss.

    Args:
        gamma: focusing exponent; 0 reduces the loss to plain BCE.
        reduce: if True return the mean over all elements, otherwise return
            the per-element loss tensor (same shape as the inputs).
    """
    def __init__(self, gamma=2, reduce=True):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        self.reduce = reduce

    def forward(self, inputs, targets):
        """Return the focal loss for logits `inputs` against float `targets`."""
        # reduction='none' keeps one BCE value per element. With the default
        # mean reduction the focal weight would be derived from a single batch
        # average, which defeats the per-example down-weighting.
        BCE_loss = nn.functional.binary_cross_entropy_with_logits(
            inputs, targets, reduction='none')
        pt = torch.exp(-BCE_loss)
        F_loss = (1 - pt) ** self.gamma * BCE_loss
        if self.reduce:
            return torch.mean(F_loss)
        return F_loss

**Training Function**

In [0]:
def _exact_match_count(logits, targets, threshold=0.5):
  """Count rows whose thresholded sigmoid outputs equal every target entry."""
  predictions = (torch.sigmoid(logits) >= threshold).to(targets.dtype)
  return (predictions == targets).all(dim=1).sum().item()


def training_fun(model,Epochs, optimizer, output_file_name,Criterion=nn.BCEWithLogitsLoss(),isFocal = False ):
  """Train `model`, validate after every epoch, checkpoint and plot curves.

  Reads the notebook-level `trainloader` and `validloader` and encodes their
  integer labels with `one_hot_encode`.

  Args:
    model: network producing one logit per target column.
    Epochs: number of passes over `trainloader`.
    optimizer: optimizer already bound to `model`'s parameters.
    output_file_name: file name (under '/content/drive/My Drive/') that the
      state dict is saved to after every epoch.
    Criterion: loss used when `isFocal` is False.
    isFocal: when True, `FocalLoss()` is used instead of `Criterion`.

  Returns:
    None. Accuracy is printed per epoch; loss and accuracy curves are plotted
    at the end. A sample counts as correct only if every output matches.
  """
  # Use the GPU when one is available; both model and batches are moved to it.
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  model.to(device)

  # The loss is the same for every batch, so pick it once.
  loss_fn = FocalLoss() if isFocal else Criterion

  lossEpochs = []
  accEpochs = []
  lossValid = []
  accValid = []
  for epoch in range(Epochs):  # loop over the dataset multiple times
    # Validation below switches to eval mode, so re-enable train mode here.
    model.train()
    correct = 0
    total = 0
    running_loss = 0.0
    pbar = tqdm(enumerate(trainloader), total=len(trainloader))
    for i, (inputs, original_labels) in pbar:
      labels = one_hot_encode(original_labels)
      inputs, labels = inputs.to(device), labels.to(device)

      # Gradients accumulate across backward() calls, so clear them first.
      optimizer.zero_grad()

      outputs = model(inputs)            #----> forward pass
      loss = loss_fn(outputs, labels)    #----> compute loss
      loss.backward()                    #----> backward pass
      optimizer.step()                   #----> weights update

      # Detach so the accuracy bookkeeping stays out of the autograd graph.
      c_count = _exact_match_count(outputs.detach(), labels)
      correct += c_count
      total += labels.size(0)
      running_loss += loss.item()

      pbar.set_description(
          'Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}\tCorrect: {}\tTotal: {}'.format(
              epoch, i * len(inputs), len(trainloader.dataset),
              100. * i / len(trainloader),
              loss.item(), c_count, labels.size(0)))

    acc = 100 * correct / max(total, 1)
    print('Training Accuracy of Epoch ' + str(epoch) + ': %d %%' % (acc))
    # Store the mean batch loss so it is comparable with the validation curve.
    lossEpochs.append(running_loss / max(len(trainloader), 1))
    accEpochs.append(acc)

    # Validation accuracy and loss (dropout / batch-norm in inference mode).
    model.eval()
    correct = 0
    total = 0
    valid_loss = 0.0
    with torch.no_grad():
      for images, original_labels in validloader:
        labels = one_hot_encode(original_labels)
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        valid_loss += loss_fn(outputs, labels).item()
        correct += _exact_match_count(outputs, labels)
        total += labels.size(0)
    acc = 100 * correct / max(total, 1)
    print('Validation Accuracy of Epoch ' + str(epoch) + ': %d %%' % (acc))
    lossValid.append(valid_loss / max(len(validloader), 1))
    accValid.append(acc)
    torch.save(model.state_dict(), '/content/drive/My Drive/'+output_file_name)

  plt.plot(lossEpochs, label="Train")
  plt.plot(lossValid, label="Validation")
  plt.xlabel("No. of Epochs")
  plt.ylabel("Cross Entropy Loss")
  plt.legend()
  plt.show()

  plt.plot(accEpochs, label="Train")
  plt.plot(accValid, label="Validation")
  plt.xlabel("No. of Epochs")
  plt.ylabel("Accuracy")
  plt.legend()
  plt.show()
  print('Finished Training')

**Testing Function**

In [0]:
def testing_fun(model,dataset):
  """Evaluate `model` on a loader and print accuracy, F1 and confusion matrices.

  Args:
    model: network producing one logit per target column.
    dataset: iterable of (images, integer_labels) batches, e.g. `validloader`.

  Raises:
    ValueError: if `dataset` yields no batches.

  Prints exact-match accuracy (all outputs of a sample must be right), the
  weighted F1 score as a percentage, and one 2x2 confusion matrix per class.
  """
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
  model.to(device)
  # Inference mode: without this dropout stays active and results vary per run.
  model.eval()

  batch_predictions = []
  batch_labels = []
  with torch.no_grad():
      for data in dataset:
          images, original_labels = data
          labels = one_hot_encode(original_labels)
          images = images.to(device)
          outputs = model(images)
          # Threshold the per-class sigmoid probabilities at 0.5.
          predicted = (torch.sigmoid(outputs) >= 0.5).float()
          batch_predictions.append(predicted.cpu())
          batch_labels.append(labels)

  if not batch_labels:
    raise ValueError("testing_fun received an empty dataset")

  # One device-to-host copy per batch instead of one per sample.
  labels_List = torch.cat(batch_labels).numpy()
  predicted_List = torch.cat(batch_predictions).numpy()

  total = labels_List.shape[0]
  correct = int((predicted_List == labels_List).all(axis=1).sum())

  F1_Score = f1_score(y_true = labels_List, y_pred= predicted_List, average='weighted')
  conf_matrix = multilabel_confusion_matrix(labels_List,predicted_List)
  acc = 100 * correct / total
  print('Accuracy : %d %%' % (acc))
  # f1_score is in [0, 1]; scale it so the percentage format is meaningful.
  print('F1 Score : %d %%' % (100 * F1_Score))
  print('Covid19\n ', conf_matrix[0])
  print('Normal\n  ',  conf_matrix[1])
  print('Pneumonia\n ',conf_matrix[2])

**Load VGG16 Model**

In [0]:
vgg16 = models.vgg16(pretrained=False)

# Input width of the first classifier layer; the new head must accept it.
fl_neu_inp = vgg16.classifier[0].in_features

# Keep everything except the last seven classifier layers.
# NOTE(review): torchvision's VGG16 classifier presumably has exactly seven
# layers, in which case this list starts out empty -- confirm.
features = list(vgg16.classifier)[:-7]
features += [
    nn.Linear(fl_neu_inp, 460),
    nn.ReLU(inplace=True),
    nn.Dropout(inplace=False),
    nn.Linear(460, len(class_names)),
]
vgg16.classifier = nn.Sequential(*features)

**Set Hyper Parameters**

In [0]:
# Training configuration for the VGG16 run with plain BCE-with-logits loss.
Epochs = 15
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(params=vgg16.parameters(), lr=0.001, momentum=0.9)

**Train VGG16 Without Focal Loss**

In [0]:
# Train VGG16 with the BCE criterion; the checkpoint is saved as "new_vgg16_new".
training_fun(
    model=vgg16,
    Epochs=Epochs,
    optimizer=optimizer,
    output_file_name="new_vgg16_new",
    Criterion=criterion,
)

**Test On Validation Data**

In [0]:
# Report accuracy, F1 and per-class confusion matrices on the validation loader.
testing_fun(model=vgg16, dataset=validloader)

**Load Resnet18 Model**

In [0]:
resnet = models.resnet18(pretrained=False)

# Swap the final fully connected layer for a two-layer head with one output
# per class.
resnet_in_features = resnet.fc.in_features
resnet.fc = nn.Sequential(
    nn.Linear(resnet_in_features, 1050, bias=True),
    nn.ReLU(inplace=True),
    nn.Dropout(p=0.5, inplace=False),
    nn.Linear(1050, len(class_names), bias=True),
)

# Mark every parameter as trainable (nothing is frozen).
for parameter in resnet.parameters():
    parameter.requires_grad = True

**Setting Hyper Parameters**

In [0]:
# Training configuration for the ResNet18 run with plain BCE-with-logits loss.
Epochs = 15
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.SGD(params=resnet.parameters(), lr=0.001, momentum=0.9)

**Training ResNet18 Without Focal Loss**

In [0]:
# Train ResNet18 with the BCE criterion; the checkpoint is saved as "new_resnet_new".
training_fun(
    model=resnet,
    Epochs=Epochs,
    optimizer=optimizer,
    output_file_name="new_resnet_new",
    Criterion=criterion,
)

**Testing on Validation Data**

In [0]:
# Report accuracy, F1 and per-class confusion matrices on the validation loader.
testing_fun(model=resnet, dataset=validloader)

**Changing Weights of VGG16 For Focal_Loss Model**

In [0]:
# Start from a new, untrained VGG16 for the focal-loss experiment.
vgg16 = models.vgg16(pretrained=False)

# Input width of the first classifier layer; the new head must accept it.
fl_neu_inp = vgg16.classifier[0].in_features

# Keep everything except the last seven classifier layers.
# NOTE(review): torchvision's VGG16 classifier presumably has exactly seven
# layers, in which case this list starts out empty -- confirm.
features = list(vgg16.classifier)[:-7]
features += [
    nn.Linear(fl_neu_inp, 1000),
    nn.ReLU(inplace=True),
    nn.Dropout(inplace=False),
    nn.Linear(1000, len(class_names)),
]
vgg16.classifier = nn.Sequential(*features)




**Setting Hyper Parameters**

In [0]:
# Training configuration for the focal-loss VGG16 run (the loss is chosen by
# the isFocal flag at call time, so no criterion is created here).
Epochs = 15
optimizer = optim.SGD(params=vgg16.parameters(), lr=0.001, momentum=0.9)

**Training VGG16 With Focal Loss**

In [0]:
# Train VGG16 with focal loss. The checkpoint gets its own VGG16-specific name:
# the previous "resnet_new" was a copy-paste slip and is also the file the
# focal-loss ResNet18 run writes to, which would overwrite these weights.
training_fun(vgg16,Epochs,optimizer,"vgg16_focal_new",isFocal=True)

**Testing On Validation Data**

In [0]:
# Report accuracy, F1 and per-class confusion matrices on the validation loader.
testing_fun(model=vgg16, dataset=validloader)

**Changing Weights of ResNet18 for Training with Focal Loss**

In [0]:
# Start from a new, untrained ResNet18, mirroring how VGG16 is re-created for
# its focal-loss run. This is also required for correctness: once `resnet.fc`
# has been replaced by an nn.Sequential it no longer has `in_features`, so
# reading it from the previously modified model raises AttributeError.
resnet = models.resnet18(pretrained=False)
resnet.fc = nn.Sequential(
  nn.Linear(resnet.fc.in_features, 256, bias=True),
  nn.ReLU(inplace=True),
  nn.Dropout(p=0.5, inplace=False),
  nn.Linear(256, len(class_names), bias=True)
)
# Mark every parameter as trainable (nothing is frozen).
for param in resnet.parameters():
    param.requires_grad = True

**Setting Hyper Parameters**

In [0]:
# Training configuration for the focal-loss ResNet18 run (the loss is chosen by
# the isFocal flag at call time, so no criterion is created here).
Epochs = 15
optimizer = optim.SGD(params=resnet.parameters(), lr=0.001, momentum=0.9)

**Training ResNet18 Model with Focal Loss**

In [0]:
# Train ResNet18 with focal loss; the checkpoint is saved as "resnet_new".
training_fun(
    model=resnet,
    Epochs=Epochs,
    optimizer=optimizer,
    output_file_name="resnet_new",
    isFocal=True,
)

**Testing using Validation Data**

In [0]:
# Report accuracy, F1 and per-class confusion matrices on the validation loader.
testing_fun(model=resnet, dataset=validloader)

**Loading Test DataSet**

In [0]:

# Deterministic test-time preprocessing: resize, centre crop, tensor
# conversion and per-channel normalisation.
test_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])
class ImageFolderWithPaths(datasets.ImageFolder):
    """ImageFolder variant whose samples also carry the image file path."""

    def __getitem__(self, index):
        """Return the parent's sample tuple with the file path appended."""
        sample = super().__getitem__(index)
        # self.imgs[index] is indexed at 0 to obtain the file path.
        file_path = self.imgs[index][0]
        return sample + (file_path,)

# Test set with file paths attached to every sample, served in batches of 32.
test_data = ImageFolderWithPaths(
    '/content/drive/My Drive/A_05_Part_02_Dataset/Test',
    transform=test_transforms,
)
testloader = torch.utils.data.DataLoader(
    test_data,
    batch_size=32,
    shuffle=True,
)

**Predicting Test Dataset**

In [0]:
# `device` is only a local variable inside training_fun / testing_fun, so it
# has to be defined here before batches can be moved to it.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
vgg16.to(device)
# Inference mode: otherwise dropout stays active and predictions are random.
vgg16.eval()

predicted_labels = []
with torch.no_grad():
    # The labels from the loader are not needed for prediction.
    for images, _, paths in testloader:
        outputs = vgg16(images.to(device))

        # Threshold the per-class sigmoid probabilities at 0.5.
        thresholded_batch = (torch.sigmoid(outputs) >= 0.5).int().tolist()

        for path, thresholded_vector in zip(paths, thresholded_batch):
            # Swap columns 1 and 2 to match the order in which the hidden
            # results are present.
            thresholded_vector[1], thresholded_vector[2] = thresholded_vector[2], thresholded_vector[1]
            predicted_labels.append([os.path.basename(path)] + thresholded_vector)

print(predicted_labels)
results = predicted_labels


**Saving Predictions to a CSV File**

In [0]:
import pandas as pd

# One row per test image: file name followed by the three thresholded outputs.
pd.DataFrame(predicted_labels).to_csv('MSDS19095_results.csv')