<a href="https://colab.research.google.com/github/Chinmaya-3141/Capstone-Project/blob/main/Messing_around_21_03_2023_Improving_Capstone_Classification_EfficientNetB4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **EfficientNet-B4**

## Install and import necessary libraries

In [None]:
# Record the Python interpreter and installed torch package details of this runtime.
!python --version
!pip show torch

Python 3.9.16
Name: torch
Version: 1.13.1+cu116
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: packages@pytorch.org
License: BSD-3
Location: /usr/local/lib/python3.9/dist-packages
Requires: typing-extensions
Required-by: efficientnet-pytorch, fastai, torchaudio, torchtext, torchvision



Python 3.9.16\
Name: torch\
Version: 1.13.1+cu116\
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration\
Home-page: https://pytorch.org/ \
Author: PyTorch Team\
Author-email: packages@pytorch.org\
License: BSD-3\
Location: /usr/local/lib/python3.9/dist-packages\
Requires: typing-extensions\
Required-by: fastai, torchaudio, torchtext, torchvision


In [None]:
!pip install torchinfo -qq
!pip install efficientnet_pytorch -qq

In [None]:
# Attach Google Drive to the Colab file system so the dataset and checkpoints are reachable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
import os
import time
from numba import cuda
import PIL
from time import time

In [None]:
import numpy as np
import pandas as pd

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import ImageFolder
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, ConcatDataset
from torchsummary import summary as tssum
from torchinfo import summary as tisum

In [None]:
# from torchvision.models import efficientnet_b7 as en7
from efficientnet_pytorch import EfficientNet as en4

## Data Preparation

### Import, transforms, augmentation

In [None]:
# Root directory of the image dataset on the mounted Drive. It is handed to
# ImageFolder through `list1`, so it presumably contains one sub-folder per
# class -- TODO confirm.
path = '/content/drive/MyDrive/Capstone Data (Shared)/Capstone Data/ResizedData/Resized Data_320x320'

In [None]:
# Pick the compute device: the first CUDA GPU when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

print("Using device:", device)

Using device: cuda:0


In [None]:
# Seed PyTorch's global random number generator. The train/test split below
# passes no explicit generator, so it relies on this seed and on cells being
# run in the same order.
torch.manual_seed(42)

<torch._C.Generator at 0x7f0dd44cecf0>

In [None]:
# Batch size used by both DataLoaders and by the model summary.
batchsize = 20
# Image side length and channel count; used for the model-summary input size.
# The transforms below hard-code the same 380 rather than reading `dimension`.
dimension=380
channels = 3

In [None]:
# Normalisation statistics shared by both pipelines (same value on every channel).
_NORM_MEAN = [0.5121, 0.5121, 0.5121]
_NORM_STD = [0.2298, 0.2298, 0.2298]

# Deterministic pipeline applied to the complete dataset:
# resize -> centre crop -> tensor -> normalise.
_base_steps = [
    transforms.Resize(380),
    transforms.CenterCrop(380),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
transform = transforms.Compose(_base_steps)

# Augmenting pipeline: identical to the base one except for a random rotation
# of up to 20 degrees inserted before the centre crop. It is used for the extra
# copy of the data from which 'Covid' and 'Normal' are dropped.
_augment_steps = [
    transforms.Resize(380),
    transforms.RandomRotation(degrees=20),
    transforms.CenterCrop(380),
    # transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=_NORM_MEAN, std=_NORM_STD),
]
augmented_transforms = transforms.Compose(_augment_steps)

### Combining

In [None]:
# Classes removed from the augmented copy of the data, so only the remaining
# class(es) receive extra, rotated samples.
drop_classes = ['Covid', 'Normal']
# (folder, transform, classes-to-drop) triples consumed by ConcatImageFolder:
# the full dataset with the plain transform, plus an augmented copy of the same
# folder without the dropped classes.
list1 = [(path,transform,[]),(path,augmented_transforms,drop_classes)]

In [None]:
class ConcatImageFolder(ConcatDataset):
    """Concatenation of several ``ImageFolder`` datasets, each with its own transform.

    Parameters
    ----------
    folder_transforms : iterable of (folder, transform, drop_classes)
        ``folder`` is the root directory passed to ``ImageFolder``,
        ``transform`` is the callable applied to every image of that copy, and
        ``drop_classes`` is a collection of class names whose samples are
        removed from that copy.

    Attributes
    ----------
    classes : list of str
        Union of the class names kept by the constituent datasets, ordered by
        class index so that the list order is deterministic and agrees with
        ``class_to_idx``.
    class_to_idx : dict of str -> list of int
        For every class name, the distinct indices it has across the
        constituent datasets (a one-element list when they all agree).

    Notes
    -----
    Sample labels are NOT re-numbered when classes are dropped: a kept sample
    retains the index it had in the unfiltered folder, so labels stay
    consistent across the concatenated copies.
    """

    def __init__(self, folder_transforms):
        subsets = []
        index_maps = []
        for folder, transform, drop_classes in folder_transforms:
            data = ImageFolder(folder, transform=transform)
            # Filter against the *unfiltered* class list: labels index into it.
            kept = [(item, label) for item, label in data.samples
                    if data.classes[label] not in drop_classes]
            data.samples = kept
            # Keep ImageFolder's derived views in sync with the filtered samples.
            data.imgs = kept
            data.targets = [label for _, label in kept]
            data.classes = [name for name in data.classes if name not in drop_classes]
            subsets.append(data)
            index_maps.append(data.class_to_idx)

        # Let ConcatDataset build the cumulative-size index over the subsets.
        super().__init__(subsets)

        self.class_to_idx = self._combine_dicts(index_maps)

        # A bare set has arbitrary iteration order; sort by class index (name as
        # tie-breaker) so `classes[i]` is reproducible and matches the labels.
        kept_names = set().union(*(subset.classes for subset in subsets))
        self.classes = sorted(
            kept_names, key=lambda name: (min(self.class_to_idx[name]), name)
        )

    @staticmethod
    def _combine_dicts(dicts):
        """Merge mappings into ``{key: [distinct values in first-seen order]}``."""
        combined = {}
        for mapping in dicts:
            for key, value in mapping.items():
                seen = combined.setdefault(key, [])
                if value not in seen:
                    seen.append(value)
        return combined

In [None]:
# Build the combined dataset: the full data plus the augmented copy described by `list1`.
datavar = ConcatImageFolder(list1)

In [None]:
# Class names collected across the constituent datasets.
datavar.classes

['Covid', 'Pneumonia', 'Normal']

In [None]:
# Mapping from class name to the list of label indices it has in the constituent datasets.
datavar.class_to_idx

{'Covid': [0], 'Normal': [1], 'Pneumonia': [2]}

In [None]:
# Total sample count of the concatenated dataset (original plus augmented copy).
print(f"Number of images: {len(datavar)}")

Number of images: 9307


### Train-Test-Split

In [None]:
# 80/20 random split of the combined dataset; the test part takes the remainder
# so the two sizes always add up to len(datavar).
# NOTE(review): `datavar` holds the same folder twice (plain + augmented copy),
# and the split is drawn over the concatenation, so an image and its augmented
# copy can land on opposite sides of the split.
n_train = int(0.8 * len(datavar))
n_test = len(datavar) - n_train
train_data, test_data = torch.utils.data.random_split(datavar, [n_train, n_test])

In [None]:
# Batch iterators over the two splits: training batches are reshuffled every
# epoch, test batches keep a fixed order.
_loader_kwargs = {"batch_size": batchsize}
train_loader = DataLoader(train_data, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_data, shuffle=False, **_loader_kwargs)

In [None]:
# Calculate the mean and standard deviation of the training data

# mean = 0.0
# std = 0.0
# num_samples = 0

# for images, _ in train_loader:
#     batch_samples = images.size(0)
#     images = images.view(batch_samples, images.size(1), -1).to(device)
#     mean += images.mean(2).sum(0).to(device)
#     std += images.std(2).sum(0).to(device)
#     num_samples += batch_samples

# mean /= num_samples
# std /= num_samples

# print("Mean:", mean)
# print("Standard deviation:", std)

## Modelling

### Setup

In [None]:
# Learning rate passed to the AdamW optimizer.
learning_rate=1e-4

In [None]:
# Multi-class classification loss applied to the raw model outputs.
criterion = nn.CrossEntropyLoss()
# NOTE(review): despite its name, this value is passed to AdamW as `weight_decay`,
# not as a learning-rate decay factor.
lr_decay=0.1
# lr_decay=0.99

In [None]:
# Per-batch training history, appended to by the training loop and plotted afterwards.
history_accuracy=[]
history_loss=[]
# Upper bound of the epoch range used by the training loop.
epochs = 11

In [None]:
# Label helpers used by the training loop.

# 3x3 identity matrix on the compute device; indexing it with integer labels
# yields one-hot rows (the size 3 is hard-coded to the number of classes).
eye = torch.eye(3).to(device)

# Integer class labels; used to size and index the per-class accuracy counters.
classes=[0,1,2]

In [None]:
# Sanity check: `classes` is a plain Python list.
type(classes)

list

### Create model, send to GPU

In [None]:
# EfficientNet-B4 expects 380x380 input images

# model = en4.from_pretrained('efficientnet-b4', num_classes=3)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b4-6ed6700e.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b4-6ed6700e.pth


  0%|          | 0.00/74.4M [00:00<?, ?B/s]

Loaded pretrained weights for efficientnet-b4


In [None]:
# Checkpoint to resume from. The training loop later rebinds `file` to each new checkpoint path.
file = '/content/drive/MyDrive/Capstone Data (Shared)/Capstone Data/TrainedModelCheckpoints/EN4-Augment-epoch 6 model.pth'

In [None]:
# Read the saved training state (epoch, model/optimizer state dicts, loss).
# Map the tensors onto the device selected earlier instead of a hard-coded
# 'cuda:0', so the notebook also loads on a CPU-only runtime.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load(file, map_location=device)

In [None]:
# Build EfficientNet-B4 with a 3-class output layer. The weights loaded here
# are replaced further down by the checkpoint's `model_state_dict`.
model = en4.from_pretrained('efficientnet-b4',num_classes = 3)

Loaded pretrained weights for efficientnet-b4


In [None]:
# Move the model's parameters to the selected device. This is done before the
# optimizer is created from `model.parameters()`.

model.to(device)

In [None]:
# AdamW over all model parameters; `lr_decay` supplies the weight-decay coefficient.
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay = lr_decay)

In [None]:
# Restore model weights and optimizer state from the checkpoint; the return
# value of each load_state_dict call is printed.
print(model.load_state_dict(checkpoint['model_state_dict']))
print(optimizer.load_state_dict(checkpoint['optimizer_state_dict']))
# Epoch index and loss stored with the checkpoint. Both names are rebound by
# the training loop below.
epoch = checkpoint["epoch"]
# epoch.to(device)
loss = checkpoint['loss']
# loss.to(device)

<All keys matched successfully>
None


AttributeError: ignored

In [None]:
# Layer-by-layer torchinfo summary for one batch-shaped input.
model_report = tisum(model, input_size=(batchsize, channels, dimension, dimension))
print(model_report)

### Training

In [None]:
# Train the model
#
# For every batch: forward pass, cross-entropy loss, backward pass, optimizer
# step. The running accuracy of the epoch and the batch loss are appended to
# `history_accuracy` / `history_loss`. Per-class accuracy is printed after each
# epoch, and a checkpoint is written for every epoch after the third.

# Continue after the epoch stored in a loaded checkpoint (it holds the 0-based
# index of the last finished epoch) instead of restarting at 0, which would
# overwrite existing "epoch N" checkpoint files with mislabelled models.
_resume_state = globals().get("checkpoint")
start_epoch = _resume_state["epoch"] + 1 if _resume_state is not None else 0

for epoch in range(start_epoch, epochs):
    # The evaluation cell switches the model to eval mode; make sure dropout
    # and batch-norm are back in training mode whenever this cell is (re-)run.
    model.train()

    running_loss = 0.0
    correct = 0
    total = 0
    class_correct = [0. for _ in classes]
    class_total = [0. for _ in classes]

    for i, (inputs, labels) in enumerate(train_loader, 0):
        t0 = time()
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        # CrossEntropyLoss takes integer class labels directly.
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        predicted = torch.argmax(outputs, 1)
        hits = predicted == labels
        correct += hits.sum().item()
        total += labels.size(0)
        accuracy = float(correct) / float(total)

        history_accuracy.append(accuracy)
        # Detach so the stored tensor does not keep a reference to the autograd graph.
        history_loss.append(loss.detach())

        # Per-class bookkeeping on plain Python values; this also works for a
        # final batch that contains a single sample.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

        running_loss += loss.item()

        print( "Epoch : ",epoch+1," Batch : ", i+1," Loss :  ",running_loss/(i+1)," Accuracy : ",accuracy,"Time ",round(time()-t0, 2),"s" )

    for k in range(len(classes)):
        if class_total[k] != 0:
            print('Accuracy of %5s : %.3f %%' % (classes[k], 100 * class_correct[k] / class_total[k]))

    print('[%d epoch] Accuracy of the network on the Training images: %.3f %%' % (epoch+1, 100 * correct / total))

    # Keep a checkpoint for every epoch after the third.
    if (epoch+1) > 3:
        file = f"/content/drive/MyDrive/Capstone Data (Shared)/Capstone Data/TrainedModelCheckpoints/EN4-Augment-epoch {epoch+1} model.pth"
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            # Store a plain float rather than a device tensor with autograd history.
            'loss': loss.item()
        }, file)
      

## Visualisation of accuracy and loss

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Convert every recorded loss tensor to a NumPy value on the host so matplotlib can plot it.
hist_loss = [batch_loss.cpu().detach().numpy() for batch_loss in history_loss]

In [None]:
# Display the converted loss values (one entry per training batch).
hist_loss

In [None]:
# Plot the per-batch running training accuracy and batch loss on one figure.
# Labels, a legend and axis titles are added so the two curves can be told
# apart when the notebook is skimmed.
plt.style.use("ggplot")
fig, ax = plt.subplots(figsize=(16, 8))
ax.plot(history_accuracy, label="Running training accuracy")
ax.plot(hist_loss, label="Batch training loss")
ax.set_xlabel("Training batch")
ax.set_ylabel("Value")
ax.set_title("Training accuracy and loss per batch")
ax.legend();

In [None]:
# plt.savefig("LossAndAccuracy",dpi=2048, format = png)

## Model Evaluation

In [None]:
# Switch the model to evaluation mode before scoring the test split.
model.eval()

In [None]:
# Score the model on the test split without tracking gradients. Overall
# accuracy is accumulated batch by batch, and the per-batch predictions and
# true labels are kept as NumPy arrays for the confusion matrix further down.

correct, total = 0, 0
predictions, actuals = [], []
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        # Predicted class = index of the largest output along the class dimension.
        predicted = outputs.data.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        pred_arr = predicted.cpu().detach().numpy()
        actual_arr = labels.cpu().detach().numpy()
        predictions.append(pred_arr)
        actuals.append(actual_arr)

print('Accuracy of the network on the test images: %f %%'% (100 * correct / total))

In [None]:
# Join the per-batch arrays into one flat prediction vector and one flat label vector.
pred, act = (np.concatenate(chunks) for chunks in (predictions, actuals))

In [None]:
# NOTE(review): `predicted` and `labels` here are whatever the evaluation loop
# left behind, i.e. the final test batch only. The crosstab below uses
# `pred` / `act`, not these two arrays.
pred_arr = predicted.cpu().detach().numpy()
actual_arr = labels.cpu().detach().numpy()

In [None]:
# Confusion matrix over the whole test split: rows are predicted labels, columns are true labels.
pd.crosstab(pred,act,rownames=['Predicted'], colnames=['Actual'])

In [None]:
# Show the class names again, presumably to help read the table above.
datavar.classes

In [None]:
# Show the class-name -> label-index mapping that the integer labels in the table refer to.
datavar.class_to_idx