### Build the ResNeXt-50 and ResNeXt-101 networks and retrain them on a breast cancer image database

In [None]:
# This notebook was developed and tested on Google Colab.
# Attach Google Drive to the Colab file system at /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

### Import the libraries

In [None]:
import torch
from torch import nn, optim
from torchvision import datasets ,transforms, models
import numpy as np
import matplotlib.pyplot as plt
import helper
import math
import time as evaltime
from torch import nn
from collections import OrderedDict
import tqdm
from tqdm import tqdm
import pandas as pd

In [None]:
# Run on the first CUDA GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

### Load the pre-trained model and replace the FC layer to tailor it to the number of classes (2 or 8)

In [None]:
# Download the pretrained ResNeXt-101 (32x8d) model from torchvision.
ResNext101_2c = models.resnext101_32x8d(pretrained=True)

# Freeze the pretrained weights so that they are not updated during training.
# Autograd reads the attribute `requires_grad`; the earlier spelling
# `required_grad` merely attached an unused attribute to each tensor, which
# left every pretrained weight trainable.
for params in ResNext101_2c.parameters():
    params.requires_grad = False

In [None]:
# Classification head: three linear layers (2048 -> 1024 -> 1024 -> 2) with
# ReLU and dropout in between, ending in log-probabilities over the 2 classes.
# The layer names are kept as-is because they become the state_dict keys of
# saved checkpoints.
classifier = nn.Sequential(OrderedDict([
    ('0', nn.Linear(2048, 1024)),
    ('1', nn.ReLU(inplace=True)),
    ('2', nn.Dropout(p=0.5)),
    ('3', nn.Linear(1024, 1024)),
    ('4', nn.ReLU(inplace=True)),
    ('5', nn.Dropout(p=0.5)),
    ('6', nn.Linear(1024, 2)),
    ('output', nn.LogSoftmax(dim=1)),
]))

# Replace the model's original fully connected layer with the new head.
ResNext101_2c.fc = classifier

# Make sure the parameters of the new head are trainable.
# The attribute autograd reads is `requires_grad` (not `required_grad`, which
# would only create an unused attribute on the tensor).
for params in ResNext101_2c.fc.parameters():
    params.requires_grad = True

### Define the loss function and the optimizer

In [None]:
# Move the model to the selected device (GPU if available) *before* building
# the optimizer, as the torch.optim documentation recommends, so the optimizer
# is constructed from the parameters that will actually be trained.
ResNext101_2c.to(device)

# Negative log-likelihood loss (the model's final layer is a LogSoftmax) and
# the Adam optimizer with a learning rate of 0.0002.
critirion = nn.NLLLoss()
optimizer = optim.Adam(ResNext101_2c.parameters(), lr=0.0002)


### Load the images into the train and test loaders, with a ratio of 90/10

In [None]:
# Location of the image folders (one sub-folder per class) and of the folders
# that receive checkpoints and per-epoch statistics.
dir_two_Classes = 'BreaKHis_v1/histology_slides/breast_cancer_images/'

dir_checkpoints = 'to_save_checkpoints/'
dir_statistics = 'to_save_statistics/'

# Fraction of the images used for training; the remainder is held out (90/10).
ratio = 0.9

# Per-channel normalisation statistics shared by both pipelines below.
_NORM_MEAN = (0.485, 0.456, 0.406)
_NORM_STD = (0.229, 0.224, 0.225)

# Training pipeline: resize, random augmentation, tensor conversion, normalisation.
train_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.RandomRotation(45),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Held-out pipeline: same resize and normalisation but NO random augmentation,
# so that the test loss/accuracy (and the "best checkpoint" decision based on
# them) are not perturbed by random rotations and flips.
test_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(_NORM_MEAN, _NORM_STD),
])

# Two views of the same folder that differ only in their transform.
# ImageFolder lists classes and files in sorted order, so index i refers to
# the same image in both views.
dataset = datasets.ImageFolder(dir_two_Classes, transform=train_transform)
_test_view = datasets.ImageFolder(dir_two_Classes, transform=test_transform)

# Sizes of the two splits.
train_sample_len = math.ceil(len(dataset) * ratio)
test_sample_len = len(dataset) - train_sample_len

# Random, disjoint split: one shuffled permutation of the indices is cut in
# two, which is what random_split does, except that each part can be attached
# to the view with the appropriate transform.
_shuffled = torch.randperm(len(dataset)).tolist()
trainset = torch.utils.data.Subset(dataset, _shuffled[:train_sample_len])
testset = torch.utils.data.Subset(_test_view, _shuffled[train_sample_len:])

# Training batches of 32 shuffled images; the test loader yields one image at
# a time in a fixed order.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, num_workers=4, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=1, num_workers=4, shuffle=False)

# Class names, taken from the sub-folder names (2 classes in this work).
classes = trainset.dataset.classes
# Report the number of images and the class names.
print(len(dataset), classes)

### Display one example image to confirm that the training set has been loaded and that data augmentation has been applied

In [None]:
# Fetch one batch from the training loader, which is the loader this preview
# is meant to check (training data with augmentation applied).
# The built-in next() is used because the Python-2 style `.next()` method is
# not available on DataLoader iterators in current PyTorch releases.
images, _ = next(iter(trainloader))

# matplotlib expects height x width x channels, tensors are channels-first.
img = images[0].permute(1, 2, 0)

# Undo the Normalize() step so the image shows with natural colours instead
# of being clipped; these values must match the ones used in train_transform.
_mean = torch.tensor((0.485, 0.456, 0.406))
_std = torch.tensor((0.229, 0.224, 0.225))
img = (img * _std + _mean).clamp(0, 1)

plt.imshow(img)

## Train the model

In [None]:
import os  # only needed here, to create the output folders

epochs = 100
# NOTE: steps, print_every and running_loss are not read by this cell; they
# are kept only in case another cell refers to them.
steps = 0
print_every = 100
running_loss = 0
# np.inf is the spelling available in every NumPy version (np.Inf was removed
# in NumPy 2.0).
min_val_loss = np.inf
results = {'epoch': [], 'training_loss': [], 'test_loss': [], 'test_accuracy': [], 'training_accuracy': []}

# Create the output folders up front so that saving a checkpoint or the CSV
# cannot fail with a missing-directory error after an epoch of training.
for _out_dir in (dir_checkpoints, dir_statistics):
    if _out_dir:
        os.makedirs(_out_dir, exist_ok=True)


def _run_epoch(model, bar, loss_fn, progress_fmt, optimizer=None):
    """Run one full pass over the batches yielded by ``bar``.

    Args:
        model: the network to train or evaluate.
        bar: a tqdm progress bar wrapping a DataLoader of (images, labels).
        loss_fn: callable returning the mean loss of a batch.
        progress_fmt: %-format string with two float slots (loss, accuracy)
            used as the progress-bar description.
        optimizer: when given, the pass trains the model (backward pass and
            optimizer step per batch); when None, the pass only evaluates,
            with the model in eval mode and gradients disabled.

    Returns:
        (mean_loss, accuracy), both averaged over samples rather than over
        batches, so a smaller final batch does not skew the result.

    Raises:
        ValueError: if the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)  # train(False) is equivalent to eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.set_grad_enabled(training):
        for images, labels in bar:
            images, labels = images.to(device), labels.to(device)
            if training:
                optimizer.zero_grad()
            output = model(images)
            loss = loss_fn(output, labels)
            if training:
                loss.backward()
                optimizer.step()
            batch_len = labels.size(0)
            # loss is the batch mean; weight it by the batch size.
            loss_sum += loss.item() * batch_len
            # The arg-max of the log-probabilities is the predicted class.
            n_correct += (output.argmax(dim=1) == labels).sum().item()
            n_seen += batch_len
            bar.set_description(desc=progress_fmt % (loss_sum / n_seen, n_correct / n_seen))
    if n_seen == 0:
        raise ValueError('the data loader produced no samples')
    return loss_sum / n_seen, n_correct / n_seen


for e in range(1, epochs + 1):
    # ---- training pass ----
    train_bar = tqdm(trainloader)
    train_fmt = '[%d/%d], ' % (e, epochs) + 'training_loss: %.4f, training_accuracy: %.4f '
    train_loss, train_accuracy = _run_epoch(ResNext101_2c, train_bar, critirion, train_fmt, optimizer)

    # ---- evaluation pass on the held-out set ----
    test_bar = tqdm(testloader, desc='Validation Results:')
    test_loss, test_accuracy = _run_epoch(
        ResNext101_2c, test_bar, critirion, 'test_loss: %.4f, test_accuracy: %.4f')

    # Keep only the weights with the lowest test loss seen so far.
    if test_loss < min_val_loss:
        min_val_loss = test_loss
        print('saving the model with min loss of : ' + str(min_val_loss))
        torch.save(ResNext101_2c.state_dict(), dir_checkpoints + "dataaug_checkpoint_resnext101_32x8d_2c.pth")

    # Record this epoch's statistics.
    results['epoch'].append(e)
    results['training_loss'].append(train_loss)
    results['test_loss'].append(test_loss)
    results['test_accuracy'].append(test_accuracy)
    results['training_accuracy'].append(train_accuracy)

    # Rewrite the CSV after every epoch so partial results survive an
    # interrupted run.
    data_frame = pd.DataFrame(
        data={
            'Training_Loss': results['training_loss'],
            'Test_Loss': results['test_loss'],
            'Test_Accuracy': results['test_accuracy'],
            'Training_Accuracy': results['training_accuracy'],
        },
        index=range(1, e + 1),
    )
    data_frame.to_csv(dir_statistics + 'dataaug_Resnext101_32x8d_eval_results.csv', index_label="Epoch")
