In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(folder, name) for name in names):
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Covid FastAI Resnet50 Classifier
**Using The SARS-CoV-2 CT-scan dataset for Image Processing and Image Classification Using Convolutional Neural Network**    

This FastAI classifier is provided as a Google Colab notebook that uses FastAI and the SARS-CoV-2 CT-scan dataset from [A large dataset of CT scans for SARS-CoV-2 (COVID-19) identification](https://www.kaggle.com/plameneduardo/sarscov2-ctscan-dataset).

## FastAI Classifier Project 
## Disclaimer

This project is for research purposes only. Its purpose is to show the potential of Artificial Intelligence for medical support systems such as diagnosis systems. Although this program is fairly accurate and shows good results both on paper and in real-world testing, it is not meant to be an alternative to professional medical diagnosis.

## Dataset

SARS-CoV-2 CT scan dataset is a public dataset, containing 1252 CT scans (computed tomography scan) that are from positive patients for SARS-CoV-2 infection (COVID-19) and 1230 CT scans for SARS-CoV-2 non-infected patients. This dataset was created by our partners at Lancaster University, PlamenLancaster: Professor Angelov Plamen, and Eduardo Soares.

The dataset has been collected from real patients in Sao Paulo, Brazil. The dataset is available on [Kaggle](https://www.kaggle.com/plameneduardo/sarscov2-ctscan-dataset).

## Baseline result
For this dataset, the Lancaster University LIRA group ([LIRA](https://www.lancaster.ac.uk/lira/)) used an eXplainable Deep Learning approach (xDNN);
see the [paper on medRxiv](https://www.medrxiv.org/content/10.1101/2020.04.24.20078584v2).





## Import data

We imported the **SARS-CoV-2 dataset** from Kaggle:
https://www.kaggle.com/plameneduardo/sarscov2-ctscan-dataset




In [None]:
pip install --upgrade --quiet pip

In [None]:
# Clone the fastai source repository into the current working directory.
!git clone https://github.com/fastai/fastai
    

In [None]:
pip install -e "fastai[dev]"

In [None]:
#hide
!pip install -Uqq fastbook
import fastbook



In [None]:
#hide
from fastai.vision.all import *
from fastbook import *


In [None]:
# Root folder of the SARS-CoV-2 CT-scan dataset, relative to the notebook.
path=Path('../input/sarscov2-ctscan-dataset')

# **SNEAK PEEK INTO DATASET**

# Import required libraries and the dataset

We use the FastAI libraries; running the first code blocks will import them and get the paths to the dataset files.
Import dataset

In [None]:
# List the entries at the top level of the dataset folder.
path.ls()

In [None]:
# Show the first entry listed in the COVID folder.
(path/'COVID').ls()[0]

In [None]:
# Show the first entry listed in the non-COVID folder.
(path/'non-COVID').ls()[0]

In [None]:
# Look at one sample image from each class of the dataset.
# Import the submodule explicitly: a bare `import PIL` does not load
# `PIL.Image`, so the original only worked because another package had
# already imported it.
import PIL.Image

img1 = PIL.Image.open((path/'COVID').ls()[0])
img2 = PIL.Image.open((path/'non-COVID').ls()[0])

# Display the COVID sample as the cell output.
img1

In [None]:
# Display the non-COVID sample opened in the previous cell.
img2

In [None]:
pip install jovian --upgrade

In [None]:
import jovian

In [None]:
#below is custom create valid and train dataset dir for loading into fast ai

In [None]:
#fns = get_image_files(path)
#fns

In [None]:
#failed = verify_images(fns)
#failed

In [None]:
# root_dir=Path('/')
# data_dir=root_dir / 'data' / 'ctscan-dataset'
# model_dir= Path('.').absolute() / 'models'

In [None]:
# def valid_func(o:Path):
#     return 'test' in str(o)

# def label_func(o:Path):
#     if 'COVID' in o.name:
#         return 'COVID'
    
#     return 'non-COVID'

## Creating a datablock to load data into the  Model 

We then divide the dataset into a training set and a validation set with `RandomSplitter`. We pass the `seed` argument in order to replicate the result. The `valid_pct` argument represents the proportion of the dataset to include in the validation set (in our case 20%). The data is shuffled randomly before the split.

Now we need to import the dataset into this notebook. run the following code blocks to import the COVID dataset as a FastAI DataBlock

In the ImageDataBunch.from_name_list function we can see that we pass the dataset_dir we created earlier in the tutorial, fileNames that we created earlier, pattern for the files, some augmentation, the size of the images we need to replicate original model input sizes.
data.show_batch()

This function to show a batch of our data. Run the code block to do this and view the results.

In [None]:
def get_dls(bs,size):
    """Build the CT-scan DataLoaders for a given batch size and image size.

    Items are collected with `get_image_files` from the module-level `path`
    and labelled with `parent_label`. A seeded `RandomSplitter` holds out 20%
    of the items for validation.

    bs: batch size passed to `dataloaders`.
    size: output size passed to `aug_transforms` for the batch transforms.
    """
    # Presizing: every item is first resized to 460, then the batch
    # augmentations produce the requested `size`. An earlier version used a
    # fixed 224 without progressive resizing.
    augmentations = aug_transforms(size=size, min_scale=0.75)
    normalise = Normalize.from_stats(*imagenet_stats)
    ct_block = DataBlock(
        blocks=(ImageBlock, CategoryBlock),
        get_items=get_image_files,
        get_y=parent_label,
        splitter=RandomSplitter(valid_pct=0.2, seed=42),
        item_tfms=Resize(460),
        batch_tfms=[*augmentations, normalise],
    )
    return ct_block.dataloaders(path, bs=bs)


In [None]:
# First training stage: batch size 128 with small 128px images.
# A second set of DataLoaders with 224px images is built later for fine-tuning.
dls = get_dls(bs=128, size=128)

In [None]:
# Preview a batch from the DataLoaders in a 3-row grid (figsize 7x6).
dls.show_batch(nrows=3, figsize=(7,6))

# Transfer Learning

# The Resnet50 model

## What and why did we  use transfer learning?

Transfer learning means using a pre-trained model to build our classifier. A pre-trained model is a model that has been previously trained on a dataset, so it already contains the updated weights and biases. Using a pre-trained model saves time and computational resources. Another advantage is that pre-trained models often perform better than architectures designed from scratch. To better understand this point, suppose we want to build a classifier able to sort different sailboat types. A model pre-trained on ships would have already captured some boat features in its first layers, so it would learn faster and with better accuracy to distinguish among the different sailboat types.

## The Resnet50 architecture

Resnet50 generally is considered a good choice as first architecture to test, it shows good performance without an excessive size allowing to use a higher batch size and thus less computation time. For this reason, before to test more complex architectures Resnet50 is a good compromise.

Residual net have been ideated to solve the problem of the **vanishing gradient**. Highly intricate networks with a large number of hidden layer are working effectively in solving complicated tasks. Their structures allow them to catch pattern in complicated data. When we train the network the early layer tend to be trained slower (the gradient are smaller during backpropagation). The initial layers are important because they learn the basic feature of an object (edge, corner and so on). Failing to proper train these layers lead to a decrease in the overall accuracy of the model.

**Residual neural network** have been ideated to solve this issue. The Resnet model presents the possibility to skip the training of some layer during the initial training. The skipped layer is reusing the learned weights from the previous layer. [Original research article](https://arxiv.org/pdf/1512.03385.pdf)

## Test the Resnet50 architecture with our dataset
Now we are going to test how the FastAI implementation of this architecture works with the COVID dataset.

Create the convolutional neural network
First we will create the convolutional neural network based on this architecture. To do this we can use the following code block, which uses the FastAI `cnn_learner` function (previously `create_cnn`). We pass the loaded data, specify the model, pass error_rate & accuracy as a list for the metrics parameter to specify that we want to see both error_rate and accuracy, and finally specify a weight decay of 1e-1 (0.1).

## learn.lr_find() & learn.recorder.plot()
Now we will use the [learn.lr_find()](https://docs.fast.ai/basic_train.html#lr_find) function to run LR Finder. LR Finder help to find the best learning rate to use with our network. For more information the [original paper](https://arxiv.org/pdf/1506.01186.pdf). 
As shown from the output of above,  [learn.recorder.plot()](https://docs.fast.ai/basic_train.html#Recorder.plot) function plot the loss over learning rate. Run the following code block to view the graph. The best learning rate should be chosen as the learning rate value where the curve is the steepest. You may try different learning rate values in order to pick up the best.

## learn.fit_one_cycle() & learn.recorder.plot_losses()
The [learn.fit_one_cycle()](https://docs.fast.ai/basic_train.html#fit_one_cycle) function can be used to fit the model. Fit one cycle reaches a comparable accuracy faster than the *fit* function when training complex models. Instead of keeping the learning rate fixed during all the iterations, fit one cycle linearly increases the learning rate and then decreases it again (this process is what is called one cycle). Moreover, this learning rate variation helps to prevent overfitting. We use 5 for the parameter *cyc_len* to specify the number of cycles to run (one cycle can be considered equivalent to an epoch), and *max_lr* to specify the maximum learning rate to use, which we set as *0.001*. Fit one cycle varies the learning rate starting from 10-fold less than the maximum learning rate selected. For more information about fit one cycle: [article](https://arxiv.org/pdf/1803.09820.pdf). We then use [learn.recorder.plot_losses()](https://docs.fast.ai/basic_train.html#Recorder.plot_losses) to plot the losses from *fit_one_cycle* as a graph.

In [None]:
# Build a CNN learner on the xresnet50 architecture that reports error rate
# and accuracy, uses a weight decay of 0.1, and is switched to fp16.
learn = cnn_learner(
    dls,
    xresnet50,
    metrics=[error_rate, accuracy],
    wd=1e-1,
).to_fp16()

In [None]:
# Run the learning-rate finder and keep the two values it returns.
lr_min,lr_steep = learn.lr_find()# l.r finder

In [None]:
# Report the values currently held in lr_min and lr_steep in scientific notation.
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [None]:
# Trial: 4 epochs of one-cycle training with a maximum learning rate of 3e-3.
# A higher learning rate makes training faster.
learn.fit_one_cycle(4, lr_max=3e-3)

**Test1**

In [None]:
# Drop the current learner and release PyTorch's cached GPU memory before
# the next trial.
del learn
torch.cuda.empty_cache()

In [None]:
# Recreate a fresh xresnet50 learner (same settings) for the next trial.
learn = cnn_learner(
    dls,
    xresnet50,
    metrics=[error_rate, accuracy],
    wd=1e-1,
).to_fp16()

In [None]:
# Trial: 5 epochs with a maximum learning rate of 3e-3 -- marked by the
# author as the best of the trials. A higher learning rate makes training faster.
learn.fit_one_cycle(5, lr_max=3e-3)

In [None]:
# Drop the current learner and release PyTorch's cached GPU memory before
# the next trial.
del learn
torch.cuda.empty_cache()

In [None]:
# Recreate a fresh xresnet50 learner (same settings) for the next trial.
learn = cnn_learner(
    dls,
    xresnet50,
    metrics=[error_rate, accuracy],
    wd=1e-1,
).to_fp16()

In [None]:
# Trial: 5 epochs with a higher maximum learning rate of 8e-3.
# A higher learning rate makes training faster.
learn.fit_one_cycle(5, lr_max=8e-3)

In [None]:
#learn.fit_one_cycle(5, lr_max=0.1)# learning with highest l.r training will be faster 

In [None]:
#learn.fit_one_cycle(5, lr_max=0.1)# learning with highest l.r training will be faster 

### Save the model
We can save the model once it has been trained. 

## ClassificationInterpretation()
We use  [ClassificationInterpretation()](https://docs.fast.ai/vision.learner.html#ClassificationInterpretation) to visualize interpretations of our model.

## interp.plot_top_losses()
We can use [interp.plot_top_losses()](https://docs.fast.ai/vision.learner.html#plot_top_losses) to view our top losses and their details.

## interp.plot_confusion_matrix()
We will use [interp.plot_confusion_matrix()](https://docs.fast.ai/vision.learner.html#ClassificationInterpretation.plot_confusion_matrix) to display a [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix). Below, the top-left square represents true negatives, the top-right square represents false positives, the bottom-left square represents false negatives, and the bottom-right square represents true positives.

In [None]:
# Plot the losses recorded by the learner during the most recent training run.
learn.recorder.plot_loss()# plot curves of training process

In [None]:
# Build an interpretation object from the current learner and draw its
# confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)# plot confusion matrix
interp.plot_confusion_matrix(figsize=(12,12), dpi=50)

In [None]:
# Show the 5 samples with the highest loss.
# NOTE(review): nrows=10 is larger than the 5 images requested -- confirm the
# grid layout is intended.
interp.plot_top_losses(5, nrows=10)# plot top losses 

In [None]:
# preds, y, losses = learn.get_preds(with_loss=True)
# interp = ClassificationInterpretation(learn, preds, y, losses)
# interp.plot_top_losses(9, figsize=(7,7))

# Unfreeze the model

## learn.unfreeze()
Next we use [learn.unfreeze()](https://docs.fast.ai/basic_train.html#Learner.unfreeze) to unfreeze the model. The original model was trained on ImageNet to classify images among 1000 categories. None of these categories is a CT scan; for this reason, the fast.ai *cnn_learner* function substitutes, behind the scenes, the last layer with 2 other layers. The last layer is a matrix that has the same size as the number of classes in our data (*data.c*). Before, we trained just these two layers while the other layers of the model kept the downloaded weights. Unfreezing our model allows us to also train these other layers and update their weights.

## Train the entire (unfrozen) model
Now that we have unfrozen our model, we will use the following code blocks to train the whole model.

## Slice parameter

Initial layers are activated by simple patterns (like edges, lines, circles etc.) while the following layers acquire the ability to recognize more sophisticated patterns. Updating the weights of these layers too much would probably decrease our accuracy. The purpose of transfer learning is to exploit this ability of a pre-trained model to recognize particular patterns and to adapt it to our dataset. The parameter *slice* allows us to apply a **discriminative learning rate**. In other words, we apply a smaller learning rate (in this case, 1e-5) to the earlier layers and a higher learning rate to the last layers.

## Save the model

We save our model before unfreezing it, and save it again after fine-tuning.

In [None]:
# Checkpoint the current model under the name "resnet50_c1".
learn.save("resnet50_c1")

In [None]:
# Unfreeze the model so that all of its layers can be fine-tuned, then re-run
# the learning-rate finder on the unfrozen model.
learn.unfreeze()
# Capture the new suggestions: without this assignment lr_min/lr_steep keep
# the values from the first lr_find() run, and the print in the following
# cell would report stale numbers.
lr_min, lr_steep = learn.lr_find()

In [None]:
# Report the values currently held in lr_min and lr_steep in scientific notation.
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [None]:
# Swap in DataLoaders with larger 224px images (batch size 12), then train
# for 12 epochs with learning rates sliced from 1e-5 to 1e-4.
learn.dls = get_dls(bs=12, size=224)

learn.fit_one_cycle(12, lr_max=slice(1e-5, 1e-4))

In [None]:
# Plot the losses recorded during the fine-tuning run.
learn.recorder.plot_loss()

In [None]:
# Rebuild the interpretation object from the fine-tuned learner and draw its
# confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)# plot confusion matrix
interp.plot_confusion_matrix(figsize=(12,12), dpi=50)

In [None]:
# Show the 5 samples with the highest loss after fine-tuning.
# NOTE(review): nrows=10 is larger than the 5 images requested -- confirm the
# grid layout is intended.
interp.plot_top_losses(5, nrows=10)# plot top losses 

In [None]:
import jovian 

In [None]:
# Checkpoint the fine-tuned model under the name 'resnet50_run'.
learn.save('resnet50_run')

In [None]:
# Export the learner for deployment; this creates export.pkl.
learn.export()

In [None]:
# Point `path` at the current working directory and list its .pkl files.
# NOTE(review): this rebinds the global `path` that get_dls() reads, so any
# get_dls() call made after this cell would search the working directory
# instead of the dataset folder -- confirm this is intended.
path=Path()
path.ls(file_exts=".pkl")#get export file path

In [None]:
# Load the exported learner from export.pkl for use at inference time.
learn_inf=load_learner(path/'export.pkl')

In [None]:
# Commit the notebook to the Jovian project 'minor_fast_ai'.
jovian.commit(project='minor_fast_ai')

# Predictions from Our Model


In [68]:
# Create a file-upload widget and display it.
btn_upload = widgets.FileUpload()
btn_upload

FileUpload(value={}, description='Upload')

In [86]:
# Build an image from the last item in the upload widget's data.
img = PILImage.create(btn_upload.data[-1])


In [87]:
# Inspect the dimensions of the uploaded image.
img.shape


(200, 322)

In [88]:
# Create an output widget, clear it, and render a thumbnail of the uploaded
# image inside it.
out_pl = widgets.Output()
out_pl.clear_output()
with out_pl: display(img.to_thumb(384,404))
out_pl

Output()

In [89]:
# Show the class labels known to the DataLoaders.
dls.vocab


['COVID', 'non-COVID']

In [90]:
# Run the trained learner on the uploaded image; the call returns the
# predicted label, its index, and the per-class probabilities.
pred,pred_idx,probs = learn.predict(img)


In [91]:
# Create a label widget showing the prediction and the probability of the
# predicted class (4 decimal places), then display it.
lbl_pred = widgets.Label()
lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'
lbl_pred

Label(value='Prediction: non-COVID; Probability: 0.5765')

In [92]:
# Create the "Classify" button and display it.
btn_run = widgets.Button(description='Classify')
btn_run

Button(description='Classify', style=ButtonStyle())

In [None]:
# Click event handler that adds functionality to the "Classify" button.
def on_click_classify(change):
    """Classify the most recently uploaded image and show the result.

    Reads the last item from `btn_upload`, renders a 128x128 thumbnail into
    `out_pl`, runs `learn_inf.predict` on the image and writes the predicted
    label and its probability into `lbl_pred`.

    change: the argument passed by the button's click callback (unused).
    """
    # Guard against a click before any file has been uploaded; indexing an
    # empty upload list would otherwise raise IndexError inside the callback.
    if not btn_upload.data:
        lbl_pred.value = 'Please upload an image first.'
        return
    img = PILImage.create(btn_upload.data[-1])
    out_pl.clear_output()
    with out_pl: display(img.to_thumb(128,128))
    pred,pred_idx,probs = learn_inf.predict(img)
    lbl_pred.value = f'Prediction: {pred}; Probability: {probs[pred_idx]:.04f}'

# Register the handler so it runs whenever the button is clicked.
btn_run.on_click(on_click_classify)

In [None]:
from fastai.vision.core import *
from ipywidgets import HBox,VBox,widgets,Button,Checkbox,Dropdown,Layout,Box,Output,Label,FileUpload

In [None]:
# Create a button labelled 'MAGIC' and display it.
# NOTE(review): no click handler is attached to this button in the visible
# cells -- confirm whether one is defined elsewhere.
HeatMp = widgets.Button(description='MAGIC')
HeatMp

In [None]:
# Stack the app's widgets vertically: prompt, upload and classify controls,
# image preview, prediction label, and the prompt for the `HeatMp` button.
VBox([
    widgets.Label('INPUT YOUR CT SCAN IMAGE FOR DETECTION!'),
    btn_upload,
    btn_run,
    out_pl,
    lbl_pred,
    widgets.Label('Do You Want to See How our Model Decides which is Covid and Which is not?'),
    widgets.Label("Click Here"),
    HeatMp,
])

In [None]:
# Commit the notebook to the Jovian project "minor_fast_ai".
jovian.commit(project="minor_fast_ai")

In [None]:
# hyperparams = {
#     'arch_name': 'resnet50',
#     'lr': .1
# }
# jovian.log_hyperparams(hyperparams)


In [None]:
#  metrics = {
#     'epoch':5,
#     'train_loss':0.379,
#     'val_loss': 0.319,
#     'acc': 85.28
# }
# jovian.log_metrics(metrics)

In [None]:
#jovian.commit(project='CT_Scan_Resnet')

In [None]:
# Commit the notebook to the Jovian project 'my-ct-scan-project'.
# NOTE(review): earlier cells commit to 'minor_fast_ai' -- confirm that a
# second project is intended.
jovian.commit(project='my-ct-scan-project')

In [None]:
# dls = get_data(path,480,224)

In [None]:
# db = DataBlock(blocks=(ImageBlock, CategoryBlock),
#                    get_items=get_image_files,
#                    splitter=FuncSplitter(valid_func),
#                    get_y=parent_label,
#                    item_tfms=Resize(460),
#                    batch_tfms=[*aug_transforms(size=224),
#                                Normalize.from_stats(*imagenet_stats)])

# data=db.dataloaders(data_dir)
    

In [None]:
# dls = db.dataloaders(data_dir)
# #dls.show_batch(max_n=9)

In [None]:
# get_dls.show_batch(max_n=9, figsize=(4,4))