# Set Up

In [2]:
# The following cell is run whenever the fastai library is used.
# The autoreload magics tell the notebook to reload any changes made to imported libraries.
# `%matplotlib inline` ensures that any plotted graphs are shown in this notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
from fastai.vision.all import *
from fastai.metrics import *
import pandas as pd
from pathlib import Path

In [4]:
# Location of the HAM10000 metadata table.
csv_path = "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv"
# Load the metadata; `skin_df` keeps the row order of the file.
skin_df = pd.read_csv(filepath_or_buffer=csv_path)
# Display a copy sorted by image id (the sorted frame is not stored).
skin_df.sort_values("image_id")

In [5]:
# Root folder of the dataset, relative to the notebook's working directory.
path = Path('../input/skin-cancer-mnist-ham10000')
# NOTE(review): presumably the fastai convention that makes displayed paths
# relative to this folder — confirm against the fastcore docs.
Path.BASE_PATH = path
# List the contents of the dataset folder as the cell output.
path.ls()

## Rename Variables

In [6]:
# Full disease name for each short diagnosis code.
short_to_full_name_dict = dict(
    akiec="Bowen's disease",              # very early form of skin cancer
    bcc="basal cell carcinoma",           # basal-cell cancer or white skin cancer
    bkl="benign keratosis-like lesions",  # non-cancerous skin tumour
    df="dermatofibroma",                  # non-cancerous rounded bumps
    mel="melanoma",                       # black skin cancer
    nv="melanocytic nevi",                # mole, non-cancerous
    vasc="vascular lesions",              # skin condition
)

# Get Images from file

In [7]:
# Map every image id to the full disease name of its diagnosis code (`dx`),
# reading the two metadata columns side by side.
img_to_class_dict = {
    image_id: short_to_full_name_dict[dx_code]
    for image_id, dx_code in zip(skin_df["image_id"], skin_df["dx"])
}
# Preview the first five (image id, disease name) pairs.
list(img_to_class_dict.items())[:5]

In [8]:
def get_label_from_dict(path):
    """Return the full disease name for the image file at `path`.

    The lookup key is `path.stem`, i.e. the file name without its suffix.
    Raises `KeyError` if that key is not present in `img_to_class_dict`.
    """
    image_id = path.stem
    return img_to_class_dict[image_id]

# Constructing a DataBlock

In [9]:
dblock = DataBlock(
    # Independent variable: an image; dependent variable: a single category.
    blocks=(ImageBlock, CategoryBlock),
    # Collect every image file found under the source path given to `dataloaders`.
    get_items=get_image_files,
    # Hold out 20% of the items at random for validation; the fixed seed keeps
    # the split reproducible.
    # NOTE(review): a purely random split can leak if the same image (or the same
    # lesion) appears more than once under the source path — verify against the
    # dataset layout.
    splitter=RandomSplitter(valid_pct=0.2, seed=42),
    # Function fastai calls on each image path to get its label
    # (the dependent variable).
    get_y=get_label_from_dict,
    # Resize every image to 448px, then apply a dihedral transform: one of the
    # 8 orientations given by the four 90-degree rotations, each with or
    # without a flip.
    item_tfms=[Resize(448), DihedralItem()],
    # Pick a random crop covering 75-100% of the image and resize it to 224px.
    # Batch transforms receive tensors, so the GPU variant is required here:
    # the item-level `RandomResizedCrop` is written for PIL images and would
    # leave the batch at its original size.
    batch_tfms=RandomResizedCropGPU(size=224, min_scale=0.75, max_scale=1.0),
)

img_path = "/kaggle/input/skin-cancer-mnist-ham10000"
# Build the training and validation DataLoaders from the images under `img_path`.
dls = dblock.dataloaders(img_path, bs=64)  # bs = batch size

# Display the images

In [10]:
# Show up to 15 labelled sample images from a batch.
dls.show_batch(max_n=15)

Observations from these images will be noted below. First, I'll do some more checks to confirm our categories are just "Bowen's disease", 'basal cell carcinoma', 'benign keratosis-like lesions', 'dermatofibroma', 'melanocytic nevi', 'melanoma', 'vascular lesions':

In [11]:
# Print the vocabulary (the class labels) held by the DataLoaders.
print(dls.vocab)

Let's check the sizes of our training and validation datasets:

In [12]:
# (number of training items, number of validation items)
len(dls.train_ds), len(dls.valid_ds)

# Train a simple model

In [13]:
# Baseline learner: ResNet18 architecture, reporting accuracy.
learn = vision_learner(dls, resnet18, metrics=accuracy)
# Run fastai's `fine_tune` schedule with 4 epochs.
learn.fine_tune(4)

In [14]:
# Run the learning-rate finder and keep its `minimum` and `steep` suggestions.
lr_min, lr_steep = learn.lr_find(suggest_funcs=(minimum, steep))

In [15]:
# Report both suggested learning rates in scientific notation.
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

**Train with `fit_one_cycle` for 3 epochs to get an idea of ResNet34's accuracy.**

In [16]:
# New learner on ResNet34; this rebinds `learn`, replacing the ResNet18 learner.
learn = vision_learner(dls, resnet34, metrics=accuracy)
# Three epochs of one-cycle training with a maximum learning rate of 1e-2.
learn.fit_one_cycle(3, 1e-2)

# Unfreezing and Transfer Learning

In [17]:
# Unfreeze the model so that the following training also updates the pretrained layers.
learn.unfreeze()

In [18]:
# Re-run the learning-rate finder on the unfrozen model.
lr_min, lr_steep = learn.lr_find(suggest_funcs=(minimum, steep))

In [19]:
# Report the suggestions for the unfrozen model in scientific notation.
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [23]:
# 15 epochs of one-cycle training; the slice spreads the maximum learning rate
# from 1e-4 to 1e-2 across the layer groups.
learn.fit_one_cycle(15, lr_max=slice(1e-4, 1e-2))

In [24]:
# Plot the losses recorded during the last training run.
learn.recorder.plot_loss()

### Saving the Model

In [25]:
# Export the learner for inference; this file is loaded again by `load_learner` below.
learn.export("model21_export.pkl")

In [26]:
# Save the learner's current state under the name 'model22'.
learn.save('model22')

# Model Interpretation

In [29]:
# Build an interpretation object from the trained learner.
interp = ClassificationInterpretation.from_learner(learn)
# Confusion matrix of the learner's predictions.
interp.plot_confusion_matrix(figsize=(8,8), dpi=90)

# Top 6 losses

In [30]:
# Show the 6 samples with the highest loss, arranged in 2 rows.
interp.plot_top_losses(6, nrows=2)

In [31]:
pip install gradio

In [33]:
import gradio as gr
from fastai.vision.all import *
import skimage

# Reload the exported learner for inference (rebinds `learn`).
# NOTE(review): the export is a pickle file — only load files you trust.
learn = load_learner('model21_export.pkl')

# Class names, in the order used to index the predicted probabilities in `predict`.
labels = learn.dls.vocab
def predict(img):
    """Classify `img` and return a mapping of class label to probability.

    `img` is converted with `PILImage.create` before being passed to the
    learner; each probability is returned as a plain Python float.
    """
    # Only the per-class probabilities are needed from the prediction triple.
    _, _, probs = learn.predict(PILImage.create(img))
    return {label: float(prob) for label, prob in zip(labels, probs)}

title = "Skin Cancer Detector"
article="<p style='text-align: center'><a href='https://github.com/AdiNarendra98' target='_blank'>Blog post</a></p>"
# Kept as a module-level name, but no longer passed to `gr.Interface`: the
# `interpretation` argument is not accepted by current Gradio releases.
interpretation='default'
# Controls whether request queueing is switched on via `.queue()` below.
enable_queue=True

# Build the demo with the top-level `gr.Image` / `gr.Label` components; the
# legacy `gr.inputs` / `gr.outputs` namespaces are not available in current
# Gradio releases.
# The uploaded image is passed through at full resolution: shrinking it to
# 28x28 (as before) would throw away the detail the model was trained on,
# since the learner's own pipeline resizes inputs to 448px.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(),
    outputs=gr.Label(num_top_classes=3),  # show the three most likely classes
    title=title,
    article=article,
)
if enable_queue:
    demo.queue()
# share=True asks Gradio to also create a public share link.
demo.launch(share=True)

# Reference

[HAM10000 Vision ResNet18](https://www.kaggle.com/code/leonblum/ham10000-vision-resnet18-97-7-accuracy)