In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import torch
from torch import nn 
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision.transforms import v2
import pandas as pd
import numpy as np
import cv2 as cv
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
import glob 
from random import sample

In [None]:
# Load one sample lesion image to check its dimensions and appearance.
img = cv.imread(
    "/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1/ISIC_0028933.jpg"
)

print(img.shape)
# OpenCV decodes to BGR; reverse the channel axis so matplotlib receives RGB.
plt.imshow(img[:, :, ::-1])


In [None]:
# List every entry in the part-1 image folder and print how many were found.
img_path_list = glob.glob("/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1/*")
print(len(img_path_list))

In [None]:
# Draw 9 distinct paths at random for a visual spot check.
# NOTE(review): no seed is set, so a different set of images is drawn on every run.
img_path_sample=sample(img_path_list,9)

In [None]:
# Show the nine sampled images in a 3x3 grid, each titled with its array shape.
fig, axes = plt.subplots(3, 3, figsize=(9, 9))
for ax, sample_path in zip(axes.flat, img_path_sample):
    img = cv.imread(sample_path)
    # OpenCV decodes to BGR; flip the channel axis so matplotlib shows RGB.
    ax.imshow(img[:, :, ::-1])
    ax.set_title(img.shape)

In [None]:
# Load the HAM10000 metadata CSV, print its column names and display the table.
metadata = pd.read_csv("/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_metadata.csv")
print(metadata.columns)
metadata

In [None]:
# Summarise column dtypes and non-null counts.
metadata.info()

In [None]:
# Store image_id with pandas' dedicated string dtype, then re-check the dtypes.
metadata = metadata.astype({'image_id': 'string'})
metadata.info()

In [None]:
# The two image folders of the dataset; both are searched when resolving an image_id to a file path.
f1 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_1/'
f2 = '/kaggle/input/skin-cancer-mnist-ham10000/HAM10000_images_part_2/'

In [None]:
# Resolve each image_id to a .jpg path: use the part-1 folder when the file is
# there, otherwise assume it lives in part 2 (existence is verified separately).
metadata["path"] = metadata["image_id"].apply(
    lambda image_id: f1 + image_id + '.jpg'
    if os.path.exists(f1 + image_id + '.jpg')
    else f2 + image_id + '.jpg'
)

In [None]:
# Flag, row by row, whether the resolved image path is actually present on disk.
metadata["exists"] = metadata["path"].apply(os.path.exists)

In [None]:
# Count existing vs. missing image files (any False rows point at missing images).
metadata["exists"].value_counts()

In [None]:
# Display the table, now including the path and exists columns.
metadata

In [None]:
# Number of rows per diagnosis code in the dx column.
metadata["dx"].value_counts()

Cases include a representative collection of all important diagnostic categories in the realm of pigmented lesions:

- actinic keratoses and intraepithelial carcinoma / Bowen's disease (akiec),
- basal cell carcinoma (bcc),
- benign keratosis-like lesions (solar lentigines / seborrheic keratoses and lichen-planus-like keratoses, bkl),
- dermatofibroma (df),
- melanoma (mel),
- melanocytic nevi (nv),
- and vascular lesions (angiomas, angiokeratomas, pyogenic granulomas and hemorrhage, vasc).

In [None]:
# Drop the akiec rows before building the binary label.
# .copy() makes metadata2 an independent frame, so adding columns to it later
# does not write into a slice of `metadata` (pandas' SettingWithCopyWarning).
metadata2 = metadata[metadata.dx != "akiec"].copy()

In [None]:
# Inspect the filtered table.
metadata2

In [None]:
# Binary target: 1 for melanoma (mel) or basal cell carcinoma (bcc), 0 otherwise.
# The label is derived from metadata2's own dx column and attached with
# .assign(), which returns a new frame instead of writing into a slice of
# `metadata`; the cell can therefore be re-run safely and emits no
# SettingWithCopyWarning.
metadata2 = metadata2.assign(
    label=metadata2["dx"].isin(["mel", "bcc"]).astype("int64")
)
# Build the mask from metadata2 itself so its index matches the frame being filtered.
metadata2[metadata2["dx"] == "mel"]

In [None]:
# Persist the labelled table to the working directory (the DataFrame index is written as the first CSV column).
metadata2.to_csv("metadata.csv")

## Split the dataset

In [None]:
# Class balance of the binary label before splitting.
metadata2.label.value_counts()

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; stratifying on the label keeps the
# positive/negative ratio the same in both parts, and the fixed seed makes the
# split repeatable.
train, test = train_test_split(
    metadata2,
    test_size=0.2,
    stratify=metadata2["label"],
    random_state=42,
)
test

In [None]:
# Inspect the training split.
train

In [None]:
# Class balance inside the training split (basis for the resampling below).
train.label.value_counts()

In [None]:
from sklearn.utils import resample

# Oversample the positive class: draw 3875 rows with replacement (seeded).
df1 = resample(
    train.loc[train["label"] == 1],
    replace=True,
    n_samples=3875,
    random_state=42,
)
df1

In [None]:
# Downsample the negative class to 3875 rows (no replacement, seeded), then
# stack it under the oversampled positives to form the balanced training table.
df2 = resample(
    train.loc[train["label"] == 0],
    replace=False,
    n_samples=3875,
    random_state=42,
)
# Positives come first and negatives second; the index is renumbered from 0.
bal_train = pd.concat([df1, df2], axis=0).reset_index(drop=True)
bal_train

In [None]:
# Count repeated image_id values among the downsampled negatives.
df2.image_id.duplicated().value_counts()

In [None]:
# Confirm both classes now have the same number of rows.
bal_train.label.value_counts()

In [None]:
# Write the three tables to the working directory for the Dataset class to read back.
# NOTE: the "balenced_train.csv" spelling is what the dataset cell loads, so keep both in sync.
bal_train.to_csv("balenced_train.csv")
train.to_csv("train.csv")
test.to_csv("test.csv")

## Create Custom Dataset

In [None]:
# Deterministic preprocessing applied to every image (train and test):
# resize to 232 (bilinear), centre-crop to 224x224, convert to a float32 tensor
# scaled to [0, 1], then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet statistics — confirm they match the backbone's weights.
transform_main = v2.Compose([
    
    v2.Resize(232,interpolation= v2.InterpolationMode.BILINEAR),
    v2.CenterCrop(224),
    v2.Compose([v2.ToImage(), v2.ToDtype(torch.float32, scale=True)]),    # v2 replacement for the legacy ToTensor()
    v2.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
transform_main

In [None]:
from torchvision.transforms import v2
# Training-only augmentation: random rotation within +/-45 degrees followed by
# a horizontal flip with probability 0.5.
transform_aug = v2.Compose([
            v2.RandomRotation(45),
            v2.RandomHorizontalFlip(0.5)])
transform_aug

In [None]:
# Training pipeline: augment first, then apply the shared deterministic preprocessing.
train_transform = v2.Compose([transform_aug,transform_main])
train_transform

In [None]:
class ham(Dataset):
    """Map-style dataset of lesion images described by a CSV file.

    The CSV must provide a ``path`` column (image file location) and a
    ``label`` column (numeric class label).

    Args:
        csv_dir: Path of the CSV file to read.
        transform: Optional callable applied to each loaded PIL image.
    """

    def __init__(self,csv_dir,transform = None):
        self.csv_dir = csv_dir
        self.datas = pd.read_csv(self.csv_dir)
        self.transform = transform

    def __getitem__(self,x):
        """Return ``(image, label)`` for the row at integer position ``x``.

        ``label`` is a 0-dimensional ``torch.float32`` tensor. ``image`` is an
        RGB PIL image, or whatever ``transform`` returns for it.
        """
        # Positional lookup: works for any DataFrame index and for negative x,
        # unlike label-based ``Series[x]``.
        path = self.datas["path"].iloc[x]
        label = torch.tensor(self.datas["label"].iloc[x], dtype = torch.float32)

        # convert() decodes the pixels, so the file can be closed immediately;
        # forcing RGB guarantees three channels for the per-channel Normalize.
        with Image.open(path) as raw:
            img = raw.convert("RGB")

        if self.transform is not None:
            img = self.transform(img)

        return img, label

    def __len__(self):
        """Number of rows in the CSV."""
        return len(self.datas)
    
# Training set: balanced CSV with augmentation; test set: held-out CSV with deterministic preprocessing only.
train_dataset = ham("/kaggle/working/balenced_train.csv",train_transform)
test_dataset = ham("/kaggle/working/test.csv",transform_main)

In [None]:
# img,label =train_dataset[150]
# print(label)
# print(len(train_dataset))
# plt.imshow(np.array(img))

In [None]:
# Fetch one sample to confirm the dataset works; the label is a 0-d tensor, so its shape is empty.
img,label =train_dataset[150]
label.shape

In [None]:
# Shuffle the training set every epoch: balenced_train.csv stores all positive
# rows first and all negative rows after them, so sequential batches would each
# contain a single class and the optimiser would see one class at a time.
train_dataloader = DataLoader(dataset= train_dataset,
                              batch_size= 64,
                              shuffle= True
                              )
# Evaluation metrics do not depend on batch order, so the test set stays unshuffled.
test_dataloader = DataLoader(dataset= test_dataset,
                              batch_size= 64,
                              shuffle= False
                              )

# Model

In [None]:
# import requests
# request = requests.get("https://raw.githubusercontent.com/Woodman718/FixCaps/main/Module/HAM10000/model.py")
# with open("model.py","wb") as f:
#     f.write(request.content)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
Device = "cuda" if torch.cuda.is_available() else "cpu"
Device

In [None]:
import torchvision.models as models

# `pretrained=True` is deprecated (torchvision >= 0.13); request the same
# ImageNet checkpoint explicitly through the `weights` enum instead.
model = models.mobilenet_v3_large(weights=models.MobileNet_V3_Large_Weights.IMAGENET1K_V1)

In [None]:
from torchinfo import summary
# Print a layer-by-layer table (shapes, parameter counts, trainable flag) for a single 3x224x224 input.
summary(model=model,
        input_size=(1, 3, 224, 224), # make sure this is "input_size", not "input_shape"
        # col_names=["input_size"], # uncomment for smaller output
        col_names=["input_size", "output_size", "num_params", "trainable"],
        col_width=20,
        row_settings=["var_names"])

In [None]:
# Freeze every existing weight so that only layers added afterwards are trained.
for frozen_param in model.parameters():
    frozen_param.requires_grad_(False)

In [None]:
# MobileNetV3's classifier is Sequential(Linear, Hardswish, Dropout, Linear).
# Index 1 is the Hardswish activation, which has no `in_features`, so the
# output layer to swap is the LAST element.
num_features = model.classifier[-1].in_features  # width feeding the final Linear
# Single-logit head for binary classification; a freshly created layer is
# trainable by default, unlike the frozen backbone.
model.classifier[-1] = torch.nn.Linear(num_features, 1)

In [None]:
# Random dummy batch (1 image, 3x224x224) on the selected device, for smoke-testing the model's forward pass.
a =torch.rand(1,3,224,224).to(Device)
a

In [None]:
# result = model(a)
# print(result)
# pred =torch.sigmoid(result)
# pred.squeeze(dim =0)

# Let's train the model

In [None]:
from tqdm.notebook import tqdm, trange

In [None]:
from torchmetrics.classification import BinaryPrecision
from torchmetrics.classification import BinaryRecall
from torchmetrics.classification import BinaryAccuracy


In [None]:
# Scratch tensors for trying out torch.cat: an empty float tensor and a small integer vector.
n = torch.empty(0)
d = torch.arange(1, 5)

In [None]:
# Concatenating onto an empty tensor returns the other tensor's values (accumulator pattern check).
torch.cat((n,d))

In [None]:
def train(model, device, train_loader, validation_loader, epochs, lr ,name):
    """Fit ``model.classifier`` for binary classification and validate after every epoch.

    Args:
        model: Network whose forward pass returns one logit per sample with
            shape ``(batch, 1)`` and which exposes a ``classifier`` sub-module.
        device: Device (string or ``torch.device``) the model and batches are moved to.
        train_loader: Sized iterable of ``(data, target)`` batches used for optimisation.
        validation_loader: Sized iterable of ``(data, target)`` batches used for evaluation.
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate for the Adam optimiser.
        name: Tag embedded in the checkpoint file names.

    Returns:
        Tuple ``(train_loss, train_acc, validation_loss, validation_acc)``;
        each element is a list holding one float per epoch.

    Side effects:
        Prints the metrics of every epoch and writes a checkpoint
        ``./model_resnet50_{name}_e{epoch}.pth`` after every epoch.
    """
    model.to(device)
    loss_fn =  nn.BCEWithLogitsLoss().to(device)
    # Only the classifier's parameters are handed to the optimiser.
    optimizer = torch.optim.Adam(model.classifier.parameters(), lr=lr)
    train_loss, validation_loss = [], []
    train_acc, validation_acc = [], []

    with tqdm(range(epochs), unit='epoch') as tepochs:

        tepochs.set_description('Training')

        for epoch in tepochs:
            # ---------------- training pass ----------------
            model.train()
            running_loss = 0.
            correct, total = 0, 0

            for data, target in train_loader:
                data, target = data.to(device), target.to(device)

                optimizer.zero_grad()
                output = model(data).squeeze(dim=1)
                loss = loss_fn(output, target)
                loss.backward()
                optimizer.step()

                tepochs.set_postfix(loss=loss.item())
                running_loss += loss.item()

                # sigmoid(logit) > 0.5 is the predicted positive class; detach
                # because the accuracy bookkeeping needs no gradients.
                pred = (torch.sigmoid(output.detach()) > 0.5).float()
                total += target.size(0)
                correct += (pred == target).sum().item()

            # Mean batch loss and sample-level accuracy for this epoch.
            train_loss.append(running_loss / len(train_loader))
            train_acc.append(correct / total)

            # ---------------- validation pass ----------------
            model.eval()
            running_loss = 0.
            correct, total = 0, 0

            # No autograd graph is needed for evaluation: saves memory and time
            # and guarantees validation batches can never produce gradients.
            with torch.no_grad():
                for data, target in validation_loader:
                    data, target = data.to(device), target.to(device)

                    output = model(data).squeeze(1)
                    loss = loss_fn(output, target)

                    tepochs.set_postfix(loss=loss.item())
                    running_loss += loss.item()

                    pred = (torch.sigmoid(output) > 0.5).float()
                    total += target.size(0)
                    correct += (pred == target).sum().item()

            validation_loss.append(running_loss / len(validation_loader))
            validation_acc.append(correct / total)

            print(f"Epoches: {epoch}")
            print(f"\nTrain loss: {train_loss[-1]:.5f} | Train acc: {train_acc[-1]:.5f} \n validation_loss: {validation_loss[-1]:.5f} | Test acc: {validation_acc[-1]:.5f}\n")

            # Checkpoint after every epoch. NOTE(review): the "resnet50" prefix
            # does not reflect the architecture of `model`; it is kept unchanged
            # so existing checkpoint paths stay valid.
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict()
            }, f"./model_resnet50_{name}_e{epoch}.pth")

        return train_loss, train_acc , validation_loss, validation_acc


In [None]:
# Train for 3 epochs at lr=0.01; the test loader doubles as the validation set and checkpoints are tagged "freeze".
# NOTE(review): the function name `train` shadows the `train` DataFrame created by the split cell.
train_loss, train_acc, validation_loss, validation_acc= train(model,Device,train_dataloader,test_dataloader,3, 0.01, "freeze")

In [None]:
def plot_loss_accuracy(train_loss, train_acc,
                       validation_loss, validation_acc):
    """Draw per-epoch loss and accuracy curves in two side-by-side panels.

    Each argument is a sequence with one value per epoch; the left panel shows
    training vs. validation loss, the right panel training vs. validation
    accuracy. Nothing is returned.
    """
    epoch_axis = list(range(len(train_loss)))
    fig, (ax1, ax2) = plt.subplots(1, 2)

    # (axes, metric name, training series, validation series) for each panel.
    panels = (
        (ax1, 'Loss', train_loss, validation_loss),
        (ax2, 'Accuracy', train_acc, validation_acc),
    )
    for axis, metric, train_values, validation_values in panels:
        axis.plot(epoch_axis, train_values, label=f'Training {metric}')
        axis.plot(epoch_axis, validation_values, label=f'Validation {metric}')
        axis.set_xlabel('Epochs')
        axis.set_ylabel(metric)
        axis.set_title(f'Epoch vs {metric}')
        axis.legend()

    fig.set_size_inches(15.5, 5.5)

# Visualise the learning curves returned by the training run.
plot_loss_accuracy(train_loss, train_acc, validation_loss, validation_acc)