<a href="https://colab.research.google.com/github/HumzaW245/iCAD-Dental/blob/main/iCAD_Dental_Coding_Test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Downloading data using kaggle API

Read comment by 'Nikhil Ojha' for simple instructions on how to do download dataset (https://www.kaggle.com/discussions/general/74235)

In [145]:
! pip install -q kaggle
from google.colab import files
files.upload() #Upload kaggle.json (it contains API key to access kaggle datasets)


from IPython.display import clear_output
clear_output() #To clear output printed since previous lines will show kaggle API key information

In [146]:
!rm -r ~/.kaggle
!mkdir ~/.kaggle
!mv ./kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [147]:
# To download dataset, go to dataset ->https://www.kaggle.com/datasets/salviohexia/isic-2019-skin-lesion-images-for-classification/discussion
# Then click 3 dots menu -> 'Copy API Command'
# since in google colab, need to do ! first -> ! <pastedAPIcommandHere>

! kaggle datasets download -d salviohexia/isic-2019-skin-lesion-images-for-classification

isic-2019-skin-lesion-images-for-classification.zip: Skipping, found more recently modified local copy (use --force to force download)


In [148]:
import zipfile
zip_ref = zipfile.ZipFile('isic-2019-skin-lesion-images-for-classification.zip', 'r')
zip_ref.extractall('/content/ISIC2019')
zip_ref.close()

In [150]:
# ctrl-f 'In [13]' if want details of making a complete dataset using the metadata too -> https://www.kaggle.com/code/shonenkov/merge-external-data/notebook

import pandas as pd

# Specify the path to the CSV files
metadata_csv_file_path = '/content/ISIC2019/ISIC_2019_Training_Metadata.csv'
data_csv_file_path = '/content/ISIC2019/ISIC_2019_Training_GroundTruth.csv'



# Load the metadata and main data from the CSV file
metadata_df = pd.read_csv(metadata_csv_file_path)
main_df = pd.read_csv(data_csv_file_path)


#Getting class name and index column (index is used to make predictions more easily)
main_df['Class'] = main_df.apply(lambda row: main_df.columns[(row == 1)].tolist()[0], axis=1)
class_idx_map = {'MEL':0, 'NV':1, 'BCC':2, 'AK':3, 'BKL':4, 'DF':5, 'VASC':6, 'SCC':7, 'UNK':8}
main_df['Class_Index'] = main_df['Class'].apply(lambda class_name: class_idx_map[class_name])


print(main_df.head(-5))

              image  MEL   NV  BCC   AK  BKL   DF  VASC  SCC  UNK Class  \
0      ISIC_0000000  0.0  1.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0    NV   
1      ISIC_0000001  0.0  1.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0    NV   
2      ISIC_0000002  1.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0   MEL   
3      ISIC_0000003  0.0  1.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0    NV   
4      ISIC_0000004  1.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0   MEL   
...             ...  ...  ...  ...  ...  ...  ...   ...  ...  ...   ...   
25321  ISIC_0073240  0.0  1.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0    NV   
25322  ISIC_0073241  1.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0   MEL   
25323  ISIC_0073244  0.0  1.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0    NV   
25324  ISIC_0073245  0.0  1.0  0.0  0.0  0.0  0.0   0.0  0.0  0.0    NV   
25325  ISIC_0073246  0.0  0.0  1.0  0.0  0.0  0.0   0.0  0.0  0.0   BCC   

       Class_Index  
0                1  
1                1  
2                0  
3              

In [151]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split

# Split the data frame into training and test sets
train_df, test_df = train_test_split(main_df, test_size=0.2, random_state=42)



In [152]:
dataset_dir = '/content/ISIC2019/'

import os
import pandas as pd
from PIL import Image
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# Define a custom dataset class
class CustomDataset(Dataset):
    def __init__(self, root_dir, dataframe, transform=None):
        self.root_dir = root_dir
        self.dataframe = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        label = self.dataframe.iloc[idx]['Class_Index']
        class_name = self.dataframe.iloc[idx]['Class']
        #Get label index to tensor
        label = torch.tensor(label)

        #Get image
        img_id = self.dataframe.iloc[idx, 0]
        img_path = os.path.join(self.root_dir, class_name + '/' + img_id + '.jpg')  # Assuming images are in JPEG format
        image = Image.open(img_path).convert('RGB')

        #Apply transformations to image
        if self.transform:
            image = self.transform(image)

        return image, label



In [153]:
train_data = CustomDataset(dataset_dir, train_df, transform = None)

In [154]:
train_data[0]

(<PIL.Image.Image image mode=RGB size=1024x1024>, tensor(3))

In [142]:
train_df

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK,Class,Class_Index
24937,ISIC_0072646,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,AK,3
12362,ISIC_0033765,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,1
4223,ISIC_0025626,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,1
1355,ISIC_0012325_downsampled,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,1
16166,ISIC_0058703,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,BCC,2
...,...,...,...,...,...,...,...,...,...,...,...,...
21575,ISIC_0067321,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,1
5390,ISIC_0026793,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,1
860,ISIC_0010174,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MEL,0
15795,ISIC_0058124,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,NV,1


In [None]:

/content/ISIC2019/BCC/ISIC_0024331.jpg