In [2]:
import tensorflow as tf

#### Checking the Tensorflow version

In [3]:
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.20.0


#### Importing the Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

#### Starting with the current directory where data is stored 

In [15]:
os.chdir("../Source")
%pwd

'c:\\Users\\Amreet\\Desktop\\Skin-Cancer-Classification\\Source'

#### Metadata CSV file is loaded as dataset

In [16]:
# disease_df = pd.read_csv('HAM10000_metadata.csv')
disease_df = pd.read_csv('HAM10000_metadata.csv')
disease_df.head(10)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear
5,HAM_0001466,ISIC_0027850,bkl,histo,75.0,male,ear
6,HAM_0002761,ISIC_0029176,bkl,histo,60.0,male,face
7,HAM_0002761,ISIC_0029068,bkl,histo,60.0,male,face
8,HAM_0005132,ISIC_0025837,bkl,histo,70.0,female,back
9,HAM_0005132,ISIC_0025209,bkl,histo,70.0,female,back


#### Checking the columns present inside the dataset

In [17]:
print("Details regarding the Dataset")
disease_df.info()
print("Shape of the Dataset -- ",disease_df.shape)

Details regarding the Dataset
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10015 entries, 0 to 10014
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   lesion_id     10015 non-null  object 
 1   image_id      10015 non-null  object 
 2   dx            10015 non-null  object 
 3   dx_type       10015 non-null  object 
 4   age           9958 non-null   float64
 5   sex           10015 non-null  object 
 6   localization  10015 non-null  object 
dtypes: float64(1), object(6)
memory usage: 547.8+ KB
Shape of the Dataset --  (10015, 7)


#### Dictionary for all 7 types

In [20]:
lesion_type_dict = {
    'nv': 'Melanocytic nevi',
    'mel': 'Melanoma',
    'bkl': 'Benign keratosis-like lesions ',
    'bcc': 'Basal cell carcinoma',
    'akiec': 'Actinic keratoses',
    'vasc': 'Vascular lesions',
    'df': 'Dermatofibroma'
}

In [None]:
disease_directory = 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source'

#### Creating the path with the images folder

In [34]:
import glob
img_id_path_dict = {os.path.splitext(os.path.basename(x))[0]: x for x in glob.glob(os.path.join(disease_directory, '*', '*.jpg'))}
img_id_path_dict

{'ISIC_0024306': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024306.jpg',
 'ISIC_0024307': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024307.jpg',
 'ISIC_0024308': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024308.jpg',
 'ISIC_0024309': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024309.jpg',
 'ISIC_0024310': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024310.jpg',
 'ISIC_0024311': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024311.jpg',
 'ISIC_0024312': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024312.jpg',
 'ISIC_0024313': 'C://Users//Amreet//Desktop//Skin-Cancer-Classification//Source\\HAM10000_images_part_1\\ISIC_0024313.jpg',


#### Creating the column with name "Image_path", "cell_type", "cell_type_idx" and add them to the dataset

In [36]:
disease_df['image_path'] = disease_df['image_id'].map(img_id_path_dict.get)
disease_df['cell_type'] = disease_df['dx'].map(lesion_type_dict.get)
disease_df['cell_type_idx'] = pd.Categorical(disease_df['cell_type']).codes

disease_df.head(10)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path,cell_type,cell_type_idx
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
5,HAM_0001466,ISIC_0027850,bkl,histo,75.0,male,ear,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
6,HAM_0002761,ISIC_0029176,bkl,histo,60.0,male,face,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
7,HAM_0002761,ISIC_0029068,bkl,histo,60.0,male,face,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
8,HAM_0005132,ISIC_0025837,bkl,histo,70.0,female,back,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2
9,HAM_0005132,ISIC_0025209,bkl,histo,70.0,female,back,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Benign keratosis-like lesions,2


In [37]:
disease_df.tail(10)

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_path,cell_type,cell_type_idx
10005,HAM_0005579,ISIC_0028393,akiec,histo,80.0,male,face,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10006,HAM_0004034,ISIC_0024948,akiec,histo,55.0,female,face,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10007,HAM_0001565,ISIC_0028619,akiec,histo,60.0,female,face,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10008,HAM_0001576,ISIC_0033705,akiec,histo,60.0,male,face,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10009,HAM_0005705,ISIC_0031430,akiec,histo,75.0,female,lower extremity,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10010,HAM_0002867,ISIC_0033084,akiec,histo,40.0,male,abdomen,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10011,HAM_0002867,ISIC_0033550,akiec,histo,40.0,male,abdomen,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10012,HAM_0002867,ISIC_0033536,akiec,histo,40.0,male,abdomen,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10013,HAM_0000239,ISIC_0032854,akiec,histo,80.0,male,face,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Actinic keratoses,0
10014,HAM_0003521,ISIC_0032258,mel,histo,70.0,female,back,C://Users//Amreet//Desktop//Skin-Cancer-Classi...,Melanoma,5


#### Saving the dataset as Updated CSV

In [40]:
filename = "HAM10000_images_Updated.csv"
disease_df.to_csv(filename, index=False)

#### Divide the dataset into 2 parts part-1 and part-2

In [42]:
import pandas as pd

# Load the full CSV
df = pd.read_csv("HAM10000_images_Updated.csv")

# Get the total number of rows
n = len(df)
print("Total rows:", n)

# Split into two equal(ish) halves
df1 = df.iloc[:n//2].reset_index(drop=True)   # first half
df2 = df.iloc[n//2:].reset_index(drop=True)   # second half

# Save each half to new CSV files
df1.to_csv("HAM10000_metadata_part1.csv", index=False)
df2.to_csv("HAM10000_metadata_part2.csv", index=False)

print("Files saved: HAM10000_metadata_part1.csv and HAM10000_metadata_part2.csv")


Total rows: 10015
Files saved: HAM10000_metadata_part1.csv and HAM10000_metadata_part2.csv
