### Importing libraries

In [169]:
import sys 
!{sys.executable} -m pip install numpy pandas tensorflow scikit-learn imblearn
import os
import time
import shutil
import itertools
import sklearn
import cv2
import numpy as np 
import pandas as pd
import seaborn as sns
import imblearn
sns.set_style("darkgrid")
import matplotlib.pyplot as plt
import tensorflow as ts
from tensorflow import keras
from tensorflow.keras.layers import Conv2D ,MaxPooling2D , Flatten , Dense , Activation , Dropout , BatchNormalization
from tensorflow.keras.models import Model , load_model , Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import confusion_matrix , classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam , Adamax
from tensorflow.keras import regularizers
from tensorflow.keras.metrics import categorical_crossentropy

import warnings

warnings.filterwarnings("ignore")

### Quick analysis of the data

In [170]:
# Build the path with os.path.join instead of a hard-coded backslash:
# "\h" is not a valid string escape (newer Python versions warn about it) and a
# backslash is only a path separator on Windows.
data_path = os.path.join("archive", "hmnist_28_28_RGB.csv")

# Keep the path and the DataFrame under different names so that re-running this cell
# never passes a DataFrame to read_csv.
data = pd.read_csv(data_path)

# Preview the first rows as the cell output.
data.head()

Unnamed: 0,pixel0000,pixel0001,pixel0002,pixel0003,pixel0004,pixel0005,pixel0006,pixel0007,pixel0008,pixel0009,...,pixel2343,pixel2344,pixel2345,pixel2346,pixel2347,pixel2348,pixel2349,pixel2350,pixel2351,label
0,192,153,193,195,155,192,197,154,185,202,...,173,124,138,183,147,166,185,154,177,2
1,25,14,30,68,48,75,123,93,126,158,...,60,39,55,25,14,28,25,14,27,2
2,192,138,153,200,145,163,201,142,160,206,...,167,129,143,159,124,142,136,104,117,2
3,38,19,30,95,59,72,143,103,119,171,...,44,26,36,25,12,17,25,12,15,2
4,158,113,139,194,144,174,215,162,191,225,...,209,166,185,172,135,149,109,78,92,2


### Separating the Target variable 

Before training, the target variable must be separated from the input features. Otherwise the label itself would be fed to the model as an input, and the model could simply read off the answer instead of learning patterns from the pixel values.

In [171]:
# Split the frame column-wise: 'label' is the target, every remaining column is an input feature.
target_column = 'label'

Label = data[target_column]
Data = data.drop(columns=[target_column])


### Reshaping the data for an image of 28×28

In [172]:

from imblearn.over_sampling import RandomOverSampler  
from imblearn.under_sampling import RandomUnderSampler

# Fold every row of pixel columns back into a (height, width, channels) image.
# The leading -1 lets NumPy infer the number of images from the row count.
image_shape = (28, 28, 3)
Data = np.array(Data).reshape((-1,) + image_shape)

print("Shape Data :", Data.shape)

Shape Data : (10015, 28, 28, 3)


In [173]:
# Materialise the label column as a plain NumPy array.
labels_array = np.array(Label)
Label = labels_array

# Bare expression: displayed as the cell output.
Label


array([2, 2, 2, ..., 0, 0, 6], dtype=int64)

### Putting labels for the different types of skin diseases

In [174]:
# Lookup from the integer class id to {short diagnosis code: human-readable name}.
# Insertion order is kept as originally written in case anything iterates over it.
classes = {
    4: {'nv': 'melanocytic nevi'},
    6: {'mel': 'melanoma'},
    2: {'bkl': 'benign keratosis-like lesions'},
    1: {'bcc': 'basal cell carcinoma'},
    5: {'vasc': 'pyogenic granulomas and hemorrhage'},
    # Display text corrected: "ans" -> "and", "carcinomae" -> "carcinoma".
    0: {'akiec': 'Actinic keratoses and intraepithelial carcinoma'},
    3: {'df': 'dermatofibroma'},
}
 

### Creation of the Train/Test dataset variables

In [175]:
from sklearn.model_selection import train_test_split



# Hold out 20% of the images for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified although the classes are heavily imbalanced —
# consider passing stratify=Label; confirm before changing, since it alters the split.
split_parts = train_test_split(Data, Label, test_size=0.20, random_state=23)
X_train, X_test, y_train, y_test = split_parts

### Fixing the Class Imbalance in the Dataset

For better results, it is recommended to have a balanced number of samples across different classes in the dataset. To address class imbalance, we can use under-sampling or over-sampling techniques.

Under-sampling: Reduces the number of samples in the majority class to match the minority class.

Over-sampling: Increases the number of samples in the minority class by duplicating or generating new samples (e.g., using SMOTE).

In [176]:
from collections import Counter
from imblearn.over_sampling import SMOTE

# SMOTE expects 2-D input (n_samples, n_features), so flatten each image into one row.
X_train_flat = X_train.reshape(X_train.shape[0], -1)

# Instantiate the sampler in this cell so it also runs on a fresh kernel
# (`smote` was previously used without ever being created -> NameError).
# The fixed seed makes the synthetic samples reproducible.
smote = SMOTE(random_state=23)

# Oversample on the training split only; the test split stays untouched.
X_train_res_flat, y_train_res = smote.fit_resample(X_train_flat, y_train)

# Restore the per-image shape of X_train; -1 picks up the new (larger) sample count.
X_train_res = X_train_res_flat.reshape(-1, *X_train.shape[1:])

# Compare the class distribution before and after resampling.
print(f"Original dataset shape: {Counter(y_train)}")
print(f"Resampled dataset shape: {Counter(y_train_res)}")


Original dataset shape: Counter({4: 5348, 6: 901, 2: 867, 1: 414, 0: 269, 5: 118, 3: 95})
Resampled dataset shape: Counter({4: 5348, 2: 5348, 6: 5348, 0: 5348, 5: 5348, 1: 5348, 3: 5348})


In [None]:
# Report the shapes of both splits. The test-side placeholders previously interpolated
# the arrays themselves, dumping every pixel/label instead of the shape.
print(f'X_train shape :{X_train.shape} \n X_test shape: {X_test.shape}')

print(f'y_train shape :{y_train.shape} \n y_test shape: {y_test.shape}')

### One-hot encode the target labels

In [178]:


from keras.utils import to_categorical

# Fix the one-hot width explicitly. Without num_classes, to_categorical infers it from the
# largest label present in each array, so the two splits could end up with different widths
# (and a width different from the 7-unit classifier) if one of them lacks the highest class.
NUM_CLASSES = len(classes)

# Only encode integer label vectors: guarding on ndim keeps the cell safe to re-run,
# because encoding an already one-hot array a second time would corrupt it.
if np.ndim(y_train) == 1:
    y_train = to_categorical(y_train, num_classes=NUM_CLASSES)

if np.ndim(y_test) == 1:
    y_test = to_categorical(y_test, num_classes=NUM_CLASSES)

In [179]:
# Both generators scale pixel values by the same factor.
pixel_scale = 1. / 255

# Random geometric perturbations, applied by the training generator only.
augmentation_options = dict(
    rotation_range=10,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
)

datagen = ImageDataGenerator(rescale=pixel_scale, **augmentation_options)

# The test generator only rescales.
testgen = ImageDataGenerator(rescale=pixel_scale)
                            

### Setting up the learning rate reduction function

In [180]:
from keras.callbacks import ReduceLROnPlateau


# Halve the learning rate whenever validation accuracy has not improved for
# 2 consecutive epochs, never going below 1e-4; verbose=1 logs each reduction.
plateau_settings = {
    'monitor': 'val_accuracy',
    'patience': 2,
    'verbose': 1,
    'factor': 0.5,
    'min_lr': 0.0001,
}

learning_rate_reduction = ReduceLROnPlateau(**plateau_settings)



### Creation of the model

In [None]:
# CNN classifier for 28x28 RGB images: four convolutional stages, then a dense head
# ending in a 7-way softmax.
model = keras.models.Sequential()
model.add(keras.layers.Input(shape=[28, 28, 3]))

# Convolutional stages as (filters, number of stacked 3x3 convolutions).
# Each stage ends with 2x2 max-pooling followed by batch normalisation.
conv_stages = [(32, 1), (64, 2), (128, 2), (256, 2)]
for filters, n_convs in conv_stages:
    for _ in range(n_convs):
        model.add(keras.layers.Conv2D(filters, (3, 3), activation='relu',
                                      padding='same', kernel_initializer='he_normal'))
    model.add(keras.layers.MaxPooling2D())
    model.add(keras.layers.BatchNormalization())

model.add(keras.layers.Flatten())

# Dropout is applied once, directly on the flattened convolutional features.
model.add(keras.layers.Dropout(rate=0.6))

# Dense head: progressively narrower layers, each followed by batch normalisation.
for units in (256, 128, 64, 32):
    model.add(keras.layers.Dense(units=units, activation='relu', kernel_initializer='he_normal'))
    model.add(keras.layers.BatchNormalization())

# Last hidden layer carries the weight penalty. L1L2() with no arguments uses l1=0 and l2=0,
# i.e. no penalty at all, so the coefficients are now given explicitly.
# NOTE(review): 1e-5 / 1e-4 are conservative starting values — tune as needed.
model.add(keras.layers.Dense(units=32, activation='relu', kernel_initializer='he_normal',
                             kernel_regularizer=regularizers.L1L2(l1=1e-5, l2=1e-4)))
model.add(keras.layers.BatchNormalization())

# One output unit per class id (0-6); softmax yields class probabilities.
model.add(keras.layers.Dense(units=7, activation='softmax',
                             kernel_initializer='glorot_uniform', name='classifier'))

# categorical_crossentropy expects one-hot encoded targets.
model.compile(optimizer=Adamax(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

model.summary()






In [None]:
# Train on the SMOTE-balanced training set. Fitting on X_train / y_train would discard the
# resampling done earlier and train on the original, heavily imbalanced distribution.
# y_train_res still holds integer class ids, so one-hot encode it here to match the
# categorical_crossentropy loss and the 7-unit softmax output.
y_train_res_cat = to_categorical(y_train_res, num_classes=len(classes))

# Validation uses the untouched (non-resampled) test split.
# NOTE(review): pixels are fed as raw 0-255 values; `datagen` / `testgen` (which rescale by
# 1/255) are defined but not used here — confirm whether that is intentional.
history = model.fit(X_train_res,
                    y_train_res_cat,
                    epochs=25,
                    batch_size=128,
                    validation_data=(X_test, y_test),
                    callbacks=[learning_rate_reduction])




                        

### Output the result

In [None]:
# evaluate() returns [loss, accuracy] for the metrics configured at compile time.
train_score = model.evaluate(X_train, y_train, verbose=1)
test_score = model.evaluate(X_test, y_test, verbose=1)

train_loss, train_accuracy = train_score[0], train_score[1]
test_loss, test_accuracy = test_score[0], test_score[1]

# Print the four figures in a fixed order: train loss/accuracy, then test loss/accuracy.
report_lines = (
    ("Train Loss :", train_loss),
    ("Train_accuracy : ", train_accuracy),
    ("Test Loss : ", test_loss),
    ('Test accuracy : ', test_accuracy),
)
for caption, value in report_lines:
    print(caption, value)


### Quick conclusion

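As we can see, the model classifies skin lesions into the seven diagnostic categories with acceptable accuracy, which gives an indication of whether a spot on the skin is likely to be malignant. However, there is still room for improvement, in particular to reduce the significant loss observed during training.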

In [None]:
# Ground truth: y_test is one-hot encoded, so argmax over the class axis recovers the class id.
y_true = np.argmax(np.array(y_test), axis=1)

# Predictions must come from the model. Taking argmax of X_test itself only indexes into
# the input pixels and says nothing about the classifier.
y_pred_proba = model.predict(X_test)

# Most probable class per sample.
y_pred = np.argmax(y_pred_proba, axis=1)