In [1]:
import tensorflow as tf
import numpy as np
from PIL import Image
import pandas as pd
import os

import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, Dropout, Flatten, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("E:\DataSets\MNIST skin cancer\HAM10000_metadata.csv")

In [3]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10015 entries, 0 to 10014
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   lesion_id     10015 non-null  object 
 1   image_id      10015 non-null  object 
 2   dx            10015 non-null  object 
 3   dx_type       10015 non-null  object 
 4   age           9958 non-null   float64
 5   sex           10015 non-null  object 
 6   localization  10015 non-null  object 
dtypes: float64(1), object(6)
memory usage: 547.8+ KB


In [5]:
img_dir = r"E:\DataSets\MNIST skin cancer\ALL"
all_dir = [os.path.join(img_dir, img_name) + ".jpg" for img_name in df['image_id']]

In [6]:
all_dir

['E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0027419.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025030.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0026769.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025661.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0031633.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0027850.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0029176.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0029068.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025837.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025209.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025276.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0029396.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025984.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025767.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0032417.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0031326.jpg',
 'E:\\DataSets\\MNIST skin cancer\\ALL\\ISIC_0025915.jpg

In [7]:
df.insert(7, "image_directory", all_dir, True)

In [8]:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_directory
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0027419...
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0025030...
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0026769...
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0025661...
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,E:\DataSets\MNIST skin cancer\ALL\ISIC_0031633...


In [9]:
new_df = pd.concat([df, pd.get_dummies(df['dx'])], axis = 1)
new_df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,image_directory,akiec,bcc,bkl,df,mel,nv,vasc
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0027419...,0,0,1,0,0,0,0
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0025030...,0,0,1,0,0,0,0
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0026769...,0,0,1,0,0,0,0
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp,E:\DataSets\MNIST skin cancer\ALL\ISIC_0025661...,0,0,1,0,0,0,0
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear,E:\DataSets\MNIST skin cancer\ALL\ISIC_0031633...,0,0,1,0,0,0,0


In [11]:
full_train_df, test_df = train_test_split(new_df, test_size = 0.1, shuffle=True)

In [12]:
train_df, valid_df = train_test_split(full_train_df, test_size = 0.2, shuffle = True)

In [15]:
class multi_layer_conv:
    def __init__(self, train_df, valid_df, test_df, batch_size):
        self.train_df = train_df
        self.valid_df = valid_df
        self.test_df = test_df
        self.batch_size = batch_size
        self.train_gen = self.train_gen = self.get_gen(self.train_df)
        self.valid_gen = self.get_gen(self.valid_df)
        self.test_gen = self.get_gen(self.test_df)
        self.model = Sequential()
        
        
        
    def get_gen(self, df):
        return ImageDataGenerator().flow_from_dataframe(
        dataframe = df,
        x_col = "image_directory",
        y_col = df.columns[8:],
        target_size = (128, 128),
        color_mode = "rgb",
        class_mode = "raw",
        batch_size = self.batch_size,
        )
    
    def choose(self, item, learning_rate = 0.001):
        if item == "gradient":
            return tf.keras.optimizers.SGD(learning_rate = learning_rate)
        elif item == 'adam':
            return 'adam'
        elif item == 'binary_crossentropy':
            return 'binary_crossentropy'
        elif item == 'mean_squared_error':
            return tf.keras.losses.MeanSquaredError()

    def build_model(self, optimizer, loss, learning_rate = 0.001):
        self.model = Sequential([
            Conv2D(16, (5,5), padding = 'valid', activation = 'relu', input_shape = (128, 128, 3)),
            MaxPooling2D(),
            
            Conv2D(32, (5,5), activation = 'relu', padding = 'valid'),
            MaxPooling2D(pool_size = (2,2), padding = 'valid'),
            
            
            Conv2D(64, (3,3), activation = 'relu', padding = 'valid'),
            MaxPooling2D(pool_size = (2,2), padding = 'valid'),
            
            Conv2D(128, (3,3), activation = 'relu', padding = 'valid'),
            MaxPooling2D(pool_size = (2,2), padding = 'valid'),
            
            
            Flatten(),
            Dense(64, activation = 'relu'),
            Dense(32, activation = 'relu'),
    
            Dense(7, activation = 'softmax')
        ])
        
        self.model.summary()
        self.model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
                     loss = 'binary_crossentropy',
                     metrics = [tf.keras.metrics.MeanSquaredError(), 'accuracy'])
        
    
    def train_model(self, epochs):
        ssTrain = self.train_gen.n//self.batch_size
        ssValid = self.valid_gen.n//self.batch_size
        history = self.model.fit(
            self.train_gen,
            epochs = epochs,
            steps_per_epoch = ssTrain,
            validation_steps = ssValid,
            validation_data = self.valid_gen
        )
        return history
    
    def test_model(self):
        test = self.model.evaluate(self.test_gen) 
        return test

In [17]:
model = multi_layer_conv(train_df, valid_df, test_df, 32)
model.build_model("adam", "binary_crossentropy", 0.001)
history = model.train_model(5)

Found 7210 validated image filenames.
Found 1803 validated image filenames.
Found 1002 validated image filenames.
Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 124, 124, 16)      1216      
                                                                 
 max_pooling2d_8 (MaxPooling  (None, 62, 62, 16)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 58, 58, 32)        12832     
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 29, 29, 32)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 27, 27, 64)        18496     
      

In [20]:
model.model.evaluate(model.train_gen)



[0.19269858300685883, 0.05617232993245125, 0.7067961096763611]