**1) Importing libraries**

In [6]:
import os
# Configure TensorFlow's native logging verbosity. This is set before
# `import tensorflow` below so that it is already in the environment when
# TensorFlow initialises. '3' is presumably the most restrictive level
# (hides INFO/WARNING/ERROR output) - confirm against the TensorFlow docs.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import numpy as np
import pandas as pd
import tensorflow as tf
import sklearn
from matplotlib import pyplot as plt

**2) Loading in the labels**

In [2]:
# Read the ground-truth table and turn every image id into a file name by
# appending the '.jpg' extension, so the 'image' column can be used directly
# as the file-name column of the image generators.
tabela = (
    pd.read_csv('../input/isic-2019-reorganized/ISIC_2019_Training_GroundTruth-reo.csv')
    .assign(image=lambda okvir: okvir['image'].astype(str) + '.jpg')
)
tabela

Unnamed: 0,image,dx
0,ISIC_0024329.jpg,scc
1,ISIC_0024372.jpg,scc
2,ISIC_0024418.jpg,scc
3,ISIC_0024450.jpg,scc
4,ISIC_0024463.jpg,scc
...,...,...
25326,ISIC_0073231.jpg,mel
25327,ISIC_0073237.jpg,mel
25328,ISIC_0073238.jpg,mel
25329,ISIC_0073241.jpg,mel


In [3]:
# Number of images per diagnosis ('dx') class.
tabela['dx'].groupby(tabela['dx']).count()

dx
ak        867
bcc      3323
bkl      2624
df        239
mel      4522
nv      12875
scc       628
vasc      253
Name: dx, dtype: int64

**3) Filtering data for only the desired classes**

In [4]:
# Build the reduced 3-class table: every 'bcc' row, every 'mel' row, and a
# random subset of 5000 'nv' rows (in that order).
# The frames are collected first and concatenated once, instead of growing a
# DataFrame inside a loop starting from an empty frame.
zadrzane_klase = [tabela.loc[tabela['dx'] == oznaka] for oznaka in ('bcc', 'mel')]

# random_state pins the 'nv' subsample so that Restart & Run All reproduces the
# same table (and therefore the same train/val/test split further down).
nv_uzorak = tabela.loc[tabela['dx'] == 'nv'].sample(n=5000, random_state=42)

redukovana_tabela = pd.concat([*zadrzane_klase, nv_uzorak], axis=0)

**4) Loading in the base model**

In [7]:
# Convolutional base: InceptionV3 initialised with ImageNet weights, built
# without its original classification top, for 299x299 RGB inputs.
osnova_modela = tf.keras.applications.inception_v3.InceptionV3(
    weights="imagenet",
    include_top=False,
    input_shape=(299, 299, 3),
)
#osnova_modela.summary()

**5) Adding new layers**

In [8]:
# Classifier = InceptionV3 base -> Flatten -> 3-way Dense head.
# The three classes are mutually exclusive and the model is compiled with
# categorical cross-entropy, so the head uses softmax (one probability
# distribution over the classes) rather than three independent sigmoids.
# Both activations are monotonic in the logits, so argmax predictions obtained
# from previously saved weights are unaffected by this change.
model = tf.keras.models.Sequential([
    osnova_modela,
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(3, activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Functional)    (None, 8, 8, 2048)        21802784  
_________________________________________________________________
flatten (Flatten)            (None, 131072)            0         
_________________________________________________________________
dense (Dense)                (None, 3)                 393219    
Total params: 22,196,003
Trainable params: 22,161,571
Non-trainable params: 34,432
_________________________________________________________________


**6) Calculating class weights**

In [9]:
# 'balanced' class weights computed over the labels of the reduced table.
# The weights come back in the order of np.unique(y); the dict maps the
# position in that order (0, 1, 2, ...) to the corresponding weight.
y = redukovana_tabela['dx']
sklearn_weights = sklearn.utils.class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(y),
    y=y,
)
sklearn_weights_dict = {indeks: tezina for indeks, tezina in enumerate(sklearn_weights)}
sklearn_weights_dict

{0: 1.2884943324305347, 1: 0.9468524251805985, 2: 0.8563333333333333}

**7) Splitting the data into training, validation and test sets**

In [10]:
from sklearn.model_selection import train_test_split

# Separate labels from the remaining columns WITHOUT mutating
# `redukovana_tabela` (the previous `.pop('dx')` removed the column in place,
# which made this cell fail when re-run and broke any earlier cell that reads
# `redukovana_tabela['dx']` if executed again).
Y = redukovana_tabela[['dx']]
X = redukovana_tabela.drop(columns='dx')

# First hold out 10% of all rows as the test set, then 10% of the remainder as
# the validation set. Fixed random_state keeps both splits reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.1, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)

In [11]:
# Re-attach the label column to each split so that every frame carries both
# the image file name and its 'dx' label side by side.
train, val, test = (
    pd.concat([ulazi, oznake_skupa], axis=1)
    for ulazi, oznake_skupa in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

**8) Transforming labels into an adequate format**

In [13]:
from sklearn.preprocessing import LabelEncoder

# Use ONE encoder, fitted on the training labels, for every split. Fitting a
# separate encoder per split would silently produce different integer codes
# if a split happened to miss a class; with a shared encoder an unseen label
# raises an error in transform() instead.
encoder = LabelEncoder()
encoder.fit(train['dx'])

# NOTE: despite its name, `name_as_indexes_train` holds the integer labels of
# the VALIDATION split; the name is kept so existing references keep working.
name_as_indexes_train = encoder.transform(val['dx'])
val['label'] = name_as_indexes_train

# Integer labels of the test split, used later as ground truth for evaluation.
name_as_indexes_test = encoder.transform(test['dx'])
test['label'] = name_as_indexes_test

**9) Creating training data input generator**

In [14]:
# Training input pipeline: pixel values scaled by 1/255 plus random
# horizontal/vertical flips and rotations (rotation_range=20) as augmentation.
train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    horizontal_flip=True,
    vertical_flip=True,
)

# Shuffled batches of 64 images resized to 299x299, with one-hot
# ('categorical') labels taken from the 'dx' column.
train_data = train_generator.flow_from_dataframe(
    dataframe=train,
    directory='../input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input',
    x_col='image',
    y_col='dx',
    target_size=(299, 299),
    class_mode='categorical',
    batch_size=64,
    shuffle=True,
)

Found 10404 validated image filenames belonging to 3 classes.


**10) Creating validation data input generator**

In [16]:
# Validation input pipeline: rescaling only, no augmentation.
val_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)

# Fix: the flow is built from `val_generator`. It was previously built from
# `train_generator`, which applied the training-time random flips and
# rotations to the validation images and made validation metrics noisy.
val_data = val_generator.flow_from_dataframe(dataframe = val, x_col = 'image', y_col = 'dx', batch_size = 1, directory = '../input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input',
                                             shuffle = True, class_mode = 'categorical', target_size = (299,299))

Found 1156 validated image filenames belonging to 3 classes.


**11) Creating test data input generator**

In [17]:
# Test input pipeline: rescaling only, no augmentation.
test_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)

# Fix: the flow is built from `test_generator`. It was previously built from
# `train_generator`, so test images were randomly flipped/rotated and the
# evaluation results changed from run to run.
# shuffle=False keeps predictions in the same row order as the `test` frame,
# which the evaluation cells rely on when comparing against the true labels.
test_data = test_generator.flow_from_dataframe(dataframe = test, x_col = 'image', y_col = 'dx', batch_size = 1, directory = '../input/isic-2019/ISIC_2019_Training_Input/ISIC_2019_Training_Input',
                                               shuffle = False, class_mode = 'categorical', target_size = (299,299))

Found 1285 validated image filenames belonging to 3 classes.


In [18]:
# Initialise the model from a previously saved weights file.
model.load_weights(filepath='../input/inc3-isic19-3-class/Inc3_ISIC19.h5')

**12) Creating performance metrics and compiling the model**

In [19]:
# Metrics reported during training/validation.
# Fix: accuracy is now CategoricalAccuracy (argmax of the prediction vs. argmax
# of the one-hot label). BinaryAccuracy scored each of the three outputs
# independently, which inflates the number for a single-label 3-class task.
# The metric keeps the name 'accuracy', so history keys are unchanged.
METRICS = [
      tf.keras.metrics.CategoricalAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall')
]

# Small learning rate for fine-tuning; this single constant is what the
# optimizer actually uses (it was previously defined but never referenced).
initial_learning_rate = 5e-6

# Stop when validation loss has not improved for 6 epochs and roll back to the
# best weights seen.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=6,
    restore_best_weights=True
)

adam = tf.keras.optimizers.Adam(learning_rate = initial_learning_rate)
model.compile ( optimizer = adam, loss = 'categorical_crossentropy', metrics = METRICS)

**13) Training the model**

In [None]:
# Fine-tune for 4 epochs. Each epoch runs only the full batches of the
# training generator (integer division), validates on `val_data`, and weights
# the loss per class with `sklearn_weights_dict`.
history = model.fit(
    x=train_data,
    epochs=4,
    steps_per_epoch=train_data.samples // train_data.batch_size,
    validation_data=val_data,
    class_weight=sklearn_weights_dict,
    callbacks=[early_stop],
)

**14) Saving weights**

In [None]:
# Persist the fine-tuned weights to the working directory.
model.save_weights(filepath='my_model_weights.h5')

**15) Testing and evaluation**

In [None]:
# Predict on the whole test set, starting from the first batch.
test_data.reset()
# len(test_data) is the integer number of batches; the previous
# samples/batch_size expression produced a float step count.
predictions = model.predict(test_data, steps = len(test_data), verbose = 1)
y_pred = np.argmax(predictions, axis = 1)

# Positions where the predicted class equals the true class.
c = np.where(y_pred == name_as_indexes_test)
# Fix: count the matching positions themselves (c[0].size).
# np.count_nonzero(c) counted non-zero INDEX VALUES, so a correct prediction
# at position 0 was silently left out of the accuracy.
print('Preciznost je: ', (c[0].size/len(name_as_indexes_test))*100)

In [None]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the test predictions, as plain text.
report = classification_report(y_true=name_as_indexes_test, y_pred=y_pred)
print(report)

In [None]:
# Import the submodule explicitly so this cell does not depend on an earlier
# cell having loaded `sklearn.metrics` as a side effect.
import sklearn.metrics

# Raw (unnormalised) confusion matrix: rows are true classes, columns are
# predicted classes.
p = sklearn.metrics.confusion_matrix(name_as_indexes_test, y_pred)

# Fix: actually draw the matrix. The display object was previously created and
# discarded without calling .plot(). Tick labels come from the label encoder,
# so the axes show class names instead of bare integer codes.
sklearn.metrics.ConfusionMatrixDisplay(p, display_labels=encoder.classes_).plot()
p