In [None]:
import glob
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from PIL import Image
from scipy import stats


In [None]:
# Seed NumPy's global random generator so NumPy-driven randomness repeats across runs.
np.random.seed(seed=42)
from sklearn.metrics import confusion_matrix

In [None]:
import keras
from keras.utils.np_utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [None]:
# Record the installed Keras version in the notebook output for reproducibility.
print(keras.__version__)

In [None]:
# Load the HAM10000 metadata table (path is relative to the notebook's working
# directory) and preview its first rows.
skin_df = pd.read_csv('data/HAM10000_metadata.csv')
skin_df.head()

In [None]:
# Side length, in pixels, that every image is resized to before it reaches the network.
SIZE = 64

# Fit an encoder that maps the diagnosis strings in `dx` to integer labels,
# then show the class names in encoded order (index in this list == label).
le = LabelEncoder()
le.fit(skin_df['dx'])
print(list(le.classes_))

In [None]:
# Add an integer `label` column by encoding `dx` with the fitted encoder,
# then print five randomly chosen rows as a spot check.
skin_df['label'] = le.transform(skin_df["dx"]) 
print(skin_df.sample(5))

In [None]:
# Data distribution visualization: a 2x2 grid showing diagnosis counts,
# sex counts, lesion localization counts and the age distribution.
# Every plot is bound to its axes explicitly instead of relying on
# matplotlib's implicit "current axes".
fig = plt.figure(figsize=(12, 8))

ax1 = fig.add_subplot(221)
skin_df['dx'].value_counts().plot(kind='bar', ax=ax1)
ax1.set_ylabel('Count')
ax1.set_title('Cell Type')

ax2 = fig.add_subplot(222)
skin_df['sex'].value_counts().plot(kind='bar', ax=ax2)
ax2.set_ylabel('Count', size=15)
ax2.set_title('Sex')

ax3 = fig.add_subplot(223)
skin_df['localization'].value_counts().plot(kind='bar', ax=ax3)
ax3.set_ylabel('Count', size=12)
ax3.set_title('Localization')

ax4 = fig.add_subplot(224)
# Rows without an age are dropped before plotting the age distribution.
sample_age = skin_df[pd.notnull(skin_df['age'])]
# `stats` is scipy.stats (imported at the top of the notebook); the fitted
# normal curve is overlaid on the histogram.
# NOTE(review): seaborn has deprecated distplot in newer releases — confirm
# the installed version before upgrading seaborn.
sns.distplot(sample_age['age'], fit=stats.norm, color='red', ax=ax4)
ax4.set_title('Age')

plt.tight_layout()
plt.show()

In [None]:
from sklearn.utils import resample

# Class sizes before balancing.
print(skin_df['label'].value_counts())

# One sub-frame per encoded label (0 through 6).
df_0, df_1, df_2, df_3, df_4, df_5, df_6 = [
    skin_df[skin_df['label'] == label] for label in range(7)
]

# Draw the same number of rows from every class. Sampling is done with
# replacement, so classes smaller than n_samples contain repeated rows.
n_samples = 500
(df_0_balanced, df_1_balanced, df_2_balanced, df_3_balanced,
 df_4_balanced, df_5_balanced, df_6_balanced) = [
    resample(class_df, replace=True, n_samples=n_samples, random_state=42)
    for class_df in (df_0, df_1, df_2, df_3, df_4, df_5, df_6)
]

In [None]:
# Stack the seven per-class samples back into a single dataframe.
skin_df_balanced = pd.concat(
    objs=[
        df_0_balanced,
        df_1_balanced,
        df_2_balanced,
        df_3_balanced,
        df_4_balanced,
        df_5_balanced,
        df_6_balanced,
    ]
)

# Sanity check: every label should now occur the same number of times.
print(skin_df_balanced['label'].value_counts())


In [None]:
# Map image_id (file name without extension) -> file path for every JPEG
# found one directory level below data/.
image_path = {os.path.splitext(os.path.basename(path))[0]: path
              for path in glob.glob(os.path.join('data/', '*', '*.jpg'))}

# Look the paths up from the balanced frame's own image_id column rather than
# from skin_df, so the result does not depend on index alignment between the
# two frames.
skin_df_balanced['path'] = skin_df_balanced['image_id'].map(image_path.get)

# Fail early with a readable message if any image file was not found;
# otherwise Image.open would be handed None further down.
missing_ids = skin_df_balanced.loc[skin_df_balanced['path'].isnull(), 'image_id'].unique()
if len(missing_ids) > 0:
    raise FileNotFoundError(
        'No image file found under data/*/ for image_id(s): '
        + ', '.join(str(image_id) for image_id in missing_ids[:10])
    )

# Read each image, resize it to SIZE x SIZE and store it as a numpy array.
skin_df_balanced['image'] = skin_df_balanced['path'].map(lambda x: np.asarray(Image.open(x).resize((SIZE,SIZE))))


# Preview grid: one row per diagnosis, n_samples random images per row.
n_samples = 5

fig, m_axs = plt.subplots(7, n_samples, figsize = (4*n_samples, 3*7))
for n_axs, (type_name, type_rows) in zip(m_axs,
                                         skin_df_balanced.sort_values(['dx']).groupby('dx')):
    # Only the first image of each row carries the diagnosis name as its title.
    n_axs[0].set_title(type_name)
    for c_ax, (_, c_row) in zip(n_axs, type_rows.sample(n_samples, random_state=1234).iterrows()):
        c_ax.imshow(c_row['image'])
        c_ax.axis('off')

In [None]:

from sklearn.model_selection import GroupShuffleSplit

# Stack the per-row image arrays into one array and scale pixel values from 0-255 to 0-1.
X = np.asarray(skin_df_balanced['image'].tolist())
X = X/255.
# Integer labels and their one-hot encoding (7 classes).
Y = skin_df_balanced['label']
Y_cat = to_categorical(Y, num_classes=7)

# The balanced frame was built by sampling with replacement, so the same
# image_id appears on several rows. A plain row-wise split would place copies
# of one image in both the training and the test set and inflate the test
# score. Splitting by image_id keeps every copy of an image on one side.
# The test share is therefore roughly, not exactly, 25% of the rows, and the
# per-class balance of the test set is not guaranteed.
splitter = GroupShuffleSplit(n_splits=1, test_size=0.25, random_state=42)
train_idx, test_idx = next(
    splitter.split(X, Y_cat, groups=skin_df_balanced['image_id'].values)
)
x_train, x_test = X[train_idx], X[test_idx]
y_train, y_test = Y_cat[train_idx], Y_cat[test_idx]

In [None]:
num_classes = 7

# Three conv -> max-pool -> dropout stages with decreasing filter counts
# (256, 128, 64), followed by a small dense head that ends in a softmax
# over the classes.
model = Sequential([
    Conv2D(256, (3, 3), activation='relu', input_shape=(SIZE, SIZE, 3)),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),

    Conv2D(128, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),

    Conv2D(64, (3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.3),
    Flatten(),

    # NOTE(review): no activation is given, so this layer is linear — confirm
    # that this is intended.
    Dense(32),
    Dense(num_classes, activation='softmax'),
])
model.summary()

# One-hot targets, hence categorical cross-entropy; accuracy is reported as 'acc'.
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['acc'])


In [None]:
batch_size = 16
epochs = 100

# Train the network. The held-out split is also passed as validation data,
# so its loss and accuracy are reported after each epoch
# (verbose=2: one log line per epoch).
history = model.fit(
    x=x_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=2,
    validation_data=(x_test, y_test),
)



In [None]:
# Evaluate on the held-out split; `score` holds the loss followed by the
# compiled metric (accuracy).
score = model.evaluate(x=x_test, y=y_test)
print('Test accuracy:', score[1])

In [None]:
# Class probabilities for the held-out images.
y_pred = model.predict(x_test)
# Collapse probability rows and one-hot rows to integer class indices.
y_pred_classes = y_pred.argmax(axis=1)
y_true = y_test.argmax(axis=1)


In [None]:
# Confusion matrix of true vs. predicted class indices, drawn as an annotated heatmap.
cm = confusion_matrix(y_true=y_true, y_pred=y_pred_classes)

fig, ax = plt.subplots(figsize=(6, 6))
sns.set(font_scale=1.6)
sns.heatmap(data=cm, annot=True, linewidths=0.5, ax=ax)


In [None]:
# Plot the fraction of misclassified samples for each true label:
# 1 - (correct predictions on the diagonal / total samples in that row).
incorr_fraction = 1 - cm.diagonal() / cm.sum(axis=1)
plt.bar(x=np.arange(7), height=incorr_fraction)
plt.xlabel('True Label')
plt.ylabel('Fraction of incorrect predictions')

In [None]:
import cv2

# Classify a single image from disk with the trained model.
img_path = 'ISIC_0034149.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise FileNotFoundError('Could not read image: ' + img_path)
# cv2 decodes images in BGR channel order, whereas the training images were
# loaded through PIL in RGB order; convert so the model sees the same layout.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Same preprocessing as training: resize to SIZE x SIZE and scale to 0-1.
img = cv2.resize(img, (SIZE, SIZE)) / 255.0
prediction = model.predict(img.reshape(1, SIZE, SIZE, 3))

# Index of the most probable class for the (single-image) batch.
y_pred_classes = np.argmax(prediction, axis=1)
# `y_true` holds the *predicted* class index, not a ground-truth label; the
# name is kept because the following cell reads it.
y_true = y_pred_classes
print(y_true)


In [None]:
# Encoded label -> diagnosis code, listed in label order 0..6.
classes = dict(enumerate(['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']))
classes[y_true[0]]


In [None]:
import pickle

# Serialize the trained model with pickle. The `with` block guarantees the
# file handle is flushed and closed even if dumping fails.
# NOTE(review): whether a Keras model can be pickled depends on the Keras
# version — confirm this copy is needed alongside the Keras-native save.
with open("model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

# To restore (only unpickle files you created yourself; pickle can execute
# arbitrary code on load):
# with open("model.pkl", "rb") as model_file:
#     model = pickle.load(model_file)

In [None]:
import joblib

# Write a second serialized copy of the trained model via joblib.
filename = 'finalized_model.sav'
joblib.dump(value=model, filename=filename)

 

In [None]:
from keras.models import save_model

# Save the trained model in Keras' own format. The path must match the one
# passed to load_model when the model is restored ("my_model"); previously
# the model was written to 'mymodel' and could not be found on reload.
save_model(model, 'my_model')

In [None]:
from keras.models import load_model

# Restore the model from disk, replacing the in-memory `model`.
model = load_model(filepath="my_model")

In [None]:
# Repeat the single-image prediction, this time with the reloaded model.
img_path = 'ISIC_0034149.jpg'
img = cv2.imread(img_path)
if img is None:
    # cv2.imread signals an unreadable or missing file by returning None.
    raise FileNotFoundError('Could not read image: ' + img_path)
# cv2 decodes images in BGR channel order, whereas the training images were
# loaded through PIL in RGB order; convert so the model sees the same layout.
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (SIZE, SIZE)) / 255.0
prediction = model.predict(img.reshape(1, SIZE, SIZE, 3))

# Index of the most probable class for the (single-image) batch.
y_pred_classes = np.argmax(prediction, axis=1)
# `y_true` is the predicted class index (there is no ground truth here);
# the name is kept for consistency with the earlier prediction cell.
y_true = y_pred_classes
print(y_true)

In [None]:
import cv2
import glob

# Predicted class index for every JPEG in testdata/, collected in `a`.
a = []
classes = {4: 'mel', 6: 'vasc', 2 :'bkl', 1:'bcc' , 5: 'nv', 0: 'akiec',  3: 'df'}
# glob returns files in arbitrary order; sorting makes the order of the
# predictions in `a` reproducible between runs and machines.
for img_path in sorted(glob.glob("testdata/*.jpg")):
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable file by returning None.
        raise FileNotFoundError('Could not read image: ' + img_path)
    # cv2 decodes images in BGR channel order, whereas the training images
    # were loaded through PIL in RGB order; convert before predicting.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, (SIZE, SIZE)) / 255.0
    prediction = model.predict(img.reshape(1, SIZE, SIZE, 3))

    # Index of the most probable class; `y_true` is a legacy name for the
    # same predicted index, not a ground-truth label.
    y_pred_classes = np.argmax(prediction, axis=1)
    y_true = y_pred_classes
    a.append(y_true[0])


In [None]:
# Encoded label -> diagnosis code, listed in label order 0..6.
classes = dict(enumerate(['akiec', 'bcc', 'bkl', 'df', 'mel', 'nv', 'vasc']))
# Print the diagnosis code for each collected prediction, one per line.
for class_index in a:
    print(classes[class_index])