<a href="https://colab.research.google.com/github/Abhiram102000/SkinCancerDetection/blob/main/SCD_v2_PatientData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing the libraries

%matplotlib inline
!pip install -U tensorflow-addons
!pip install -q "tqdm>=4.36.1"

import glob
import itertools
import os
import random
import zipfile

import tensorflow as tf
import tensorflow_addons as tfa
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from tensorflow.keras.utils import get_file
from sklearn.metrics import roc_curve, auc, confusion_matrix
from imblearn.metrics import sensitivity_score, specificity_score

import keras
from keras.utils.np_utils import to_categorical # used for converting labels to one-hot-encoding
from keras.models import Sequential, Model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau
from keras.wrappers.scikit_learn import KerasClassifier
from keras.applications.inception_v3 import InceptionV3
from keras import backend as K
from PIL import Image
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, KFold, cross_val_score, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC

# Seed TensorFlow, NumPy and Python's `random` with one shared value so that
# repeated runs of the notebook draw the same random numbers.
SEED = 10
for seed_fn in (tf.random.set_seed, np.random.seed, random.seed):
    seed_fn(SEED)

In [None]:
# Read the train/test tables from CSV files in the working directory and
# shorten the anatomical-site column name to 'site'.
SITE_RENAME = {'anatom_site_general_challenge': 'site'}
train_df = pd.read_csv('trainSet.csv').rename(columns=SITE_RENAME)
test_df = pd.read_csv('testSet.csv').rename(columns=SITE_RENAME)

# Columns removed before modelling; everything else stays in the *_ds frames.
DROPPED_COLUMNS = ['image_name', 'patient_id', 'benign_malignant', 'diagnosis']
train_ds = train_df.drop(columns=DROPPED_COLUMNS)
test_ds = test_df.drop(columns=DROPPED_COLUMNS)

In [None]:
# Integer-encode the 'sex' column.
# The encoder is fitted on the training data only and then *applied* to the
# test data, so both frames share exactly the same label -> integer mapping.
# (Refitting on the test set could silently assign different codes if the two
# sets do not contain the same set of values.)
# NOTE(review): `transform` raises ValueError if the test set contains a value
# that never appears in the training set - confirm the CSVs are consistent.
le = LabelEncoder()
train_ds['sex'] = le.fit_transform(train_ds['sex'])
test_ds['sex'] = le.transform(test_ds['sex'])

# Show the distinct anatomical sites that the next cell turns into indicator columns.
train_ds['site'].unique()

In [None]:
def site_colmn(site):
    """Add a 0/1 indicator column named `site` to both feature frames.

    For each of the module-level frames `train_ds` and `test_ds`, a new column
    called `site` is created whose value is 1 where the frame's 'site' column
    equals `site` and 0 everywhere else. Both frames are modified in place.
    """
    for frame in (train_ds, test_ds):
        frame[site] = [int(typ == site) for typ in frame['site']]
# One indicator column per anatomical site, added in this fixed order.
SITE_VALUES = [
    'lower extremity',
    'upper extremity',
    'torso',
    'head/neck',
    'oral/genital',
    'palms/soles',
]
for site_value in SITE_VALUES:
    site_colmn(site_value)

# The original text column is no longer needed once the indicators exist.
train_ds = train_ds.drop(columns='site')
test_ds = test_ds.drop(columns='site')

In [None]:
# Separate features from the 'target' label.
# `drop` returns new frames, so `train_ds` / `test_ds` are left untouched and
# this cell can be re-run safely (the previous `X_train = train_ds` alias plus
# `pop('target')` removed the column from the source frame, so a second run
# raised KeyError).
X_train = train_ds.drop(columns='target')
X_test = test_ds.drop(columns='target')
y_train = train_ds['target']
y_test = test_ds['target']

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise the features: statistics are learned from the training set only
# and the same fitted scaler is then applied to both train and test.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LogisticRegression
# Baseline: logistic regression with default settings on the scaled features.
classifier_lr = LogisticRegression()
classifier_lr.fit(X_train_scaled, y_train)
y_pred_lr = classifier_lr.predict(X_test_scaled)
acc_lr = accuracy_score(y_test, y_pred_lr)
print('Accuracy Score: ', acc_lr)

In [None]:
from sklearn import svm
# Baseline: support vector classifier with default settings on the scaled features.
classifier_svm = svm.SVC()
# Fit the SVM itself (the previous code re-fitted `classifier_lr` here and left
# the SVM untrained).
classifier_svm.fit(X_train_scaled, y_train)
# `classifier_svmpredict` was a typo for `classifier_svm.predict` and raised NameError.
y_pred_svm = classifier_svm.predict(X_test_scaled)
print('Accuracy Score: ',accuracy_score(y_test,y_pred_svm))

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Baseline: decision tree with default settings on the scaled features.
classifier_dt = DecisionTreeClassifier()
classifier_dt.fit(X_train_scaled, y_train)
y_pred_dt = classifier_dt.predict(X_test_scaled)
# Score the tree's own predictions (previously the logistic-regression
# predictions `y_pred_lr` were scored here by mistake).
print('Accuracy Score: ',accuracy_score(y_test,y_pred_dt))

In [None]:
from sklearn.ensemble import RandomForestClassifier
# Baseline: random forest with default settings on the scaled features.
classifier_rf = RandomForestClassifier()
classifier_rf.fit(X_train_scaled, y_train)
y_pred_rf = classifier_rf.predict(X_test_scaled)
acc_rf = accuracy_score(y_test, y_pred_rf)
print('Accuracy Score: ', acc_rf)

In [None]:
from sklearn.ensemble import AdaBoostClassifier
# Baseline: AdaBoost with default settings on the scaled features.
classifier_ab = AdaBoostClassifier()
classifier_ab.fit(X_train_scaled, y_train)
y_pred_ab = classifier_ab.predict(X_test_scaled)
acc_ab = accuracy_score(y_test, y_pred_ab)
print('Accuracy Score: ', acc_ab)

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
# Baseline: gradient boosting with default settings on the scaled features.
classifier_gb = GradientBoostingClassifier()
classifier_gb.fit(X_train_scaled, y_train)
y_pred_gb = classifier_gb.predict(X_test_scaled)
acc_gb = accuracy_score(y_test, y_pred_gb)
print('Accuracy Score: ', acc_gb)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Baseline: k-nearest-neighbours with default settings on the scaled features.
classifier_KNC = KNeighborsClassifier()
classifier_KNC.fit(X_train_scaled, y_train)
y_pred_KNC = classifier_KNC.predict(X_test_scaled)
# Score the KNN predictions (previously the AdaBoost predictions `y_pred_ab`
# were scored here by mistake).
print('Accuracy Score: ',accuracy_score(y_test,y_pred_KNC))


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
class_names = ["Benign", "Malignant"]
def analyze_grid_result(grid_result, X_eval=None, y_eval=None):
    """Summarise a fitted grid search and evaluate its best model on held-out data.

    Prints the best parameters, the best cross-validated score, and the mean
    (+/- two standard deviations) of the CV score for every candidate. It then
    predicts on the evaluation set and shows a classification report, the raw
    confusion matrix, a ROC curve with its AUC, and a row-normalised confusion
    matrix heatmap labelled with the module-level `class_names`.

    Parameters
    ----------
    grid_result : fitted search object
        Must expose `best_params_`, `best_score_`, `cv_results_` and
        `predict` (e.g. the return value of `GridSearchCV.fit`).
    X_eval, y_eval : optional
        Evaluation features and labels. When omitted, the notebook-level
        `X_test` / `y_test` are used, which matches the previous behaviour.

    Returns
    -------
    None. All results are printed or plotted.
    """
    if X_eval is None:
        X_eval = X_test
    if y_eval is None:
        y_eval = y_test

    # Best parameters and cross-validated score of the best candidate
    print("Tuned hyperparameters: (best parameters) ", grid_result.best_params_)
    print("Accuracy :", grid_result.best_score_)

    means = grid_result.cv_results_["mean_test_score"]
    stds = grid_result.cv_results_["std_test_score"]
    for mean, std, params in zip(means, stds, grid_result.cv_results_["params"]):
        print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
    print()

    print("Detailed classification report:")
    y_true, y_pred = y_eval, grid_result.predict(X_eval)
    print(classification_report(y_true, y_pred))
    raw_cm = confusion_matrix(y_true, y_pred)
    print('Confusion Matrix: \n', raw_cm)
    print()

    # A ROC curve needs a continuous score per sample; hard 0/1 predictions
    # collapse it to a single operating point. Prefer class probabilities, then
    # the decision function, and only fall back to the labels if the estimator
    # offers neither.
    if hasattr(grid_result, "predict_proba"):
        # Column 1 is the second entry of `classes_`.
        # NOTE(review): assumes a binary target whose positive class sorts last - confirm.
        y_score = grid_result.predict_proba(X_eval)[:, 1]
    elif hasattr(grid_result, "decision_function"):
        y_score = grid_result.decision_function(X_eval)
    else:
        y_score = y_pred

    plt.figure()
    fpr, tpr, _ = roc_curve(y_true, y_score)
    roc_auc = auc(fpr, tpr)
    print(f"ROC AUC: {roc_auc:.3f}")
    plt.plot(fpr, tpr, color="blue", lw=2,
             label='ROC curve (area = {f:.2f})'.format(f=roc_auc))
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curves')
    plt.legend(loc="lower right")
    plt.show()
    print()

    # Normalise each row so that every true class sums to 1
    cmn = raw_cm.astype('float') / raw_cm.sum(axis=1)[:, np.newaxis]
    print(cmn)
    fig, ax = plt.subplots(figsize=(10,10))
    sns.heatmap(cmn, annot=True, fmt='.2f',
                xticklabels=[f"Predicted_{c}" for c in class_names],
                yticklabels=[f"True_{c}" for c in class_names],
                cmap="Blues",
                ax=ax
                )
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')
    # plot the resulting confusion matrix
    plt.show()

In [None]:
# Grid search for LogisticRegression.
# `StratifiedKFold` comes from the notebook's top import cell; it was
# previously used here without ever being imported, which raised NameError.
model = LogisticRegression(solver='liblinear')
solvers = ['newton-cg', 'liblinear']
penalty = ['l2']
c_values = [100, 10, 1.0, 0.1, 0.01]
# Every combination of solver, penalty and C is evaluated.
grid = dict(solver = solvers, penalty = penalty, C = c_values)
# NOTE(review): 50 stratified folds needs at least 50 samples of every class
# and is slow - confirm this is intentional.
cv = StratifiedKFold(n_splits = 50, random_state = 1, shuffle = True)
# error_score = 0 records a score of 0 for a failing fit instead of aborting.
grid_search = GridSearchCV(estimator = model, param_grid = grid, cv = cv, scoring = 'accuracy', error_score = 0)
# The search is fitted on the unscaled features; `analyze_grid_result`
# evaluates on the unscaled `X_test`, so the two stay consistent.
logi_result = grid_search.fit(X_train, y_train)
# Logistic Regression Hyperparameter Result
analyze_grid_result(logi_result)

In [None]:
# Grid search for the support vector classifier (SVC).
# `SVC` and `StratifiedKFold` come from the notebook's top import cell; both
# were previously used here without being imported, which raised NameError.
model = SVC()
# Two sub-grids: an RBF kernel over gamma and C, and a linear kernel over C only.
tuned_parameters = [
    {"kernel": ["rbf"], "gamma": [1e-3, 1e-4], "C": [1, 10, 100, 1000]},
    {"kernel": ["linear"], "C": [1, 10, 100, 1000]},
]
cv = StratifiedKFold(n_splits = 2, random_state = 1, shuffle = True)
# error_score = 0 records a score of 0 for a failing fit instead of aborting.
grid_search = GridSearchCV(estimator = model, param_grid = tuned_parameters, cv = cv, scoring = 'accuracy', error_score = 0)
scv_result = grid_search.fit(X_train, y_train)
# SVC Hyperparameter Result
analyze_grid_result(scv_result)

In [None]:
 # Grid search for RandomForestClassifier.
# `StratifiedKFold` comes from the notebook's top import cell; it was
# previously used here without ever being imported, which raised NameError.
model = RandomForestClassifier(random_state=42)
# 'auto' was dropped from max_features: for classifiers it was an alias of
# 'sqrt' and is rejected by current scikit-learn. With error_score = 0 those
# candidates would silently be recorded with a score of 0.
tuned_parameters = {
    'n_estimators': [200, 500],
    'max_features': ['sqrt', 'log2'],
    'max_depth' : [4,5,6,7,8],
    'criterion' :['gini', 'entropy']
}
cv = StratifiedKFold(n_splits = 2, random_state = 1, shuffle = True)
grid_search = GridSearchCV(estimator = model, param_grid = tuned_parameters, cv = cv, scoring = 'accuracy', error_score = 0)
grid_result = grid_search.fit(X_train, y_train)
# Random Forest Hyperparameter Result
analyze_grid_result(grid_result)

In [None]:
# Training hyperparameters for the dense network.
lr = 0.03
epochs = 500
batch_size = 64

# Multiply the learning rate by 0.05 when validation accuracy has not improved
# for 25 epochs, never going below 2.6e-6.
learning_rate_reduction = ReduceLROnPlateau(
    monitor='val_accuracy',
    patience=25,
    verbose=1,
    factor=0.05,
    min_lr=2.6e-6,
)

# Progress-bar callback; created here but not passed to `fit` in the cell below.
tqdm_callback = tfa.callbacks.TQDMProgressBar()

# Fully connected binary classifier: 128 -> 256 -> 256 -> 1 (sigmoid output).
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(128, activation='relu', kernel_initializer='he_normal'))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# The metric names set here are the keys later read from `history.history`.
tracked_metrics = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]
model.compile(
    loss='binary_crossentropy',
    optimizer=Adam(lr),
    metrics=tracked_metrics,
)

In [None]:
# Train on the scaled features, holding out 20% of the training data for
# validation; the learning-rate scheduler is the only callback in use.
fit_options = dict(
    validation_split=0.2,
    epochs=epochs,
    batch_size=batch_size,
    verbose=2,
    callbacks=[learning_rate_reduction],
)
history = model.fit(X_train_scaled, y_train, **fit_options)

In [None]:
# Predicted probabilities for the test set, turned into 0/1 labels:
# anything strictly above 0.12 is labelled 1.
y_pred = model.predict(X_test_scaled)
prediction_classes = [int(prob > 0.12) for prob in np.ravel(y_pred)]

In [None]:
def plot_roc_auc(y_true, y_pred):
    """Print the ROC AUC and draw the ROC curve for the given predictions.

    `y_true` and `y_pred` are handed straight to `roc_curve`; the area under
    the resulting curve is printed with three decimals and shown in the plot
    legend with two. Nothing is returned.
    """
    # Open a new figure with a single set of axes for the curve
    fig = plt.figure()
    ax = fig.add_subplot(111)

    fpr, tpr, _ = roc_curve(y_true, y_pred)
    roc_auc = auc(fpr, tpr)
    print(f"ROC AUC: {roc_auc:.3f}")

    ax.plot(fpr, tpr, color="blue", lw=2,
            label=f'ROC curve (area = {roc_auc:.2f})')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC curves')
    ax.legend(loc="lower right")
    plt.show()

# Use the model's predicted probabilities, not the thresholded 0/1 labels:
# a ROC curve sweeps over score thresholds, so hard labels reduce it to a
# single operating point and misstate the AUC.
plot_roc_auc(y_test, np.ravel(y_pred))

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Test-set metrics for the thresholded neural-network predictions.
for metric_label, metric_fn in (
    ('Accuracy', accuracy_score),
    ('Precision', precision_score),
    ('Recall', recall_score),
):
    print(f'{metric_label}: {metric_fn(y_test, prediction_classes)}')

In [None]:
print(history.history.keys())

# One train-vs-validation figure per tracked quantity. Each entry is
# (key in history.history, label used for the title and the y-axis).
# The precision and recall plots were previously labelled 'Loss' on the y-axis.
history_plots = (
    ('accuracy', 'Accuracy'),
    ('loss', 'Loss'),
    ('precision', 'Precision'),
    ('recall', 'Recall'),
)
for metric_key, metric_label in history_plots:
    plt.plot(history.history[metric_key])
    plt.plot(history.history[f'val_{metric_key}'])  # validation values are stored under a 'val_' prefix
    plt.title(f'Model {metric_label}')
    plt.ylabel(metric_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
class_names = ["Benign", "Malignant"]
def plot_confusion_matrix(y_test, y_pred):
    """Print and plot the row-normalised confusion matrix.

    The confusion matrix of `y_test` against `y_pred` is divided row-wise by
    the number of samples of each true class, printed, and drawn as an
    annotated heatmap whose tick labels come from the module-level
    `class_names`. Nothing is returned.
    """
    counts = confusion_matrix(y_test, y_pred).astype('float')
    # Per-row totals kept as a column vector so the division broadcasts row-wise
    row_totals = counts.sum(axis=1)[:, np.newaxis]
    cmn = counts / row_totals
    print(cmn)

    predicted_labels = [f"Predicted_{c}" for c in class_names]
    true_labels = [f"True_{c}" for c in class_names]
    fig, ax = plt.subplots(figsize=(10,10))
    sns.heatmap(
        cmn,
        annot=True,
        fmt='.2f',
        xticklabels=predicted_labels,
        yticklabels=true_labels,
        cmap="Blues",
        ax=ax,
    )
    ax.set_ylabel('Actual')
    ax.set_xlabel('Predicted')
    plt.show()

plot_confusion_matrix(y_test, prediction_classes)