<a href="https://colab.research.google.com/github/Mullen44/CapstoneProject_SkinLesionClassification/blob/main/LogisiticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Import Modules and Mount Drive

In [None]:
#CAPSTONE
# Mounting Google Drive
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

# Mount google drive
# Makes the Drive contents visible under /content/gdrive; force_remount re-mounts
# even when a previous mount is still present.
from google.colab import drive
drive.mount('/content/gdrive', force_remount = True)

# Authenticate and create the PyDrive client
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# NOTE(review): this rebinds `drive` from the google.colab module imported above to a
# PyDrive client, so `drive.mount(...)` can no longer be called after this line.
drive = GoogleDrive(gauth)

In [None]:
# Import Necessary Libraries and Personal scripts
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/Scripts/
from Utils import *
from models import *
import tensorflow as tf
import numpy as np
import os
from sklearn import metrics
from sklearn.linear_model import LogisticRegressionCV
from sklearn import svm
import pickle

# Feature Extraction


In [None]:
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/
# Feature-extraction networks
# Model A --> ResNet50 with default arguments (used as the 1000 class feature extractor)
modelA = tf.keras.applications.ResNet50()

# Model B --> transfer learning network, weights restored from disk
transferPath = 'newModels/TransferLearning/ResNet50/e100_BS16_lr1e-07/newmodel.hdf5'
modelB = get_model('ResNet50')
modelB.load_weights(transferPath)

# Model C --> network trained without transfer learning, weights restored from disk
normPath = 'newModels/nonTransferLearning/ResNet50/e100_BS16_lr1e-05/newmodel.hdf5'
modelC = get_model('ResNet50')
modelC.load_weights(normPath)

# One image folder per lesion class
bklImagePath = 'Data/log/Images/bkl/'
melImagePath = 'Data/log/Images/mel/'
nvImagePath = 'Data/log/Images/nv/'

# Read every file of each folder into a {filename: image} dictionary
bklImage = {name: imread(bklImagePath + name) for name in os.listdir(bklImagePath)}
melImage = {name: imread(melImagePath + name) for name in os.listdir(melImagePath)}
nvImage = {name: imread(nvImagePath + name) for name in os.listdir(nvImagePath)}



In [None]:
# Declare Feature Paths
bklFeaturePath = 'Data/log/Features/bkl/'
melFeaturePath = 'Data/log/Features/mel/'
nvFeaturePath = 'Data/log/Features/nv/'


def _extractAndSaveFeatures(images, featurePath, classLabel) :
  """Run every image through models A, B and C and save one feature vector per image.

  Args:
    images: dictionary mapping an image file name to the loaded image.
    featurePath: folder (with trailing slash) that receives the .npy files.
    classLabel: numerical value appended as the last entry of every vector.

  Each saved vector is the concatenation of the first prediction row of
  modelA, modelB and modelC followed by the label (1000+3+3+1=1007 values
  according to the notebook's own notes).
  """
  label = np.array([classLabel])

  for key, value in images.items() :
    # Preprocessing: resize to 224x224 and add a leading batch axis.
    # The reshape raises if the image does not hold exactly 3 channels.
    img = np.array(value)
    img = cv2.resize(img, (224,224), interpolation=cv2.INTER_LINEAR)
    img = img.reshape(224,224,3)
    img = np.expand_dims(img, axis=0)

    # Feature extraction
    predA = np.array(modelA.predict(img)) # 1000 class feature extraction
    predB = np.array(modelB.predict(img)) # Transfer learning 3 class prediction
    predC = np.array(modelC.predict(img)) # Normal learning 3 class prediction

    featureArray = np.concatenate([predA[0], predB[0], predC[0], label])

    # splitext drops the extension whatever its length ('.jpg', '.jpeg', ...)
    featureName = featurePath + os.path.splitext(key)[0] + '.npy'

    # Save the feature vector to google drive
    np.save(featureName, featureArray)


# Class labels: bkl = 0, mel = 1, nv = 2
_extractAndSaveFeatures(bklImage, bklFeaturePath, 0)
print('Done BKL')

_extractAndSaveFeatures(melImage, melFeaturePath, 1)
print('Done Mel')

_extractAndSaveFeatures(nvImage, nvFeaturePath, 2)
print('Done NV')

In [None]:
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/
# Image and feature folders for the three classes
bklImagePath = 'Data/log/Images/bkl'
melImagePath = 'Data/log/Images/mel'
nvImagePath = 'Data/log/Images/nv'
bklFeaturePath = 'Data/log/Features/bkl'
melFeaturePath = 'Data/log/Features/mel'
nvFeaturePath = 'Data/log/Features/nv'

# Print the number of entries per folder: images then features, for mel, bkl and nv
for folder in (melImagePath, melFeaturePath,
               bklImagePath, bklFeaturePath,
               nvImagePath, nvFeaturePath) :
  print(len(os.listdir(folder)))

# Data Division

In [None]:
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/
# Load In all features from the feature folder
bklFeaturePath = 'Data/log/Features/bkl/'
melFeaturePath = 'Data/log/Features/mel/'
nvFeaturePath = 'Data/log/Features/nv/'

# Maximum number of nv files read, to keep nv at a similar size as the other classes.
# NOTE(review): an earlier comment described this as "one out of every 6", but the
# code has always kept the FIRST 3352//6 + 1 = 559 files returned by os.listdir.
# That selection is preserved here; confirm whether a stride of 6 was intended.
nvLimit = 3352//6 + 1


def _loadFeatures(featurePath, limit=None) :
  """Stack the feature vectors saved in featurePath into one 2-D array.

  Args:
    featurePath: folder (with trailing slash) holding one .npy vector per file.
    limit: when given, only the first `limit` entries of os.listdir are read.

  Returns:
    Float array with one row per file, in os.listdir order.

  Raises:
    FileNotFoundError: if the folder holds no files, since an empty class
      would otherwise propagate silently into the train/test sets.
  """
  filenames = os.listdir(featurePath)
  if limit is not None :
    filenames = filenames[:limit]
  if not filenames :
    raise FileNotFoundError('No feature files found in ' + featurePath)

  rows = [np.load(featurePath + filename) for filename in filenames]
  return np.vstack(rows).astype(float)


def _splitByIndex(features, trainIndex) :
  """Split rows into (train, test): row i is a training row when i is in trainIndex.

  Row order is kept inside both halves. Sizes follow from the data instead of
  being pre-declared, so a changed file count cannot leave all-zero rows behind.
  """
  isTrain = np.isin(np.arange(len(features)), list(trainIndex))
  return features[isTrain], features[~isTrain]


# Load in bkl, nv and mel features
bklFeature = _loadFeatures(bklFeaturePath)
nvFeature = _loadFeatures(nvFeaturePath, limit=nvLimit)
melFeature = _loadFeatures(melFeaturePath)

# Divide the data into 80% for training and 20% testing
# indexCreation comes from Utils (not visible here); it is assumed to return the
# row indices selected for training -- TODO confirm
bklIndex = indexCreation(bklFeature, 0.8)
melIndex = indexCreation(melFeature, 0.8)
nvIndex = indexCreation(nvFeature, 0.8)

# Divide each class into its train and test rows
bklTrainFeature, bklTestFeature = _splitByIndex(bklFeature, bklIndex)
melTrainFeature, melTestFeature = _splitByIndex(melFeature, melIndex)
nvTrainFeature, nvTestFeature = _splitByIndex(nvFeature, nvIndex)

# Stack the 3 classes on top of each other
train = np.vstack([melTrainFeature, bklTrainFeature, nvTrainFeature])
test = np.vstack([melTestFeature, bklTestFeature, nvTestFeature])

# Save to google drive
np.save('Data/log/Features/test.npy', test)
np.save('Data/log/Features/train.npy', train)

# Logistic Regression Training

In [None]:
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/
# Load in training and testing data
dataTrain = np.load('Data/log/Features/train.npy')
dataTest = np.load('Data/log/Features/test.npy')

# Divide into X and y for training and testing
# The label is the last column; every column before it is a feature.
# Slicing relative to the end keeps this correct if the feature count changes.
X_train = dataTrain[:, :-1]
y_train = dataTrain[:, -1]
X_test = dataTest[:, :-1]
y_test = dataTest[:, -1]

# Declare parameters
num_Folds = 7
num_iterations = 10000

# Declare model
model = LogisticRegressionCV(cv = num_Folds, verbose = 2, max_iter = num_iterations)

#model = svm.SVC(decision_function_shape='ovo')
# Fit/train model
model.fit(X_train,y_train)

# Declare model path
modelPath = 'Models/LogRegModel/' + str(num_Folds) + '_Fold_' + str(num_iterations) + '_iter.pkl'
# Save model; the with-block closes (and flushes) the file even if pickling fails
with open(modelPath, 'wb') as modelFile :
  pickle.dump(model, modelFile)

# Mean accuracy on the held-out test rows
print(model.score(X_test, y_test))

/content/gdrive/Shared drives/CAPSTONE/DeepLearning


# Testing/Evaluation

In [None]:
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/
# Load in testing data
dataTest = np.load('Data/log/Features/test.npy')
# The label is the last column; every column before it is a feature
X_test = dataTest[:, :-1]
y_test = dataTest[:, -1]

# Load in model
# NOTE: unpickling can execute arbitrary code -- only load model files you trust
with open('Models/LogRegModel/7_Fold_10000_iter.pkl', 'rb') as modelFile :
  model = pickle.load(modelFile)

# Test Model
y_pred = model.predict(X_test)

print('Score:', model.score(X_test, y_test))

# Print the confusion matrix
print(metrics.confusion_matrix(y_test, y_pred))

# Print the precision and recall, among other metrics
print(metrics.classification_report(y_test, y_pred, digits=3))

## Create TF Layer

In [None]:
def indexMax(input) :
  """Return the position of the largest value in `input`.

  The data is converted to a NumPy array first; for multi-dimensional data
  the position refers to the flattened array.
  """
  values = np.array(input)
  return values.argmax()

In [None]:
#############

# Load sklearn logistic regression model
# NOTE: unpickling can execute arbitrary code -- only load model files you trust
with open('Models/LogRegModel/7_Fold_10000_iter.pkl', 'rb') as modelFile :
  model = pickle.load(modelFile)

# create a TF model with the same architecture: a single Dense layer, no activation
# coef_ is transposed into the Dense kernel below, so its second axis is the feature count
numFeatures = model.coef_.shape[1]
tf_model = tf.keras.models.Sequential()
# shape has to be a tuple; `(1006)` without a trailing comma is just the integer 1006
tf_model.add(tf.keras.Input(shape=(numFeatures,)))
tf_model.add(tf.keras.layers.Dense(3))


# assign the parameters from sklearn to the TF model
tf_model.layers[0].weights[0].assign(model.coef_.transpose())
tf_model.layers[0].bias.assign(model.intercept_.transpose())

# Save the new tensorflow version of the model
tf_model.save('/content/gdrive/Shareddrives/CAPSTONE/DeepLearning/TF/logRegModel.hdf5')

# Compare both models sample by sample; count holds the number of disagreements
# X_test and indexMax are expected to exist from earlier cells
count = 0

for i, sample in enumerate(X_test) :
  temp = np.expand_dims(sample, axis=0)
  # One forward pass per sample, reused for the argmax and the printout
  tfOutput = tf_model(temp)
  tflow = indexMax(tfOutput)
  sk = model.predict(temp)
  print('Input # %d' %i)
  print(tfOutput)
  print(tflow)
  print(sk)
  # sk holds a single label; comparing it with an output index assumes the
  # class labels are 0, 1, 2 in that order
  if tflow != sk[0] :
    print('FLAG')
    count+=1

print ('Logistic TensorFlow Model is type:', type(tf_model))

print(count)
################

In [None]:
# Write the TensorFlow copy of the logistic regression model to Drive.
# NOTE(review): the cell that builds tf_model already saves to this same path.
tf_model.save('/content/gdrive/Shareddrives/CAPSTONE/DeepLearning/TF/logRegModel.hdf5')

In [None]:
# Confusion table of actual (rows) against predicted (columns) labels
act_y = pd.Series(y_test, name = 'Actual')
pred_y = pd.Series(y_pred, name = 'Predicted')
df_conf = pd.crosstab(act_y, pred_y)
print(df_conf)

# Same table with row/column totals added
df_conf1 = pd.crosstab(act_y, pred_y, rownames=['Actual'], colnames=['Predicted'], margins=True)
print(df_conf1)

# if you need to normalize it
# Divide every row by its own total. `df / series` would align the row totals
# with the COLUMN labels, so div(..., axis=0) is required for row-wise division.
df_conf_norm = df_conf.div(df_conf.sum(axis=1), axis=0)
print(df_conf_norm)
print()

# now for plotting
def plot_confusion_matrix(df_conf, title='Confusion matrix', cmap=plt.cm.gray_r):
    """Draw a confusion table with matplotlib's matshow and a colour bar.

    df_conf: DataFrame whose columns/index supply the tick labels and whose
        columns.name / index.name supply the axis labels.
    title: accepted but currently not drawn.
    cmap: colour map handed to matshow.
    """
    plt.matshow(df_conf, cmap=cmap)
    plt.colorbar()

    # One tick per column; the same positions are used on both axes
    positions = np.arange(len(df_conf.columns))
    plt.xticks(positions, df_conf.columns, rotation=45)
    plt.yticks(positions, df_conf.index)

    plt.xlabel(df_conf.columns.name)
    plt.ylabel(df_conf.index.name)

plot_confusion_matrix(df_conf)

In [None]:
def changeLabels(data) :
  """Translate numeric class labels into lesion names.

  Args:
    data: iterable of numeric labels (list, NumPy array, ...).

  Returns:
    A list of the same length where 0 becomes 'bkl', 1 becomes 'mel' and
    every other value becomes 'nv'.
  """
  names = {0.0: 'bkl', 1.0: 'mel'}
  # The list must be returned explicitly; without a return the caller gets None
  return [names.get(value, 'nv') for value in data]
# Wrap the changeLabels output for the true (x) and predicted (y) labels in Series
x, y = (pd.Series(changeLabels(labels)) for labels in (y_test, y_pred))

# Cross-tabulate the two Series and show the result
df_conf = pd.crosstab(x, y)
print(df_conf)

# Package Models

In [None]:
%cd /content/gdrive/Shared drives/CAPSTONE/DeepLearning/
# Load Deep Learning Models
# Model A --> 1000 Class feature extractor
modelA = tf.keras.applications.ResNet50()

# Model B --> Transfer Learning Model
transferPath = 'newModels/TransferLearning/ResNet50/e100_BS16_lr1e-07/newmodel.hdf5'
modelB = get_model('ResNet50')
modelB.load_weights(transferPath)

# Model C --> Normal Learning Model
normPath = 'newModels/nonTransferLearning/ResNet50/e100_BS16_lr1e-05/newmodel.hdf5'
modelC = get_model('ResNet50')
modelC.load_weights(normPath)

# Model D Load Logistic Regression Model
# NOTE: unpickling can execute arbitrary code -- only load model files you trust
logPath = 'Models/LogRegModel/7_Fold_10000_iter.pkl'
# The with-block closes the file handle as soon as the model is read
with open(logPath, 'rb') as modelFile :
  model = pickle.load(modelFile)

In [None]:
from tensorflow.keras import Model
from keras.layers.merge import concatenate
from keras import layers
import itertools


def define_stacked_model2(modelA, modelB, modelC):
  """Bundle three Keras models into one multi-input, multi-output model.

  Every layer of the three models is frozen and renamed in place with an
  'ensemble_A_' / 'ensemble_B_' / 'ensemble_C_' prefix so that layer names
  stay unique inside the combined model. The passed-in models are modified.

  Args:
    modelA, modelB, modelC: Keras models exposing .layers, .input and .output.

  Returns:
    A compiled Model whose inputs are the three model inputs and whose
    outputs are the three model outputs, in A, B, C order.
  """
  members = (('ensemble_A_', modelA), ('ensemble_B_', modelB), ('ensemble_C_', modelC))

  # Update all layers in all models to not be trainable
  for prefix, member in members:
    for layer in member.layers:
      # Freeze the layer itself; assigning to `layers` (the imported keras
      # module) would leave every layer trainable
      layer.trainable = False
      # Rename to avoid unique layer name issue
      layer._name = prefix + layer.name

  # Define multi-headed input
  ensemble_visible = [member.input for _, member in members]
  # Define output
  ensemble_output = [member.output for _, member in members]

  # Create model
  model = Model(inputs=ensemble_visible, outputs=ensemble_output)

  # Compile
  model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

  return model

In [None]:
# Combine models A, B and C into one multi-input, multi-output model
ensembleModel = define_stacked_model2(modelA, modelB, modelC)
#ensembleModel.summary()
# Write the combined model to disk (path is relative to the current working directory)
ensembleModel.save('ensembleModels/deepModel.hdf5')

In [None]:
ensembleModel.summary()