In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Read the HAM10000 metadata CSV into a DataFrame.
metadata = pd.read_csv('HAM10000_metadata.csv')

# Reproducible 80/20 split: draw 80% of the rows at random (fixed seed) for
# training, then keep every row whose index label was not drawn as the test set.
train_metadata = metadata.sample(frac=0.8, random_state=42)
in_train = metadata.index.isin(train_metadata.index)
test_metadata = metadata[~in_train]

def _flatten_per_sample(features):
    """Collapse every axis after the first so each sample is one flat vector.

    Accepts any array with a leading sample axis: 4-D feature maps are
    flattened exactly as before, 2-D features pass through unchanged in shape,
    and a 1-D array becomes a single-column matrix.
    """
    n_samples = features.shape[0]
    # Spell out the flattened width instead of passing -1 so that an array
    # with zero samples still reshapes cleanly.
    n_flat = int(np.prod(features.shape[1:]))
    return features.reshape(n_samples, n_flat)


# Load pre-extracted features from disk.
# NOTE(review): this assumes row i of each .npy file corresponds to row i of
# train_metadata / test_metadata respectively -- confirm against the script
# that produced the feature files.
mobilenetv2_train_features = np.load('Mobilenetv2_train_features.npy')
mobilenetv2_test_features = np.load('Mobilenetv2_test_features.npy')

n_train_samples = mobilenetv2_train_features.shape[0]
n_test_samples = mobilenetv2_test_features.shape[0]

# Fail fast when a feature file and its metadata split disagree on the number
# of samples; a mismatched test set would otherwise only surface at scoring
# time, after the classifier has already been trained.
for split_name, n_feature_rows, n_metadata_rows in (
    ('train', n_train_samples, len(train_metadata)),
    ('test', n_test_samples, len(test_metadata)),
):
    if n_feature_rows != n_metadata_rows:
        raise ValueError(
            f"{split_name} features have {n_feature_rows} rows but "
            f"{split_name} metadata has {n_metadata_rows} rows"
        )

# Flatten each sample into a single feature vector for the linear classifier.
mobilenetv2_train_features = _flatten_per_sample(mobilenetv2_train_features)
mobilenetv2_test_features = _flatten_per_sample(mobilenetv2_test_features)

# Target labels: the 'dx' column of the training split.
y_train = train_metadata['dx']

# Build the logistic-regression classifier (fixed seed, iteration cap of 1000)
# and fit it on the flattened training features in one chained expression;
# fit() returns the fitted estimator itself.
clf = LogisticRegression(max_iter=1000, random_state=42).fit(
    mobilenetv2_train_features,
    y_train,
)

# Score the fitted classifier on the held-out split.
y_test = test_metadata['dx']
predictions = clf.predict(mobilenetv2_test_features)

# Compute every metric up front, then print them in a fixed order.
accuracy = accuracy_score(y_test, predictions)
report = classification_report(y_test, predictions)
conf_mat = confusion_matrix(y_test, predictions)

print('Accuracy:', accuracy)
print(report)
print('Confusion Matrix:')
print(conf_mat)


Accuracy: 0.7988017973040439
              precision    recall  f1-score   support

       akiec       0.55      0.52      0.54        61
         bcc       0.75      0.69      0.72       107
         bkl       0.58      0.50      0.54       224
          df       0.75      0.16      0.26        19
         mel       0.56      0.48      0.52       211
          nv       0.87      0.93      0.90      1355
        vasc       0.94      0.62      0.74        26

    accuracy                           0.80      2003
   macro avg       0.71      0.56      0.60      2003
weighted avg       0.79      0.80      0.79      2003

Confusion Matrix:
[[  32    6   12    1    2    8    0]
 [   4   74    8    0    5   16    0]
 [  12    8  113    0   21   70    0]
 [   1    0    3    3    1   11    0]
 [   4    6   24    0  102   74    1]
 [   5    4   36    0   50 1260    0]
 [   0    1    0    0    2    7   16]]
