<a href="https://colab.research.google.com/github/ConanOReilly/Final_Year_Project/blob/main/Image_Data/Baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Baseline Model**
A baseline in machine learning serves as a simple reference point to evaluate the performance of more complex models.

The models used:


*   MobileNetV2 (frozen)
*   MobileNetV2 (unfrozen)



In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# **Libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# **Preprocessing**
First iteration didn't identify 'MEL' and 'SCC', therefore class weights were computed.

In [None]:
# Define dataset path
data_dir = '/content/drive/MyDrive/Final Year Project/Code/MetaBlock/Data/imgs'
metadata_path = '/content/drive/MyDrive/Final Year Project/Data/PAD/Metadata/metadata.csv'

In [None]:
# Load metadata CSV
df = pd.read_csv(metadata_path)

# Keep only necessary columns
df = df[['img_id', 'diagnostic']]
df = df.rename(columns={'img_id': 'filename', 'diagnostic': 'label'})

In [None]:
# Image parameters
img_size = 224
batch_size = 32

# Image Data Augmentation
datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

train_generator = datagen.flow_from_dataframe(
    dataframe=df,
    directory=data_dir,
    x_col='filename',
    y_col='label',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

val_generator = datagen.flow_from_dataframe(
    dataframe=df,
    directory=data_dir,
    x_col='filename',
    y_col='label',
    target_size=(img_size, img_size),
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

Found 1839 validated image filenames belonging to 6 classes.
Found 459 validated image filenames belonging to 6 classes.


In [None]:
# Class indices
class_indices = train_generator.class_indices
inv_class_indices = {v: k for k, v in class_indices.items()}

# Training set distribution
train_classes = train_generator.classes
train_class_counts = pd.Series(train_classes).value_counts().sort_index()
train_class_names = [inv_class_indices[i] for i in train_class_counts.index]
train_distribution = pd.DataFrame({'Class': train_class_names, 'Train Samples': train_class_counts.values})

# Validation set distribution
val_classes = val_generator.classes
val_class_counts = pd.Series(val_classes).value_counts().sort_index()
val_class_names = [inv_class_indices[i] for i in val_class_counts.index]
val_distribution = pd.DataFrame({'Class': val_class_names, 'Validation Samples': val_class_counts.values})

# Combine both
full_distribution = pd.merge(train_distribution, val_distribution, on='Class')
print(full_distribution)

  Class  Train Samples  Validation Samples
0   ACK            588                 142
1   BCC            668                 177
2   MEL             41                  11
3   NEV            198                  46
4   SCC            153                  39
5   SEK            191                  44


In [None]:
# Computing Weights
y_train = train_generator.classes
class_weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights_dict = dict(enumerate(class_weights))
print("Class Weights:", class_weights_dict)

Class Weights: {0: np.float64(0.5212585034013606), 1: np.float64(0.4588323353293413), 2: np.float64(7.475609756097561), 3: np.float64(1.547979797979798), 4: np.float64(2.003267973856209), 5: np.float64(1.6047120418848169)}


In [None]:
# Map class weights to class names
named_weights = {inv_class_indices[i]: round(weight, 4) for i, weight in class_weights_dict.items()}

# Print class weights
print("Class Weights (with class names):")
for label, weight in named_weights.items():
    print(f"{label}: {weight}")

Class Weights (with class names):
ACK: 0.5213
BCC: 0.4588
MEL: 7.4756
NEV: 1.548
SCC: 2.0033
SEK: 1.6047


**Frozen Model**

In [None]:
# Load Pretrained MobileNetV2
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))
base_model.trainable = False

# Custom Classification Layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(6, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=x)

# Compile Model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train Model
history = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    verbose=1,
    class_weight=class_weights_dict
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step


  self._warn_if_super_not_called()


Epoch 1/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1179s[0m 20s/step - accuracy: 0.2761 - loss: 1.8950 - val_accuracy: 0.4771 - val_loss: 1.4060
Epoch 2/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 2s/step - accuracy: 0.4038 - loss: 1.4827 - val_accuracy: 0.4379 - val_loss: 1.3932
Epoch 3/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 2s/step - accuracy: 0.4512 - loss: 1.3392 - val_accuracy: 0.3529 - val_loss: 1.3722
Epoch 4/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 2s/step - accuracy: 0.4223 - loss: 1.3347 - val_accuracy: 0.4619 - val_loss: 1.3575
Epoch 5/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 2s/step - accuracy: 0.4702 - loss: 1.2139 - val_accuracy: 0.4597 - val_loss: 1.3013
Epoch 6/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 2s/step - accuracy: 0.4844 - loss: 1.1881 - val_accuracy: 0.5381 - val_loss: 1.1387
Epoch 7/10
[1m58/58[0m [32m━━━━━━━

**Evaluation**

In [None]:
# Compute classification report
val_preds = model.predict(val_generator)
val_preds_classes = np.argmax(val_preds, axis=1)
true_classes = val_generator.classes
class_labels = list(val_generator.class_indices.keys())

class_report = classification_report(true_classes, val_preds_classes, target_names=class_labels, digits=4)
print("\nClassification Report:\n", class_report)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1s/step

Classification Report:
               precision    recall  f1-score   support

         ACK     0.4034    0.3380    0.3678       142
         BCC     0.4082    0.2260    0.2909       177
         MEL     0.0000    0.0000    0.0000        11
         NEV     0.0943    0.1087    0.1010        46
         SCC     0.1068    0.2821    0.1549        39
         SEK     0.1410    0.2500    0.1803        44

    accuracy                         0.2505       459
   macro avg     0.1923    0.2008    0.1825       459
weighted avg     0.3142    0.2505    0.2665       459



**Unfrozen Model**

In [None]:
# Load Pretrained MobileNetV2
base_model = MobileNetV2(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))
base_model.trainable = True

# Custom Classification Layers
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
x = Dense(6, activation='softmax')(x)

unfrozen_model = Model(inputs=base_model.input, outputs=x)

# Compile Model
unfrozen_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train Model
history = unfrozen_model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=10,
    verbose=1,
    class_weight=class_weights_dict
)

Epoch 1/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2113s[0m 36s/step - accuracy: 0.3201 - loss: 1.7727 - val_accuracy: 0.1002 - val_loss: 11.5470
Epoch 2/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 2s/step - accuracy: 0.4645 - loss: 1.2748 - val_accuracy: 0.1002 - val_loss: 10.5919
Epoch 3/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 2s/step - accuracy: 0.5290 - loss: 1.1223 - val_accuracy: 0.1002 - val_loss: 11.4435
Epoch 4/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 2s/step - accuracy: 0.4997 - loss: 1.0962 - val_accuracy: 0.1002 - val_loss: 14.4489
Epoch 5/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 2s/step - accuracy: 0.5492 - loss: 1.0418 - val_accuracy: 0.1002 - val_loss: 12.6247
Epoch 6/10
[1m58/58[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m100s[0m 2s/step - accuracy: 0.5784 - loss: 1.0013 - val_accuracy: 0.1002 - val_loss: 17.8501
Epoch 7/10
[1m58/58[0m [

**Evaluation**

In [None]:
# Compute classification report
val_preds = unfrozen_model.predict(val_generator)
val_preds_classes = np.argmax(val_preds, axis=1)
true_classes = val_generator.classes
class_labels = list(val_generator.class_indices.keys())

class_report = classification_report(true_classes, val_preds_classes, target_names=class_labels, digits=4)
print("\nClassification Report:\n", class_report)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1s/step

Classification Report:
               precision    recall  f1-score   support

         ACK     0.0000    0.0000    0.0000       142
         BCC     0.0000    0.0000    0.0000       177
         MEL     0.0000    0.0000    0.0000        11
         NEV     0.1002    1.0000    0.1822        46
         SCC     0.0000    0.0000    0.0000        39
         SEK     0.0000    0.0000    0.0000        44

    accuracy                         0.1002       459
   macro avg     0.0167    0.1667    0.0304       459
weighted avg     0.0100    0.1002    0.0183       459



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
