In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

In [None]:
# Location of the label manifest; later cells read its "label" and "images"
# columns.
LABELS_CSV = 'train_data.csv'
labels_df = pd.read_csv(LABELS_CSV)
# Preview the first rows as a quick check of the parsed columns.
labels_df.head()

In [None]:

# Inverse-frequency class weights: total / (n_classes * rows_in_class),
# computed for all classes at once and keyed by class label.
class_labels, class_counts = np.unique(labels_df["label"], return_counts=True)
total_samples = np.sum(class_counts)
class_weights = dict(
    zip(class_labels, total_samples / (class_counts * len(class_labels)))
)
print(class_labels, class_counts, total_samples)

# NOTE: `class_counts` is rebound here to a pandas Series (value_counts), so
# from this point on it is no longer the NumPy array paired with `class_labels`.
class_counts = labels_df['label'].value_counts()

# Down-sample every class to at most 560 rows; the fixed seed makes the
# selection repeatable.
balanced_data = [
    per_class.sample(n=min(560, len(per_class)), random_state=42)
    for per_class in (
        labels_df[labels_df['label'] == class_label] for class_label in class_labels
    )
]

# Stack the per-class samples and renumber the rows 0..n-1 in one step.
balanced_df = pd.concat(balanced_data, ignore_index=True)
balanced_df.head(300)

In [None]:
# Bar chart of rows per class in the balanced frame. countplot returns the
# Axes it drew on, so the labels and title are set on that object directly.
sns.countplot(data=balanced_df, x="label").set(
    xlabel="Disease Label",
    ylabel="Count",
    title="Distribution of Disease Classes",
)
plt.show()

In [None]:
num_samples = 4
train_dir = "Train"

# Show up to `num_samples` randomly chosen images for every class of the
# balanced set, one figure per class.
for disease_label in class_labels:
    # The mask has to be built from balanced_df itself. balanced_df was
    # re-indexed after sampling, so a mask taken from labels_df is aligned by
    # row label and picks rows that belong to other classes.
    label_images = balanced_df.loc[balanced_df["label"] == disease_label, "images"]
    # Never ask for more images than the class has; the fixed seed keeps the
    # gallery identical between runs.
    class_images = label_images.sample(
        n=min(num_samples, len(label_images)), random_state=42
    )
    plt.figure(figsize=(10, 8))

    for i, image_file_name in enumerate(class_images):
        image_path = os.path.join(train_dir, image_file_name)
        # Draw while the file is open, then let the context manager release
        # the file handle instead of leaking one per image.
        with Image.open(image_path) as image:
            plt.subplot(1, num_samples, i + 1)
            plt.imshow(image)
            plt.axis("off")
    plt.suptitle(f"Sample Images - Disease {disease_label}")
    plt.show()

In [None]:
# Turn bare file names into paths below `train_dir`.
# The check makes the cell safe to re-run: a value that already starts with
# "<train_dir><separator>" is left alone instead of being prefixed a second
# time (which would produce e.g. Train/Train/<name> and break file loading).
# NOTE(review): assumes the CSV stores bare file names, not paths that already
# begin with the training directory name — confirm against train_data.csv.
_train_prefix = os.path.join(train_dir, "")
balanced_df["images"] = balanced_df["images"].apply(
    lambda x: x if x.startswith(_train_prefix) else os.path.join(train_dir, x)
)
balanced_df["images"].head()

In [None]:
import tensorflow as tf
from keras.preprocessing.image import ImageDataGenerator
from keras.applications import MobileNetV3Small
from sklearn.model_selection import train_test_split


# Augmentation settings collected in one mapping and unpacked into the
# generator. (`datagen` is not referenced again in the cells visible here.)
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)
datagen = ImageDataGenerator(**augmentation_options)

# Two-stage split with identical settings: hold out 20% of the balanced data
# for testing, then 20% of the remainder for validation.
split_options = dict(test_size=0.2, random_state=42)
train_df, test_df = train_test_split(balanced_df, **split_options)
train_df, val_df = train_test_split(train_df, **split_options)

# Input resolution and mini-batch size used by the generators below.
img_size = (224, 224)
batch_size = 10

# Augmenting generator, used for the training split only.
trgen = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2
)

# Validation and test images are passed through without augmentation.
t_and_v_gen = ImageDataGenerator()

# One explicit, sorted class list shared by all three iterators. Without it
# every iterator derives its own class -> index mapping from the rows it
# receives, so a split that happens to miss a class would number the
# remaining classes differently from the training iterator.
all_classes = sorted(balanced_df['label'].unique())


def _flow(generator, frame, shuffle, batch):
    """Return a categorical RGB iterator over `frame` using the shared class order.

    generator: the ImageDataGenerator that produces the batches.
    frame:     DataFrame with an 'images' path column and a 'label' column.
    shuffle:   whether the iterator shuffles the rows.
    batch:     number of images per batch.
    """
    return generator.flow_from_dataframe(
        frame,
        x_col='images',
        y_col='label',
        target_size=img_size,
        classes=all_classes,
        class_mode='categorical',
        color_mode='rgb',
        shuffle=shuffle,
        batch_size=batch,
    )


train_gen = _flow(trgen, train_df, shuffle=True, batch=batch_size)
valid_gen = _flow(t_and_v_gen, val_df, shuffle=False, batch=batch_size)

# Test batch size = largest divisor of the test-set size that is <= 80, so the
# test set is covered by whole batches in exactly `test_steps` steps.
length = len(test_df)
if length == 0:
    raise ValueError("test_df is empty; cannot derive a test batch size")
test_batch_size = max(n for n in range(1, min(length, 80) + 1) if length % n == 0)
test_steps = length // test_batch_size

test_gen = _flow(t_and_v_gen, test_df, shuffle=False, batch=test_batch_size)

# Keep the training iterator's class-name -> index mapping (and the test
# labels) in plain variables for the cells that follow.
classes = [*train_gen.class_indices]
class_indices = [*train_gen.class_indices.values()]
class_count = len(classes)
labels = test_gen.labels

# Summary lines, then a centred two-column table of class name / index.
for caption, value in (
    ('Test batch size:', test_batch_size),
    ('Test steps:', test_steps),
    ('Number of classes:', class_count),
):
    print(caption, value)
print('{0:^25s}{1:^12s}'.format('Class name', 'Class index'))

for klass, index in train_gen.class_indices.items():
    print(f'{klass:^25s}{str(index):^12s}')

In [None]:
from keras.applications import MobileNetV3Small
from keras.layers import BatchNormalization, Dense, Dropout
from keras.models import Model
from keras.optimizers import Adamax
from keras import regularizers

# Size the output layer from the training iterator instead of a hard-coded 4,
# so the head always matches the one-hot width the generators emit.
num_classes = len(train_gen.class_indices)
# Network input shape follows the generators' target size (3 = RGB channels).
img_shape = (img_size[0], img_size[1], 3)

# ImageNet-pretrained backbone without its classifier; pooling="max" makes the
# output one feature vector per image. All backbone weights are fine-tuned.
base_model = MobileNetV3Small(
    weights="imagenet", include_top=False, input_shape=img_shape, pooling="max"
)
base_model.trainable = True

x = base_model.output
print("base model shape: ", x.shape)

# Classification head: batch norm, then two regularised dense layers with
# dropout, then a softmax over the classes.
x = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001)(x)
x = Dense(1024, kernel_regularizer=regularizers.l2(l=0.016), activity_regularizer=regularizers.l1(0.006),
          bias_regularizer=regularizers.l1(0.006), activation='relu')(x)
x = Dropout(rate=0.3, seed=123)(x)
x = Dense(128, kernel_regularizer=regularizers.l2(l=0.016), activity_regularizer=regularizers.l1(0.006),
          bias_regularizer=regularizers.l1(0.006), activation='relu')(x)
x = Dropout(rate=0.45, seed=123)(x)

output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)

lr = 0.001
model.compile(Adamax(learning_rate=lr), loss='categorical_crossentropy', metrics=['accuracy'])

# Train on the augmented stream and validate on the untouched validation split.
history = model.fit(
    train_gen,
    epochs=5,
    validation_data=valid_gen,
    # callbacks=[early_stop, model_checkpoint, tensorboard],
)

# Final score on the held-out test split.
test_loss, test_acc = model.evaluate(test_gen)
print("Test loss:", test_loss)
print("Test accuracy:", test_acc)

In [None]:
class_labels = np.unique(balanced_df["label"])
print("Class Labels:", class_labels, '\n')

out_of_scope = "paint.jpg"  # not used in this cell

# Reverse of the training iterator's mapping, used to turn the arg-max index
# back into a class name.
index_to_label = {index: name for name, index in train_gen.class_indices.items()}

# Run the model on the first image of every class and compare the prediction
# with the true label.
for class_label in class_labels:
    class_subset = balanced_df[balanced_df["label"] == class_label]
    image_path = class_subset.iloc[0]['images']
    print(image_path)
    with Image.open(image_path) as image_file:
        # Force 3 channels (the generators load with color_mode='rgb') so
        # grayscale / RGBA / palette files still match the model input.
        # PIL expects (width, height) while img_size is (height, width).
        image = image_file.convert("RGB").resize((img_size[1], img_size[0]))
    # Pixels stay in the raw 0-255 range: the generators used for training
    # apply no rescaling, so dividing by 255 here would not match what the
    # model was trained on.
    image = np.expand_dims(np.asarray(image), axis=0)  # add the batch dimension

    predictions = model.predict(image)
    formatted_predictions = ["{:.4f}".format(prob) for prob in predictions[0]]
    print("Predictions:", formatted_predictions)

    predicted_class = np.argmax(predictions, axis=1)
    print("Predicted Class:", predicted_class, "->", index_to_label[int(predicted_class[0])])
    print("Class Label:", class_label)
    # basename handles whichever path separator os.path.join produced.
    print("Image Path:", os.path.basename(image_path))

In [None]:
from sklearn.metrics import confusion_matrix,classification_report
# Class names ordered by the integer index the test iterator assigned to them.
# (labels_df["label"].unique() is in order of first appearance, which does not
# have to match those indices and would mislabel the report rows.)
classes = sorted(test_gen.class_indices, key=test_gen.class_indices.get)
label_ids = np.arange(len(classes))

# Predict labels for the test data; test_gen was created with shuffle=False,
# so the predictions line up with test_gen.classes.
predictions = model.predict(test_gen)
y_true = test_gen.classes
y_pred = np.argmax(predictions, axis=1)

# Calculate confusion matrix; explicit labels keep the shape fixed even when a
# class is missing from y_true / y_pred.
cm = confusion_matrix(y_true, y_pred, labels=label_ids)

# Plot confusion matrix with class names on both axes
plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = label_ids
plt.xticks(tick_marks, classes, rotation=45, ha='right')
plt.yticks(tick_marks, classes)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# Per-class precision / recall / F1, with rows named in index order.
clr = classification_report(
    y_true, y_pred, labels=label_ids, target_names=classes, digits=4
)
print("Classification Report:\n----------------------\n", clr)