In [None]:
import tensorflow as tf
import seaborn as sns
import matplotlib.pyplot as plt
import cv2
import os
import numpy as np 
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report as cs, confusion_matrix
from tensorflow import keras
from tensorflow.keras.metrics import Precision, Recall
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout,GlobalAveragePooling2D, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.layers import RandomFlip, RandomRotation, RandomZoom, RandomContrast, RandomTranslation
from tensorflow.keras.models import Model

In [None]:
# Root folder of the MST-E dataset. The default is written as a raw string so the
# Windows backslashes are not parsed as (invalid) escape sequences such as "\S" and "\m".
# Set the MST_E_DATASET_PATH environment variable to point at a different location
# without editing the notebook.
DATASET_PATH = Path(os.environ.get("MST_E_DATASET_PATH", r"E:\SkinTone_Dataset_Google\mst-e_data"))
# Extension of files that are not images (videos). NOTE(review): not referenced by any
# cell visible in this notebook; kept for compatibility.
SKIP_EXTENSION = 'mp4'

In [1]:
# Skin-tone class (1-10) -> numeric ids of the subjects annotated with that class.
_SUBJECT_IDS_BY_TONE = {
    1: (16, 18),
    2: (1, 8, 13),
    3: (0, 15),
    4: (7, 9),
    5: (6, 11),
    6: (3, 14),
    7: (5,),
    8: (2, 17),
    9: (4, 10),
    10: (12,),
}

# Folder name ("subject_<id>") -> skin-tone class, derived from the grouping above.
MAP_OBJECT = {
    f"subject_{subject_id}": tone
    for tone, subject_ids in _SUBJECT_IDS_BY_TONE.items()
    for subject_id in subject_ids
}

In [None]:
def get_organize_data(Dataset_path: Path, skip_first: int = 5, subject_to_tone: dict = None) -> dict:
    """Index the dataset's image files by skin-tone class.

    Parameters:
    - Dataset_path: Directory that contains one sub-folder per subject.
    - skip_first: Number of entries (in sorted name order) to drop from the start of
      the directory listing before scanning. Defaults to 5, the value that was
      previously hard-coded. NOTE(review): the reason for skipping entries is not
      visible in this notebook; pass 0 to scan every folder.
    - subject_to_tone: Mapping of folder name -> skin-tone class. Defaults to the
      notebook-level MAP_OBJECT.

    Returns:
    - dict mapping each skin-tone class to the list of image paths (as strings) found
      in the folders assigned to it. Only .jpg/.jpeg/.png files are included
      (case-insensitive); entries that are not directories or are missing from the
      mapping are ignored.

    Raises:
    - Whatever os.listdir raises (e.g. FileNotFoundError) if a directory cannot be read.
    """
    if subject_to_tone is None:
        subject_to_tone = MAP_OBJECT
    Dataset_path = Path(Dataset_path)
    image_suffixes = ('.jpg', '.jpeg', '.png')

    Data = {}
    # os.listdir() returns names in arbitrary order, so the listing is sorted first;
    # otherwise the set of skipped entries could differ between machines and runs.
    folders = sorted(os.listdir(Dataset_path))[skip_first:]
    for folder in folders:
        folder_path = Dataset_path / folder
        if not folder_path.is_dir():
            continue
        skin_tone = subject_to_tone.get(folder)
        if skin_tone is None:
            continue
        # Sorted as well so the image order (and therefore the later split) is reproducible.
        for file in sorted(os.listdir(folder_path)):
            file_path = folder_path / file
            if file_path.suffix.lower() in image_suffixes:
                Data.setdefault(skin_tone, []).append(str(file_path))
    return Data

In [None]:
# Build the {skin-tone class: [image paths]} index from the dataset directory.
organize_data=get_organize_data(DATASET_PATH)

In [None]:
# Image count per skin-tone class, in the order the classes appear in organize_data.
class_wise_distribution = {tone: len(paths) for tone, paths in organize_data.items()}
# Grand total across every class.
total_images = sum(class_wise_distribution.values())
# Parallel lists consumed by the plotting cells: bar heights and their "mst<N>" labels.
classes = list(class_wise_distribution.values())
labels = ["mst" + str(tone) for tone in class_wise_distribution]

In [None]:
# One swatch per skin-tone class, ordered mst1 (lightest) to mst10 (darkest).
# The previous five-entry palette could not give each of up to ten classes its own colour.
# NOTE(review): values are intended to approximate the Monk Skin Tone scale swatches;
# verify against the published palette before using the figure in a report.
skin_tone_colors = [
    "#F6EDE4",
    "#F3E7DB",
    "#F7EAD0",
    "#EADABA",
    "#D7BD96",
    "#A07E56",
    "#825C43",
    "#604134",
    "#3A312A",
    "#292420",
]

# Colours are looked up by label rather than by position, so each bar gets the swatch
# of its own class even when `labels` is not in ascending tone order or has gaps.
color_by_label = {f"mst{tone}": swatch for tone, swatch in enumerate(skin_tone_colors, start=1)}
colors = [color_by_label.get(label, "#BBBBBB") for label in labels]  # grey for unknown labels

plt.figure(figsize=(10, 5))
plt.title("Class Wise Dataset Distribution", fontsize=16)
plt.bar(labels, classes, color=colors, edgecolor='black')
# Overlay a line through the bar tops to make the imbalance between classes easier to read.
plt.plot(labels, classes, marker='o', color='black', linestyle='-', linewidth=2, markersize=8, label='Class Distribution')

plt.xlabel("Skin Tone Classes", fontsize=14)
plt.ylabel("Number of Images", fontsize=14)

plt.xticks(labels, fontsize=12)
plt.yticks(fontsize=12)
plt.grid(axis='y', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()

In [None]:
# Single-bar chart whose height is the total number of images across all classes.
plt.bar(x="Total Images", height=total_images)
plt.title(label="Total Images Present In Dataset")

In [None]:
def Load_Data(organize_data, image_size=(224, 224)):
    """Read every indexed image into memory as a normalised RGB array.

    Parameters:
    - organize_data: dict mapping a class label to a list of image file paths.
    - image_size: (width, height) passed to cv2.resize; defaults to (224, 224).

    Returns:
    - (labels, images): two numpy arrays in matching order. Each image is resized to
      `image_size`, converted from BGR to RGB and scaled to [0, 1] as float32
      (float32 halves the memory needed compared with the float64 produced by a
      plain `img / 255.0`).

    Raises:
    - ValueError: if an image file cannot be read or decoded.
    """
    labels_list = []
    images_list = []
    for label, images in organize_data.items():
        for image_path in images:
            img = cv2.imread(image_path)
            # cv2.imread signals failure by returning None instead of raising; fail here
            # with the offending path rather than with an opaque error inside cv2.resize.
            if img is None:
                raise ValueError(f"Could not read image: {image_path}")
            img = cv2.resize(img, image_size)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            images_list.append(img.astype(np.float32) / 255.0)
            labels_list.append(label)
    labels_array = np.array(labels_list)
    images_array = np.array(images_list, dtype=np.float32)
    return labels_array, images_array

In [None]:
# Load every indexed image into memory. Load_Data returns (labels, images) in that
# order, hence the Y, X unpacking.
Y, X = Load_Data(organize_data)

# Display both array shapes as a quick sanity check.
X.shape,Y.shape

In [None]:
# Hold out 20% of the images for testing; the fixed random_state keeps the split reproducible.
# NOTE(review): the split is per image and not stratified. If images of one subject
# end up in both splits, test metrics may be optimistic; consider splitting by subject.
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, random_state = 42 , test_size= 0.2)

## Have to set this

In [None]:
# to_categorical indexes classes from 0, so the vector length is the largest label + 1.
# The size is taken from the full label array Y rather than from Y_train, so a label
# that only occurs in the test split cannot overflow the one-hot width.
# NOTE(review): labels in MAP_OBJECT start at 1, which leaves index 0 unused.
num_classes = int(np.max(Y)) + 1

# Encode only while the labels are still 1-D integer arrays; this keeps the cell safe
# to re-run (encoding already one-hot labels a second time would corrupt them).
if Y_train.ndim == 1:
    Y_train = to_categorical(Y_train, num_classes)
if Y_test.ndim == 1:
    Y_test = to_categorical(Y_test, num_classes)

In [None]:
def get_augmentation_layer():
    """Build a new Sequential block named "data_augmentation".

    The block chains, in order: random horizontal/vertical flip, rotation (0.1),
    zoom (0.1), contrast (0.1) and translation (0.1, 0.1). A fresh set of layers is
    created on every call.
    """
    augmentation_steps = [
        RandomFlip("horizontal_and_vertical"),
        RandomRotation(0.1),
        RandomZoom(0.1),
        RandomContrast(0.1),
        RandomTranslation(0.1, 0.1),
    ]
    pipeline = Sequential(layers=augmentation_steps, name="data_augmentation")
    return pipeline

In [None]:
def Compile_train(model, X_train, Y_train, X_test, Y_test, epochs=50, batch_size=16, patience=3):
    """Compile `model` for multi-class classification and train it with early stopping.

    Parameters:
    - model: Keras model to compile and fit (modified in place).
    - X_train, Y_train: Training images and one-hot labels.
    - X_test, Y_test: Data passed to `fit` as validation_data; early stopping monitors
      the loss on this set. NOTE(review): callers in this notebook pass the test split
      here, so the reported test metrics are not from untouched data.
    - epochs: Maximum number of epochs (default 50).
    - batch_size: Mini-batch size (default 16).
    - patience: Epochs without val_loss improvement before stopping (default 3).

    Returns:
    - (model, history): the same model object and the History returned by `fit`.
    """
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy', Precision(), Recall()],
    )
    # restore_best_weights rolls the model back to the epoch with the lowest val_loss.
    early_stop = EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True,
    )
    history = model.fit(
        X_train, Y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_data=(X_test, Y_test),
        callbacks=[early_stop],
    )
    return model, history

In [None]:
# Baseline CNN: in-model augmentation -> three conv/pool stages -> dense classifier head.
cnn_layers = [
    Input(shape=(224, 224, 3)),
    # Augmentation layers (no RandomTranslation in this model).
    RandomFlip("horizontal_and_vertical"),
    RandomRotation(0.1),
    RandomZoom(0.1),
    RandomContrast(0.1),
]

# Feature extractor: the filter count doubles at every stage and each stage is
# followed by 2x2 max pooling.
for filters in (32, 64, 128):
    cnn_layers.append(Conv2D(filters, (3, 3), activation='relu'))
    cnn_layers.append(MaxPooling2D(pool_size=(2, 2)))

# Classifier head ending in one softmax unit per class.
cnn_layers.extend([
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

model = Sequential(cnn_layers)
model.summary()

In [None]:
# Train the baseline CNN. Compile_train passes the last two arguments to fit() as
# validation_data, so the test split also drives early stopping here.
cnn_model, cnn_history = Compile_train(model, X_train,Y_train,X_test,Y_test)

In [None]:
def plot_accuracy(history,model_name,save_dir="model_results"):
    """Plot training vs. validation accuracy per epoch, save the figure and show it.

    Parameters:
    - history: dict with 'accuracy' and 'val_accuracy' sequences (one value per epoch).
    - model_name: Used in the plot title and as the prefix of the saved file name.
    - save_dir: Directory for "<model_name>_accuracy.png"; created if it does not exist.
    """
    # savefig does not create missing directories, so make sure the target exists first.
    os.makedirs(save_dir, exist_ok=True)

    plt.figure(figsize=(12, 6))
    plt.plot(history['accuracy'], label='Train Accuracy')
    plt.plot(history['val_accuracy'], label='Val Accuracy')
    plt.title(model_name + ' Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('Epoch')
    plt.legend()
    plt.grid(True)
    name=model_name+"_accuracy.png"
    path=os.path.join(save_dir,name)
    # Save before show(): showing can leave the current figure empty.
    plt.savefig(path)
    plt.show()
    
def plot_loss(history,model_name,save_dir="model_results"):
    """Plot training vs. validation loss per epoch, save the figure and show it.

    Parameters:
    - history: dict with 'loss' and 'val_loss' sequences (one value per epoch).
    - model_name: Used in the plot title and as the prefix of the saved file name.
    - save_dir: Directory for "<model_name>_loss.png"; created if it does not exist.
    """
    # savefig does not create missing directories, so make sure the target exists first.
    os.makedirs(save_dir, exist_ok=True)

    plt.figure(figsize=(12, 6))
    plt.plot(history['loss'], label='Train Loss')
    plt.plot(history['val_loss'], label='Val Loss')
    plt.title(model_name + ' Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.legend()
    plt.grid(True)
    name=model_name+"_loss.png"
    path=os.path.join(save_dir,name)
    # Save before show(): showing can leave the current figure empty.
    plt.savefig(path)
    plt.show()


def evaluate_model(model, model_name, X_test, Y_test, save_dir="model_results"):
    """
    Evaluate a trained classifier: print a classification report, plot and save the
    confusion matrix, and save the model itself.

    Parameters:
    - model: Trained model exposing `predict` and `save` (e.g., a Keras CNN model)
    - model_name: Name of the model (used in plot title and saved file names)
    - X_test: Test features (numpy array)
    - Y_test: Test labels (one-hot encoded)
    - save_dir: Directory to save the confusion matrix image (default: 'model_results')

    Side effects:
    - Writes "models/<model_name>_model.keras" relative to the working directory.
    - Writes "confusionmatrix_<model_name>.png" (spaces replaced by underscores) to save_dir.
    - Creates both directories if they do not exist.
    """
    os.makedirs(save_dir, exist_ok=True)
    # model.save() below writes into "models/"; create it so a fresh checkout does not fail.
    os.makedirs("models", exist_ok=True)

    Y_pred = model.predict(X_test)
    Y_pred_classes = np.argmax(Y_pred, axis=1)
    Y_test_classes = np.argmax(Y_test, axis=1)

    # Use every class that occurs in either the true or the predicted labels, so the
    # tick labels always line up with the rows/columns of the matrix (a class that is
    # only predicted, never true, would otherwise shift the labels).
    class_labels = np.unique(np.concatenate([Y_test_classes, Y_pred_classes]))

    # `cs` is sklearn's classification_report (see the notebook imports).
    print(cs(Y_test_classes, Y_pred_classes, labels=class_labels, zero_division=0))

    conf_matrix = confusion_matrix(Y_test_classes, Y_pred_classes, labels=class_labels)

    plt.figure(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_labels,
                yticklabels=class_labels)
    plt.title(f'Confusion Matrix - {model_name}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    model.save("models"+"/"+model_name+"_model.keras")
    filename = f"confusionmatrix_{model_name.replace(' ', '_')}.png"
    filepath = os.path.join(save_dir, filename)
    # Save before show(): showing can leave the current figure empty.
    plt.savefig(filepath)
    plt.show()

In [None]:
# cnn_history is what model.fit returned; its .history attribute is the dict of
# per-epoch metrics that the plotting helpers index by key.
history=cnn_history.history
plot_accuracy(history,"cnn")
plot_loss(history,"cnn")
# Besides plotting the confusion matrix, this call also saves the model to disk.
evaluate_model(cnn_model, "cnn", X_test, Y_test)

## DenseNet Model

In [None]:
# Transfer learning: an ImageNet-pretrained DenseNet121 backbone with a small trainable head.
base_model = DenseNet121(include_top=False, weights='imagenet', input_shape=(224, 224, 3))
base_model.trainable = False  # Freeze base model

inputs = Input(shape=(224, 224, 3))
# Augment the input images *before* the backbone. Applying flips/contrast/translation
# to the backbone's output would distort feature maps instead of images.
x = get_augmentation_layer()(inputs)
# training=False keeps the backbone (including its batch-norm layers) in inference mode.
# NOTE(review): the images are only scaled to [0, 1] in Load_Data; confirm whether the
# DenseNet-specific preprocess_input normalisation should be applied as well.
x = base_model(x, training=False)
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
outputs = Dense(num_classes, activation='softmax')(x)

densenet_model = Model(inputs, outputs)
densenet_model.summary()

In [None]:
# Train the DenseNet model first: no earlier cell defines `densenet_history`, so the
# plots below would raise NameError on a fresh kernel without this call.
# Compile_train returns (model, history). Note that re-running this cell retrains.
densenet_model, densenet_history = Compile_train(densenet_model, X_train, Y_train, X_test, Y_test)

history=densenet_history.history
plot_accuracy(history,"densenet")
plot_loss(history,"densenet")
# Besides plotting the confusion matrix, this call also saves the model to disk.
evaluate_model(densenet_model, "densenet", X_test, Y_test)