In [5]:
#DenseNet121 classification
import numpy as np
import os
from keras.applications.densenet import DenseNet121, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, cohen_kappa_score
import seaborn as sns

# Root directory of the dataset; flow_from_directory below reads the sub-folders named in `classes`
train_val_dir = r"C:\Users\nanda\OneDrive\Desktop\HyperKvasir Classification"

# Data preparation: pixel values are only rescaled by 1/255, no random augmentation is configured.
# NOTE(review): the `preprocess_input` imported for this backbone is not applied here — confirm
# that inference uses the same 1/255 scaling.
datagen = ImageDataGenerator(rescale=1./255)

# Class folder names; a class's integer label id is its index in this list
classes = ['Mayo 0','Mayo 1','Mayo 2','Mayo 3']

# Stream 224x224 images in shuffled batches of 16; class_mode='sparse' yields integer-valued labels
train_generator = datagen.flow_from_directory(
    train_val_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=True,
    classes=classes
)

# Print class indices and expected number of batches
print("Class indices:", train_generator.class_indices)
print("Expected number of batches:", len(train_generator))

# Load DenseNet121 (ImageNet weights) without its classification top; it is only used
# to turn images into feature maps via predict().
base_model = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# `model` re-exposes the backbone input/output; it is rebound to the classification head later in this cell
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
def extract_features(generator, model, num_augmentations=5):
    """Run every batch of ``generator`` through ``model`` and collect the outputs.

    Each pass resets the generator and consumes enough batches to cover all of
    ``generator.filenames``. Because every pass covers the same files, extra
    passes only add new information when the generator applies random
    transforms; otherwise they append duplicates of the first pass.

    Args:
        generator: Batch iterator exposing ``filenames``, ``batch_size`` and
            ``reset()``, and returning ``(inputs, labels)`` from ``next()``.
        model: Object with a Keras-style ``predict(inputs, verbose=...)`` method.
        num_augmentations: Number of complete passes over the generator.

    Returns:
        Tuple ``(features, labels)``: the per-batch model outputs stacked along
        axis 0 and the matching labels concatenated in the same order.

    Raises:
        ValueError: If there is nothing to extract (no files or no passes).
    """
    num_samples = len(generator.filenames)
    # Ceiling division: the last batch may be smaller than batch_size
    num_batches = -(-num_samples // generator.batch_size)

    if num_augmentations < 1 or num_batches == 0:
        # Fail with a clear message instead of the opaque error np.vstack([]) would raise
        raise ValueError(
            "extract_features needs at least one sample and one pass "
            f"(got {num_samples} samples, num_augmentations={num_augmentations})"
        )

    features = []
    labels = []
    for _ in range(num_augmentations):
        generator.reset()
        for _ in range(num_batches):
            inputs_batch, labels_batch = next(generator)
            # verbose=0: avoid printing one progress bar per batch
            features.append(model.predict(inputs_batch, verbose=0))
            labels.append(labels_batch)

    return np.vstack(features), np.concatenate(labels)

# Extract backbone features with a single pass over the data.
# `datagen` applies no random transforms, so repeating the pass (the default
# num_augmentations=5) would only append exact duplicates of every image, and
# the random splits below would then leak copies of the same image across the
# train, validation and test sets.
train_features, train_labels = extract_features(train_generator, model, num_augmentations=1)

# Troubleshooting: Check unique classes and their distribution
print("Unique classes:", np.unique(train_labels))
unique, counts = np.unique(train_labels, return_counts=True)
print("Class distribution:", dict(zip(unique, counts)))

# Hold out 10% as the test set; stratify so every split keeps the class proportions
x_train_val, x_test, y_train_val, y_test = train_test_split(
    train_features, train_labels, test_size=0.1, random_state=42, stratify=train_labels
)

# 0.2222 of the remaining 90% is ~20% of all samples, giving a ~70/20/10 split
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.2222, random_state=42, stratify=y_train_val
)

# SMOTE works on 2D (samples, features) arrays, so flatten the feature maps
n_samples, x, y, z = x_train.shape
x_train_reshaped = x_train.reshape((n_samples, x * y * z))

# Apply SMOTE only on the training set so validation/test contain no synthetic samples
if len(np.unique(y_train)) > 1:
    smote = SMOTE(random_state=42)
    x_train_smote, y_train_smote = smote.fit_resample(x_train_reshaped, y_train)
    # Reshape back to original feature shape
    x_train_smote = x_train_smote.reshape((-1, x, y, z))
else:
    print("Skipping SMOTE due to insufficient classes")
    x_train_smote = x_train
    y_train_smote = y_train

# Print shapes for verification
print("x_train shape:", x_train_smote.shape)
print("y_train shape:", y_train_smote.shape)
print("x_val shape:", x_val.shape)
print("y_val shape:", y_val.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

# Create the classification head trained on the extracted DenseNet121 features
def create_mobilenet_model(input_shape, num_classes=4):
    """Build the classification head that is trained on pre-extracted feature maps.

    Despite its name the head is backbone-agnostic: it only depends on the
    shape of the feature maps it receives.

    Args:
        input_shape: Shape of one feature map, without the batch axis.
        num_classes: Number of softmax outputs (defaults to the 4 Mayo grades
            used in this cell).

    Returns:
        An uncompiled Keras ``Model``: global average pooling, a 1024-unit
        L2-regularised ReLU layer, dropout (0.7) and a softmax output layer.
    """
    inputs = Input(shape=input_shape)
    x = GlobalAveragePooling2D()(inputs)
    x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = Dropout(0.7)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=inputs, outputs=predictions)

# Per-sample shape of the extracted feature maps (everything after the batch axis)
input_shape = train_features.shape[1:]

# Rebind `model` from the backbone to the trainable classification head.
# sparse_categorical_crossentropy matches the integer labels from class_mode='sparse'.
model = create_mobilenet_model(input_shape)
model.compile(optimizer=Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callbacks (all driven by validation loss)
model_save_path = r'C:\Users\nanda\OneDrive\Desktop\densenet_legit_compleltly_smote_model_900_classification_final_2.keras'
# Stop after 5 epochs without improvement and roll back to the best weights seen
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Write the model to `model_save_path` each time val_loss improves
checkpoint = ModelCheckpoint(model_save_path, monitor='val_loss', verbose=1, save_best_only=True)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, but never below 1e-7
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)

# Train the head on the SMOTE-resampled training features; validation uses the untouched split.
# epochs=900 is only an upper bound; early stopping is expected to end training sooner.
history = model.fit(
    x_train_smote, y_train_smote,
    validation_data=(x_val, y_val),
    epochs=900,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Plot training and validation loss
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training vs. Validation Loss')
plt.legend()
plt.show()

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Save the final model
# NOTE(review): this is the same path as `model_save_path`, so the save below overwrites
# the checkpoint written during training.
final_model_save_path = r'C:\Users\nanda\OneDrive\Desktop\densenet_legit_compleltly_smote_model_900_classification_final_2.keras'
model.save(final_model_save_path)

# Integer label ids in the order of `classes`. Passing them explicitly keeps every
# metric aligned with `classes` even when a class is missing from the test split;
# otherwise the confusion matrix shrinks and classification_report rejects `target_names`.
class_ids = list(range(len(classes)))

# Predict probabilities for each class
y_pred_prob = model.predict(x_test)

# Predicted class labels (argmax gives the class with highest probability)
y_pred = np.argmax(y_pred_prob, axis=1)

# Print confusion matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
print('Confusion Matrix:')
print(cm)

# Display visual confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Calculate and print classification report
print('Classification Report:')
print(classification_report(y_test, y_pred, labels=class_ids, target_names=classes))

# Calculate F1 score, precision, and recall (sensitivity) for each class;
# average=None returns one value per entry of `class_ids`
f1 = f1_score(y_test, y_pred, labels=class_ids, average=None)
precision = precision_score(y_test, y_pred, labels=class_ids, average=None)
recall = recall_score(y_test, y_pred, labels=class_ids, average=None)

# Calculate specificity for each class
def specificity_score(y_true, y_pred, class_label):
    """Return the one-vs-rest specificity TN / (TN + FP) for ``class_label``.

    Samples whose true label differs from ``class_label`` are the negatives;
    a negative counts as a true negative when it is also not predicted as
    ``class_label``. Labels are compared by value, so the result stays correct
    when some class never occurs in ``y_true`` / ``y_pred``.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like, same length as ``y_true``).
        class_label: Label value treated as the positive class.

    Returns:
        The specificity as a float, or ``nan`` when there are no negative
        samples (the ratio is undefined).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    negatives = y_true != class_label
    n_negative = int(np.count_nonzero(negatives))  # TN + FP
    if n_negative == 0:
        return float('nan')

    tn = int(np.count_nonzero(negatives & (y_pred != class_label)))
    return tn / n_negative

# One-vs-rest specificity per class, ordered like `classes`
specificity = [specificity_score(y_test, y_pred, label_id) for label_id, _ in enumerate(classes)]

# Per-class summary of the scores computed above
print("\nMetrics for each class:")
for i, class_name in enumerate(classes):
    print(
        f"\n{class_name}:",
        f"F1 Score: {f1[i]:.4f}",
        f"Precision: {precision[i]:.4f}",
        f"Sensitivity (Recall): {recall[i]:.4f}",
        f"Specificity: {specificity[i]:.4f}",
        sep="\n",
    )

# Cohen's kappa between the true and predicted labels
kappa = cohen_kappa_score(y_test, y_pred)
print(f"\nCohen's Kappa Coefficient: {kappa:.4f}")

# One-vs-rest ROC curve and AUC per class, all drawn on one figure
plt.figure(figsize=(10, 8))
for i, class_name in enumerate(classes):
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob[:, i], pos_label=i)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{class_name} ROC curve (area = {roc_auc:.2f})')
# Dashed diagonal as the chance-level reference
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

Device: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), Name: NVIDIA GeForce RTX 4060 Laptop GPU


In [None]:
#InceptionV3 0-1 classification
import numpy as np
import os
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, cohen_kappa_score
import seaborn as sns

# Root directory of the dataset; flow_from_directory below reads the sub-folders named in `classes`
train_val_dir = r"C:\Users\nanda\OneDrive\Desktop\0-1 classification"

# Data preparation: pixel values are only rescaled by 1/255, no random augmentation is configured.
# NOTE(review): the `preprocess_input` imported for this backbone is not applied here — confirm
# that inference uses the same 1/255 scaling.
datagen = ImageDataGenerator(rescale=1./255)

# Class folder names; a class's integer label id is its index in this list
classes = ['Mayo 0','Mayo 1']

# Stream 224x224 images in shuffled batches of 16; class_mode='sparse' yields integer-valued labels
train_generator = datagen.flow_from_directory(
    train_val_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=True,
    classes=classes
)

# Print class indices and expected number of batches
print("Class indices:", train_generator.class_indices)
print("Expected number of batches:", len(train_generator))

# Load InceptionV3 (ImageNet weights) without its classification top, built for 224x224 inputs;
# it is only used to turn images into feature maps via predict().
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# `model` re-exposes the backbone input/output; it is rebound to the classification head later in this cell
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
def extract_features(generator, model, num_augmentations=5):
    """Run every batch of ``generator`` through ``model`` and collect the outputs.

    Each pass resets the generator and consumes enough batches to cover all of
    ``generator.filenames``. Because every pass covers the same files, extra
    passes only add new information when the generator applies random
    transforms; otherwise they append duplicates of the first pass.

    Args:
        generator: Batch iterator exposing ``filenames``, ``batch_size`` and
            ``reset()``, and returning ``(inputs, labels)`` from ``next()``.
        model: Object with a Keras-style ``predict(inputs, verbose=...)`` method.
        num_augmentations: Number of complete passes over the generator.

    Returns:
        Tuple ``(features, labels)``: the per-batch model outputs stacked along
        axis 0 and the matching labels concatenated in the same order.

    Raises:
        ValueError: If there is nothing to extract (no files or no passes).
    """
    num_samples = len(generator.filenames)
    # Ceiling division: the last batch may be smaller than batch_size
    num_batches = -(-num_samples // generator.batch_size)

    if num_augmentations < 1 or num_batches == 0:
        # Fail with a clear message instead of the opaque error np.vstack([]) would raise
        raise ValueError(
            "extract_features needs at least one sample and one pass "
            f"(got {num_samples} samples, num_augmentations={num_augmentations})"
        )

    features = []
    labels = []
    for _ in range(num_augmentations):
        generator.reset()
        for _ in range(num_batches):
            inputs_batch, labels_batch = next(generator)
            # verbose=0: avoid printing one progress bar per batch
            features.append(model.predict(inputs_batch, verbose=0))
            labels.append(labels_batch)

    return np.vstack(features), np.concatenate(labels)

# Extract backbone features with a single pass over the data.
# `datagen` applies no random transforms, so repeating the pass (the default
# num_augmentations=5) would only append exact duplicates of every image, and
# the random splits below would then leak copies of the same image across the
# train, validation and test sets.
train_features, train_labels = extract_features(train_generator, model, num_augmentations=1)

# Troubleshooting: Check unique classes and their distribution
print("Unique classes:", np.unique(train_labels))
unique, counts = np.unique(train_labels, return_counts=True)
print("Class distribution:", dict(zip(unique, counts)))

# Hold out 10% as the test set; stratify so every split keeps the class proportions
x_train_val, x_test, y_train_val, y_test = train_test_split(
    train_features, train_labels, test_size=0.1, random_state=42, stratify=train_labels
)

# 0.2222 of the remaining 90% is ~20% of all samples, giving a ~70/20/10 split
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.2222, random_state=42, stratify=y_train_val
)

# SMOTE works on 2D (samples, features) arrays, so flatten the feature maps
n_samples, x, y, z = x_train.shape
x_train_reshaped = x_train.reshape((n_samples, x * y * z))

# Apply SMOTE only on the training set so validation/test contain no synthetic samples
if len(np.unique(y_train)) > 1:
    smote = SMOTE(random_state=42)
    x_train_smote, y_train_smote = smote.fit_resample(x_train_reshaped, y_train)
    # Reshape back to original feature shape
    x_train_smote = x_train_smote.reshape((-1, x, y, z))
else:
    print("Skipping SMOTE due to insufficient classes")
    x_train_smote = x_train
    y_train_smote = y_train

# Print shapes for verification
print("x_train shape:", x_train_smote.shape)
print("y_train shape:", y_train_smote.shape)
print("x_val shape:", x_val.shape)
print("y_val shape:", y_val.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

# Create the classification head trained on the extracted InceptionV3 features
def create_mobilenet_model(input_shape, num_classes=2):
    """Build the classification head that is trained on pre-extracted feature maps.

    Despite its name the head is backbone-agnostic: it only depends on the
    shape of the feature maps it receives.

    Args:
        input_shape: Shape of one feature map, without the batch axis.
        num_classes: Number of softmax outputs (defaults to the 2 classes
            used in this cell).

    Returns:
        An uncompiled Keras ``Model``: global average pooling, a 1024-unit
        L2-regularised ReLU layer, dropout (0.7) and a softmax output layer.
    """
    inputs = Input(shape=input_shape)
    x = GlobalAveragePooling2D()(inputs)
    x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = Dropout(0.7)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=inputs, outputs=predictions)

# Per-sample shape of the extracted feature maps (everything after the batch axis)
input_shape = train_features.shape[1:]

# Rebind `model` from the backbone to the trainable classification head.
# sparse_categorical_crossentropy matches the integer labels from class_mode='sparse'.
model = create_mobilenet_model(input_shape)
model.compile(optimizer=Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callbacks (all driven by validation loss)
model_save_path = r'C:\Users\nanda\OneDrive\Desktop\inceptionv3_legit_0-1_compleltly_smote_model_900_classification_final_2.keras'
# Stop after 5 epochs without improvement and roll back to the best weights seen
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Write the model to `model_save_path` each time val_loss improves
checkpoint = ModelCheckpoint(model_save_path, monitor='val_loss', verbose=1, save_best_only=True)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, but never below 1e-7
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)

# Train the head on the SMOTE-resampled training features; validation uses the untouched split.
# epochs=900 is only an upper bound; early stopping is expected to end training sooner.
history = model.fit(
    x_train_smote, y_train_smote,
    validation_data=(x_val, y_val),
    epochs=900,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Plot training and validation loss
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training vs. Validation Loss')
plt.legend()
plt.show()

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Save the final model
# NOTE(review): this is the same path as `model_save_path`, so the save below overwrites
# the checkpoint written during training.
final_model_save_path = r'C:\Users\nanda\OneDrive\Desktop\inceptionv3_legit_0-1_compleltly_smote_model_900_classification_final_2.keras'
model.save(final_model_save_path)

# Integer label ids in the order of `classes`. Passing them explicitly keeps every
# metric aligned with `classes` even when a class is missing from the test split;
# otherwise the confusion matrix shrinks and classification_report rejects `target_names`.
class_ids = list(range(len(classes)))

# Predict probabilities for each class
y_pred_prob = model.predict(x_test)

# Predicted class labels (argmax gives the class with highest probability)
y_pred = np.argmax(y_pred_prob, axis=1)

# Print confusion matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
print('Confusion Matrix:')
print(cm)

# Display visual confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Calculate and print classification report
print('Classification Report:')
print(classification_report(y_test, y_pred, labels=class_ids, target_names=classes))

# Calculate F1 score, precision, and recall (sensitivity) for each class;
# average=None returns one value per entry of `class_ids`
f1 = f1_score(y_test, y_pred, labels=class_ids, average=None)
precision = precision_score(y_test, y_pred, labels=class_ids, average=None)
recall = recall_score(y_test, y_pred, labels=class_ids, average=None)

# Calculate specificity for each class
def specificity_score(y_true, y_pred, class_label):
    """Return the one-vs-rest specificity TN / (TN + FP) for ``class_label``.

    Samples whose true label differs from ``class_label`` are the negatives;
    a negative counts as a true negative when it is also not predicted as
    ``class_label``. Labels are compared by value, so the result stays correct
    when some class never occurs in ``y_true`` / ``y_pred``.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like, same length as ``y_true``).
        class_label: Label value treated as the positive class.

    Returns:
        The specificity as a float, or ``nan`` when there are no negative
        samples (the ratio is undefined).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    negatives = y_true != class_label
    n_negative = int(np.count_nonzero(negatives))  # TN + FP
    if n_negative == 0:
        return float('nan')

    tn = int(np.count_nonzero(negatives & (y_pred != class_label)))
    return tn / n_negative

# One-vs-rest specificity per class, ordered like `classes`
specificity = [specificity_score(y_test, y_pred, label_id) for label_id, _ in enumerate(classes)]

# Per-class summary of the scores computed above
print("\nMetrics for each class:")
for i, class_name in enumerate(classes):
    print(
        f"\n{class_name}:",
        f"F1 Score: {f1[i]:.4f}",
        f"Precision: {precision[i]:.4f}",
        f"Sensitivity (Recall): {recall[i]:.4f}",
        f"Specificity: {specificity[i]:.4f}",
        sep="\n",
    )

# Cohen's kappa between the true and predicted labels
kappa = cohen_kappa_score(y_test, y_pred)
print(f"\nCohen's Kappa Coefficient: {kappa:.4f}")

# One-vs-rest ROC curve and AUC per class, all drawn on one figure
plt.figure(figsize=(10, 8))
for i, class_name in enumerate(classes):
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob[:, i], pos_label=i)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{class_name} ROC curve (area = {roc_auc:.2f})')
# Dashed diagonal as the chance-level reference
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
#Predicting the class of any random image by using voting mechanism
import numpy as np
import tensorflow as tf
from keras.models import load_model
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from keras.applications.inception_v3 import InceptionV3, preprocess_input as preprocess_inception
from keras.applications.densenet import DenseNet121, preprocess_input as preprocess_densenet
from keras.applications.mobilenet import MobileNet, preprocess_input as preprocess_mobilenet

# Load the saved 4-class classification heads (one per backbone).
# Each head takes backbone feature maps as input, not raw images.
inceptionv3_model = load_model(r"C:\Users\nanda\OneDrive\Desktop\Internship-Deep Learning\Finalized Models\inceptionv3_legit_compleltly_smote_model_900_classification_final_1.keras")
densenet_model = load_model(r"C:\Users\nanda\OneDrive\Desktop\Internship-Deep Learning\Finalized Models\densenet_legit_compleltly_smote_model_900_classification_final_2.keras")
mobilenet_model = load_model(r"C:\Users\nanda\OneDrive\Desktop\Internship-Deep Learning\Finalized Models\mobilenet_legit_compleltly_smote_model_900_classification_final_2.keras")

# Load the additional specialized 2-class heads (Mayo 0 vs 1, Mayo 2 vs 3)
inceptionv3_01_model = load_model(r"C:\Users\nanda\OneDrive\Desktop\Internship-Deep Learning\Finalized Models\inceptionv3_legit_0-1_compleltly_smote_model_900_classification_final_2.keras")
inceptionv3_23_model = load_model(r"C:\Users\nanda\OneDrive\Desktop\Internship-Deep Learning\Finalized Models\inceptionv3_legit_2-3_compleltly_smote_model_900_classification_final_2.keras")
mobilenet_01_model = load_model(r"C:\Users\nanda\OneDrive\Desktop\Internship-Deep Learning\Finalized Models\mobilenet_legit_0-1_compleltly_smote_model_900_classification_final_2.keras")


# Load base models for feature extraction.
# All backbones are built for 224x224 inputs, the resolution the training cells use.
# InceptionV3 yields 5x5 feature maps at that size, which is the shape the
# InceptionV3 heads are fed, so no feature resizing is needed any more.
# NOTE(review): the training cells of the 4-class InceptionV3 and MobileNet heads are
# not visible here; they are assumed to use the same 224x224 / rescale=1/255 pipeline
# as the other training cells — confirm before relying on those two heads.
inceptionv3_base = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
densenet_base = DenseNet121(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
mobilenet_base = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

classes = ['Mayo 0', 'Mayo 1', 'Mayo 2', 'Mayo 3']

# Image size fed to every backbone (matches target_size in the training cells)
INPUT_SIZE = (224, 224)


def rescale_pixels(img_array):
    """Scale raw 0-255 pixel values to the [0, 1] range.

    Mirrors ``ImageDataGenerator(rescale=1./255)``, the only preprocessing the
    training cells apply before feature extraction. The backbone-specific
    ``preprocess_input`` functions normalise differently and would feed the
    heads features they were not trained on.
    """
    return img_array / 255.0


def resize_features(features, target_size):
    """Resize a batch of feature maps to ``target_size`` using ``tf.image.resize``."""
    return tf.image.resize(features, target_size)


def _extract_base_features(base_model, img_path, target_size, preprocess_func):
    """Load one image and return the backbone feature maps for it (batch of one)."""
    img = load_img(img_path, target_size=target_size)
    img_array = np.expand_dims(img_to_array(img), axis=0)
    return base_model.predict(preprocess_func(img_array))


def predict_with_model(model, base_model, img_path, target_size, preprocess_func, feature_target_size=None):
    """Classify one image with a feature-extraction backbone plus a trained head.

    Args:
        model: Trained head that consumes the backbone feature maps.
        base_model: Backbone used to extract the feature maps.
        img_path: Path of the image file to classify.
        target_size: (height, width) the image is resized to when loaded.
        preprocess_func: Callable applied to the image batch before the backbone.
        feature_target_size: Optional (height, width) to resize the feature
            maps to before they are passed to ``model``.

    Returns:
        The head's output for the single image (first row of the prediction).
    """
    features = _extract_base_features(base_model, img_path, target_size, preprocess_func)

    if feature_target_size:
        features = resize_features(features, feature_target_size)

    prediction = model.predict(features)
    return prediction[0]


def voting_prediction(img_path):
    """Combine the 4-class heads and the specialised 2-class heads for one image.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        Tuple ``(inceptionv3_pred, densenet_pred, mobilenet_pred,
        inceptionv3_01_pred, inceptionv3_23_pred, mobilenet_01_pred,
        combined_pred, predicted_label, confidence)`` where ``combined_pred``
        holds one averaged score per entry of ``classes``.
    """
    # Run each backbone once and share its features between all heads built on it
    inception_features = _extract_base_features(inceptionv3_base, img_path, INPUT_SIZE, rescale_pixels)
    densenet_features = _extract_base_features(densenet_base, img_path, INPUT_SIZE, rescale_pixels)
    mobilenet_features = _extract_base_features(mobilenet_base, img_path, INPUT_SIZE, rescale_pixels)

    # Predictions from the 4-class heads
    inceptionv3_pred = inceptionv3_model.predict(inception_features)[0]
    densenet_pred = densenet_model.predict(densenet_features)[0]
    mobilenet_pred = mobilenet_model.predict(mobilenet_features)[0]

    # Predictions from specialized models
    inceptionv3_01_pred = inceptionv3_01_model.predict(inception_features)[0]
    inceptionv3_23_pred = inceptionv3_23_model.predict(inception_features)[0]
    mobilenet_01_pred = mobilenet_01_model.predict(mobilenet_features)[0]

    # Combine predictions.
    # NOTE(review): the two halves are averaged over different numbers of models and
    # the 2-class outputs each sum to 1 on their own, so `combined_pred` is a score
    # vector that does not necessarily sum to 1.
    combined_pred = np.zeros(4)

    # For Mayo 0 and 1: three 4-class heads plus the two Mayo 0/1 specialists
    combined_pred[:2] = (inceptionv3_pred[:2] + densenet_pred[:2] + mobilenet_pred[:2] +
                         inceptionv3_01_pred + mobilenet_01_pred) / 5

    # For Mayo 2 and 3: three 4-class heads plus the single Mayo 2/3 specialist
    combined_pred[2:] = (inceptionv3_pred[2:] + densenet_pred[2:] + mobilenet_pred[2:] +
                         inceptionv3_23_pred) / 4

    # Get the predicted label and confidence score
    predicted_class = np.argmax(combined_pred)
    predicted_label = classes[predicted_class]
    confidence = combined_pred[predicted_class]

    return (inceptionv3_pred, densenet_pred, mobilenet_pred,
            inceptionv3_01_pred, inceptionv3_23_pred,
            mobilenet_01_pred,
            combined_pred, predicted_label, confidence)

# Example usage:
image_path = r"C:\Users\nanda\OneDrive\Desktop\Internship-Deep Learning\Internet Images\Mayo1internet.png"
(inceptionv3_pred, densenet_pred, mobilenet_pred,
 inceptionv3_01_pred, inceptionv3_23_pred,
 mobilenet_01_pred,
 combined_pred, predicted_label, confidence) = voting_prediction(image_path)

# Print results
print(f'Predicted Label: {predicted_label}')
print(f'Confidence: {confidence}')

# Print probabilities for each class.
# The specialised 2-class models are only reported for the classes they cover
# (the same ones they contribute to in the combined score): the Mayo 0/1 models
# for classes 0-1 and the Mayo 2/3 model for classes 2-3. Indexing them with
# i % 2 for every class would e.g. show P(Mayo 0) under "Mayo 2".
for i, class_name in enumerate(classes):
    print(f"\n{class_name} probabilities:")
    print(f"InceptionV3: {inceptionv3_pred[i]:.4f}")
    print(f"DenseNet: {densenet_pred[i]:.4f}")
    print(f"MobileNet: {mobilenet_pred[i]:.4f}")
    if i < 2:
        print(f"InceptionV3 Mayo 0/1: {inceptionv3_01_pred[i]:.4f}")
        print(f"MobileNet Mayo 0/1: {mobilenet_01_pred[i]:.4f}")
    else:
        print(f"InceptionV3 Mayo 2/3: {inceptionv3_23_pred[i - 2]:.4f}")
    print(f"Combined: {combined_pred[i]:.4f}")


In [None]:
#MobileNet 0-1 classification
import numpy as np
import os
from keras.applications.mobilenet import MobileNet, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, cohen_kappa_score
import seaborn as sns

# Root directory of the dataset; flow_from_directory below reads the sub-folders named in `classes`
train_val_dir = r"C:\Users\nanda\OneDrive\Desktop\0-1 classification"

# Data preparation: pixel values are only rescaled by 1/255, no random augmentation is configured.
# NOTE(review): the `preprocess_input` imported for this backbone is not applied here — confirm
# that inference uses the same 1/255 scaling.
datagen = ImageDataGenerator(rescale=1./255)

# Class folder names; a class's integer label id is its index in this list
classes = ['Mayo 0','Mayo 1']

# Stream 224x224 images in shuffled batches of 16; class_mode='sparse' yields integer-valued labels
train_generator = datagen.flow_from_directory(
    train_val_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=True,
    classes=classes
)

# Print class indices and expected number of batches
print("Class indices:", train_generator.class_indices)
print("Expected number of batches:", len(train_generator))

# Load MobileNet (ImageNet weights) without its classification top; it is only used
# to turn images into feature maps via predict().
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# `model` re-exposes the backbone input/output; it is rebound to the classification head later in this cell
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
def extract_features(generator, model, num_augmentations=5):
    """Run every batch of ``generator`` through ``model`` and collect the outputs.

    Each pass resets the generator and consumes enough batches to cover all of
    ``generator.filenames``. Because every pass covers the same files, extra
    passes only add new information when the generator applies random
    transforms; otherwise they append duplicates of the first pass.

    Args:
        generator: Batch iterator exposing ``filenames``, ``batch_size`` and
            ``reset()``, and returning ``(inputs, labels)`` from ``next()``.
        model: Object with a Keras-style ``predict(inputs, verbose=...)`` method.
        num_augmentations: Number of complete passes over the generator.

    Returns:
        Tuple ``(features, labels)``: the per-batch model outputs stacked along
        axis 0 and the matching labels concatenated in the same order.

    Raises:
        ValueError: If there is nothing to extract (no files or no passes).
    """
    num_samples = len(generator.filenames)
    # Ceiling division: the last batch may be smaller than batch_size
    num_batches = -(-num_samples // generator.batch_size)

    if num_augmentations < 1 or num_batches == 0:
        # Fail with a clear message instead of the opaque error np.vstack([]) would raise
        raise ValueError(
            "extract_features needs at least one sample and one pass "
            f"(got {num_samples} samples, num_augmentations={num_augmentations})"
        )

    features = []
    labels = []
    for _ in range(num_augmentations):
        generator.reset()
        for _ in range(num_batches):
            inputs_batch, labels_batch = next(generator)
            # verbose=0: avoid printing one progress bar per batch
            features.append(model.predict(inputs_batch, verbose=0))
            labels.append(labels_batch)

    return np.vstack(features), np.concatenate(labels)

# Extract backbone features with a single pass over the data.
# `datagen` applies no random transforms, so repeating the pass (the default
# num_augmentations=5) would only append exact duplicates of every image, and
# the random splits below would then leak copies of the same image across the
# train, validation and test sets.
train_features, train_labels = extract_features(train_generator, model, num_augmentations=1)

# Troubleshooting: Check unique classes and their distribution
print("Unique classes:", np.unique(train_labels))
unique, counts = np.unique(train_labels, return_counts=True)
print("Class distribution:", dict(zip(unique, counts)))

# Hold out 10% as the test set; stratify so every split keeps the class proportions
x_train_val, x_test, y_train_val, y_test = train_test_split(
    train_features, train_labels, test_size=0.1, random_state=42, stratify=train_labels
)

# 0.2222 of the remaining 90% is ~20% of all samples, giving a ~70/20/10 split
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.2222, random_state=42, stratify=y_train_val
)

# SMOTE works on 2D (samples, features) arrays, so flatten the feature maps
n_samples, x, y, z = x_train.shape
x_train_reshaped = x_train.reshape((n_samples, x * y * z))

# Apply SMOTE only on the training set so validation/test contain no synthetic samples
if len(np.unique(y_train)) > 1:
    smote = SMOTE(random_state=42)
    x_train_smote, y_train_smote = smote.fit_resample(x_train_reshaped, y_train)
    # Reshape back to original feature shape
    x_train_smote = x_train_smote.reshape((-1, x, y, z))
else:
    print("Skipping SMOTE due to insufficient classes")
    x_train_smote = x_train
    y_train_smote = y_train

# Print shapes for verification
print("x_train shape:", x_train_smote.shape)
print("y_train shape:", y_train_smote.shape)
print("x_val shape:", x_val.shape)
print("y_val shape:", y_val.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

# Create MobileNet model
def create_mobilenet_model(input_shape, num_classes=2):
    """Build the classification head that is trained on pre-extracted feature maps.

    The head only depends on the shape of the feature maps it receives, not on
    the backbone that produced them.

    Args:
        input_shape: Shape of one feature map, without the batch axis.
        num_classes: Number of softmax outputs (defaults to the 2 classes
            used in this cell).

    Returns:
        An uncompiled Keras ``Model``: global average pooling, a 1024-unit
        L2-regularised ReLU layer, dropout (0.7) and a softmax output layer.
    """
    inputs = Input(shape=input_shape)
    x = GlobalAveragePooling2D()(inputs)
    x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = Dropout(0.7)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=inputs, outputs=predictions)

# Per-sample shape of the extracted feature maps (everything after the batch axis)
input_shape = train_features.shape[1:]

# Rebind `model` from the backbone to the trainable classification head.
# sparse_categorical_crossentropy matches the integer labels from class_mode='sparse'.
model = create_mobilenet_model(input_shape)
model.compile(optimizer=Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callbacks (all driven by validation loss)
model_save_path = r'C:\Users\nanda\OneDrive\Desktop\mobilenet_legit_0-1_compleltly_smote_model_900_classification_final_2.keras'
# Stop after 5 epochs without improvement and roll back to the best weights seen
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Write the model to `model_save_path` each time val_loss improves
checkpoint = ModelCheckpoint(model_save_path, monitor='val_loss', verbose=1, save_best_only=True)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, but never below 1e-7
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)

# Train the head on the SMOTE-resampled training features; validation uses the untouched split.
# epochs=900 is only an upper bound; early stopping is expected to end training sooner.
history = model.fit(
    x_train_smote, y_train_smote,
    validation_data=(x_val, y_val),
    epochs=900,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Plot training and validation loss
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training vs. Validation Loss')
plt.legend()
plt.show()

# Evaluate the model on the test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Save the final model
# NOTE(review): this is the same path as `model_save_path`, so the save below overwrites
# the checkpoint written during training.
final_model_save_path = r'C:\Users\nanda\OneDrive\Desktop\mobilenet_legit_0-1_compleltly_smote_model_900_classification_final_2.keras'
model.save(final_model_save_path)

# Integer label ids in the order of `classes`. Passing them explicitly keeps every
# metric aligned with `classes` even when a class is missing from the test split;
# otherwise the confusion matrix shrinks and classification_report rejects `target_names`.
class_ids = list(range(len(classes)))

# Predict probabilities for each class
y_pred_prob = model.predict(x_test)

# Predicted class labels (argmax gives the class with highest probability)
y_pred = np.argmax(y_pred_prob, axis=1)

# Print confusion matrix (rows = actual class, columns = predicted class)
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
print('Confusion Matrix:')
print(cm)

# Display visual confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Calculate and print classification report
print('Classification Report:')
print(classification_report(y_test, y_pred, labels=class_ids, target_names=classes))

# Calculate F1 score, precision, and recall (sensitivity) for each class;
# average=None returns one value per entry of `class_ids`
f1 = f1_score(y_test, y_pred, labels=class_ids, average=None)
precision = precision_score(y_test, y_pred, labels=class_ids, average=None)
recall = recall_score(y_test, y_pred, labels=class_ids, average=None)

# Calculate specificity for each class
def specificity_score(y_true, y_pred, class_label):
    """Return the one-vs-rest specificity TN / (TN + FP) for ``class_label``.

    Samples whose true label differs from ``class_label`` are the negatives;
    a negative counts as a true negative when it is also not predicted as
    ``class_label``. Labels are compared by value, so the result stays correct
    when some class never occurs in ``y_true`` / ``y_pred``.

    Args:
        y_true: Ground-truth labels (array-like).
        y_pred: Predicted labels (array-like, same length as ``y_true``).
        class_label: Label value treated as the positive class.

    Returns:
        The specificity as a float, or ``nan`` when there are no negative
        samples (the ratio is undefined).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    negatives = y_true != class_label
    n_negative = int(np.count_nonzero(negatives))  # TN + FP
    if n_negative == 0:
        return float('nan')

    tn = int(np.count_nonzero(negatives & (y_pred != class_label)))
    return tn / n_negative

# One-vs-rest specificity per class, ordered like `classes`
specificity = [specificity_score(y_test, y_pred, label_id) for label_id, _ in enumerate(classes)]

# Per-class summary of the scores computed above
print("\nMetrics for each class:")
for i, class_name in enumerate(classes):
    print(
        f"\n{class_name}:",
        f"F1 Score: {f1[i]:.4f}",
        f"Precision: {precision[i]:.4f}",
        f"Sensitivity (Recall): {recall[i]:.4f}",
        f"Specificity: {specificity[i]:.4f}",
        sep="\n",
    )

# Cohen's kappa between the true and predicted labels
kappa = cohen_kappa_score(y_test, y_pred)
print(f"\nCohen's Kappa Coefficient: {kappa:.4f}")

# One-vs-rest ROC curve and AUC per class, all drawn on one figure
plt.figure(figsize=(10, 8))
for i, class_name in enumerate(classes):
    fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob[:, i], pos_label=i)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{class_name} ROC curve (area = {roc_auc:.2f})')
# Dashed diagonal as the chance-level reference
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
#InceptionV3 2-3 classification
import numpy as np
import os
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, cohen_kappa_score
import seaborn as sns

# Root directory of the dataset; flow_from_directory below reads the sub-folders named in `classes`
train_val_dir = r"C:\Users\nanda\OneDrive\Desktop\2-3 classification"

# Data preparation: pixel values are only rescaled by 1/255, no random augmentation is configured.
# NOTE(review): the `preprocess_input` imported for this backbone is not applied here — confirm
# that inference uses the same 1/255 scaling.
datagen = ImageDataGenerator(rescale=1./255)

# Class folder names; a class's integer label id is its index in this list
# (so 'Mayo 2' is label 0 and 'Mayo 3' is label 1 in this cell)
classes = ['Mayo 2','Mayo 3']

# Stream 224x224 images in shuffled batches of 16; class_mode='sparse' yields integer-valued labels
train_generator = datagen.flow_from_directory(
    train_val_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=True,
    classes=classes
)

# Print class indices and expected number of batches
print("Class indices:", train_generator.class_indices)
print("Expected number of batches:", len(train_generator))

# Load InceptionV3 (ImageNet weights) without its classification top, built for 224x224 inputs;
# it is only used to turn images into feature maps via predict().
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
# `model` re-exposes the backbone input/output; it is rebound to the classification head later in this cell
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
def extract_features(generator, model, num_augmentations=5):
    """Run every batch of ``generator`` through ``model`` and collect the outputs.

    Each pass resets the generator and consumes enough batches to cover all of
    ``generator.filenames``. Because every pass covers the same files, extra
    passes only add new information when the generator applies random
    transforms; otherwise they append duplicates of the first pass.

    Args:
        generator: Batch iterator exposing ``filenames``, ``batch_size`` and
            ``reset()``, and returning ``(inputs, labels)`` from ``next()``.
        model: Object with a Keras-style ``predict(inputs, verbose=...)`` method.
        num_augmentations: Number of complete passes over the generator.

    Returns:
        Tuple ``(features, labels)``: the per-batch model outputs stacked along
        axis 0 and the matching labels concatenated in the same order.

    Raises:
        ValueError: If there is nothing to extract (no files or no passes).
    """
    num_samples = len(generator.filenames)
    # Ceiling division: the last batch may be smaller than batch_size
    num_batches = -(-num_samples // generator.batch_size)

    if num_augmentations < 1 or num_batches == 0:
        # Fail with a clear message instead of the opaque error np.vstack([]) would raise
        raise ValueError(
            "extract_features needs at least one sample and one pass "
            f"(got {num_samples} samples, num_augmentations={num_augmentations})"
        )

    features = []
    labels = []
    for _ in range(num_augmentations):
        generator.reset()
        for _ in range(num_batches):
            inputs_batch, labels_batch = next(generator)
            # verbose=0: avoid printing one progress bar per batch
            features.append(model.predict(inputs_batch, verbose=0))
            labels.append(labels_batch)

    return np.vstack(features), np.concatenate(labels)

# Extract backbone features with a single pass over the data.
# `datagen` applies no random transforms, so repeating the pass (the default
# num_augmentations=5) would only append exact duplicates of every image, and
# the random splits below would then leak copies of the same image across the
# train, validation and test sets.
train_features, train_labels = extract_features(train_generator, model, num_augmentations=1)

# Troubleshooting: Check unique classes and their distribution
print("Unique classes:", np.unique(train_labels))
unique, counts = np.unique(train_labels, return_counts=True)
print("Class distribution:", dict(zip(unique, counts)))

# Hold out 10% as the test set; stratify so every split keeps the class proportions
x_train_val, x_test, y_train_val, y_test = train_test_split(
    train_features, train_labels, test_size=0.1, random_state=42, stratify=train_labels
)

# 0.2222 of the remaining 90% is ~20% of all samples, giving a ~70/20/10 split
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.2222, random_state=42, stratify=y_train_val
)

# SMOTE works on 2D (samples, features) arrays, so flatten the feature maps
n_samples, x, y, z = x_train.shape
x_train_reshaped = x_train.reshape((n_samples, x * y * z))

# Apply SMOTE only on the training set so validation/test contain no synthetic samples
if len(np.unique(y_train)) > 1:
    smote = SMOTE(random_state=42)
    x_train_smote, y_train_smote = smote.fit_resample(x_train_reshaped, y_train)
    # Reshape back to original feature shape
    x_train_smote = x_train_smote.reshape((-1, x, y, z))
else:
    print("Skipping SMOTE due to insufficient classes")
    x_train_smote = x_train
    y_train_smote = y_train

# Print shapes for verification
print("x_train shape:", x_train_smote.shape)
print("y_train shape:", y_train_smote.shape)
print("x_val shape:", x_val.shape)
print("y_val shape:", y_val.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

# Create the classification head trained on the extracted InceptionV3 features
def create_inceptionv3_model(input_shape, num_classes=2):
    """Build the classification head that is trained on pre-extracted feature maps.

    The head only depends on the shape of the feature maps it receives, not on
    the backbone that produced them.

    Args:
        input_shape: Shape of one feature map, without the batch axis.
        num_classes: Number of softmax outputs (defaults to the 2 classes
            used in this cell).

    Returns:
        An uncompiled Keras ``Model``: global average pooling, a 1024-unit
        L2-regularised ReLU layer, dropout (0.7) and a softmax output layer.
    """
    inputs = Input(shape=input_shape)
    x = GlobalAveragePooling2D()(inputs)
    x = Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.01))(x)
    x = Dropout(0.7)(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    return Model(inputs=inputs, outputs=predictions)

# Get the shape of the extracted features (everything after the sample axis);
# this is the input shape of the classifier head.
input_shape = train_features.shape[1:]

# NOTE: `model` is rebound here. It previously named the feature extractor and
# from this point on names the classifier head.
model = create_inceptionv3_model(input_shape)
# Labels are integer class ids (class_mode='sparse'), hence the sparse loss.
model.compile(optimizer=Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callbacks
# NOTE(review): the file name says "inceptionv3" although this cell's backbone
# is DenseNet121 - confirm the intended artifact name.
model_save_path = r'C:\Users\nanda\OneDrive\Desktop\inceptionv3_legit_2-3_compleltly_smote_model_900_classification_final_2.keras'
# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Write the model to disk only when val_loss improves.
checkpoint = ModelCheckpoint(model_save_path, monitor='val_loss', verbose=1, save_best_only=True)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, never below 1e-7.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)

# Train on the SMOTE-resampled training set; the validation data is not resampled.
# epochs=900 is an upper bound; early stopping normally ends training sooner.
history = model.fit(
    x_train_smote, y_train_smote,
    validation_data=(x_val, y_val),
    epochs=900,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Plot training and validation loss per epoch
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training vs. Validation Loss')
plt.legend()
plt.show()

# Evaluate the model on the held-out test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Save the final model
# NOTE: this is the same path as model_save_path, so the file written by
# ModelCheckpoint is overwritten with the model's current weights.
final_model_save_path = r'C:\Users\nanda\OneDrive\Desktop\inceptionv3_legit_2-3_compleltly_smote_model_900_classification_final_2.keras'
model.save(final_model_save_path)

# Predict probabilities for each class (one row per test sample)
y_pred_prob = model.predict(x_test)

# Predicted class labels (argmax gives the class with highest probability)
y_pred = np.argmax(y_pred_prob, axis=1)

# Print confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

# Display visual confusion matrix
# NOTE(review): the tick labels assume the matrix has one row/column per entry
# of `classes`, i.e. every class occurs in y_test or y_pred - confirm.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Calculate and print classification report
print('Classification Report:')
print(classification_report(y_test, y_pred, target_names=classes))

# Calculate F1 score, precision, and recall (sensitivity) for each class;
# average=None yields one value per class instead of a single aggregate.
f1 = f1_score(y_test, y_pred, average=None)
precision = precision_score(y_test, y_pred, average=None)
recall = recall_score(y_test, y_pred, average=None)

# Calculate specificity for each class
def specificity_score(y_true, y_pred, class_label):
    """Return the one-vs-rest specificity TN / (TN + FP) for ``class_label``.

    The counts are taken directly from the label arrays instead of indexing a
    confusion matrix by class id. A confusion matrix built without an explicit
    label list only has rows for labels that occur in the data, so indexing it
    by class id fails or reads the wrong row when a class is missing.

    Args:
        y_true: Sequence of ground-truth class labels.
        y_pred: Sequence of predicted class labels, same length as ``y_true``.
        class_label: The label treated as the positive class.

    Returns:
        The specificity as a float, or NaN when every entry of ``y_true``
        equals ``class_label`` (there are no negatives to score).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Negatives are all samples whose true class is not the class of interest.
    negatives = y_true != class_label
    tn = int(np.count_nonzero(negatives & (y_pred != class_label)))
    fp = int(np.count_nonzero(negatives & (y_pred == class_label)))
    if tn + fp == 0:
        return float('nan')
    return tn / (tn + fp)

# One-vs-rest specificity for every class index
specificity = [specificity_score(y_test, y_pred, idx) for idx in range(len(classes))]

# Print results for each class
print("\nMetrics for each class:")
for i, class_name in enumerate(classes):
    report_lines = [
        f"\n{class_name}:",
        f"F1 Score: {f1[i]:.4f}",
        f"Precision: {precision[i]:.4f}",
        f"Sensitivity (Recall): {recall[i]:.4f}",
        f"Specificity: {specificity[i]:.4f}",
    ]
    print("\n".join(report_lines))

# Calculate Cohen's Kappa coefficient
kappa = cohen_kappa_score(y_test, y_pred)
print(f"\nCohen's Kappa Coefficient: {kappa:.4f}")

# Plot ROC curve and calculate AUC for each class (one-vs-rest: class i is the
# positive label, scored by its predicted probability column)
plt.figure(figsize=(10, 8))
for i, curve_name in enumerate(classes):
    class_scores = y_pred_prob[:, i]
    fpr, tpr, thresholds = roc_curve(y_test, class_scores, pos_label=i)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{curve_name} ROC curve (area = {roc_auc:.2f})')
# Diagonal reference line for a chance-level classifier
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
#MobileNet classification
import numpy as np
import os
from keras.applications.mobilenet import MobileNet, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, cohen_kappa_score
import seaborn as sns

# Define paths
train_val_dir = r"C:\Users\nanda\OneDrive\Desktop\HyperKvasir Classification"

# Data preparation: the ImageNet-pretrained MobileNet expects inputs prepared
# by its own preprocess_input (pixels mapped to [-1, 1]). A plain 1/255 rescale
# feeds the frozen backbone a value range it was not trained on.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Define classes (the order fixes the integer label assigned to each folder)
classes = ['Mayo 0','Mayo 1','Mayo 2','Mayo 3']

# Load data with class_mode='sparse' so labels are integer class ids
train_generator = datagen.flow_from_directory(
    train_val_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=True,
    classes=classes
)

# Print class indices and expected number of batches
print("Class indices:", train_generator.class_indices)
print("Expected number of batches:", len(train_generator))

# Load MobileNet model without the top layers; it is used only as a fixed
# feature extractor whose output is the last convolutional feature map.
base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
def extract_features(generator, model, num_augmentations=5):
    """Collect model outputs and labels over repeated full passes of a generator.

    Args:
        generator: Batch iterator exposing ``filenames``, ``batch_size`` and
            ``reset()``; ``next(generator)`` must yield ``(inputs, labels)``.
        model: Object whose ``predict(inputs)`` returns the features of a batch.
        num_augmentations: Number of complete passes over the generator.

    Returns:
        Tuple ``(features, labels)``: the per-batch predictions stacked along
        the first axis and the matching labels concatenated in the same order.
    """
    total_files = len(generator.filenames)
    # Ceiling division: a trailing partial batch still counts as one batch.
    whole_batches, leftover = divmod(total_files, generator.batch_size)
    batches_per_pass = whole_batches + (1 if leftover else 0)

    feature_chunks, label_chunks = [], []
    for _pass in range(num_augmentations):
        generator.reset()
        for _step in range(batches_per_pass):
            batch_inputs, batch_labels = next(generator)
            feature_chunks.append(model.predict(batch_inputs))
            label_chunks.append(batch_labels)

    return np.vstack(feature_chunks), np.concatenate(label_chunks)

# Run every image through the frozen backbone exactly once. The generator
# applies no random transforms, so additional passes would only produce exact
# duplicates of each image's features, and the random splits below would then
# place copies of the same image in the training, validation and test sets.
train_features, train_labels = extract_features(train_generator, model, num_augmentations=1)

# Troubleshooting: Check unique classes and their distribution
unique, counts = np.unique(train_labels, return_counts=True)
print("Unique classes:", unique)
print("Class distribution:", dict(zip(unique, counts)))

# Initial train-test split (10% held out). Stratified so each subset keeps the
# original class proportions, which matters for the imbalanced Mayo grades.
x_train_val, x_test, y_train_val, y_test = train_test_split(
    train_features, train_labels, test_size=0.1, random_state=42, stratify=train_labels
)

# Train-validation split: 0.2222 of the remaining 90% is about 20% of all
# samples, i.e. roughly a 70/20/10 train/validation/test partition.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.2222, random_state=42, stratify=y_train_val
)

# Reshape features for SMOTE (only for training set): SMOTE works on 2-D
# (samples, features) input, so each feature map is flattened to one vector.
n_samples, x, y, z = x_train.shape
x_train_reshaped = x_train.reshape((n_samples, x * y * z))

# Apply SMOTE only on the training set so validation and test data stay real
if len(np.unique(y_train)) > 1:
    smote = SMOTE(random_state=42)
    x_train_smote, y_train_smote = smote.fit_resample(x_train_reshaped, y_train)
    # Reshape back to original feature shape
    x_train_smote = x_train_smote.reshape((-1, x, y, z))
else:
    print("Skipping SMOTE due to insufficient classes")
    x_train_smote = x_train
    y_train_smote = y_train

# Print shapes for verification
print("x_train shape:", x_train_smote.shape)
print("y_train shape:", y_train_smote.shape)
print("x_val shape:", x_val.shape)
print("y_val shape:", y_val.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

# Create MobileNet model
def create_mobilenet_model(input_shape):
    """Build the 4-class classifier head that runs on extracted feature maps.

    Args:
        input_shape: Shape of one extracted feature map (without the batch axis).

    Returns:
        An uncompiled Keras ``Model``: global average pooling, a 1024-unit
        L2-regularized ReLU layer, dropout 0.7, and a 4-way softmax output.
    """
    feature_maps = Input(shape=input_shape)
    pooled = GlobalAveragePooling2D()(feature_maps)
    hidden = Dense(
        1024,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.01),
    )(pooled)
    regularized = Dropout(0.7)(hidden)
    # 4 output neurons for 4 categories
    class_probs = Dense(4, activation='softmax')(regularized)
    return Model(inputs=feature_maps, outputs=class_probs)

# Get the shape of the extracted features (everything after the sample axis);
# this is the input shape of the classifier head.
input_shape = train_features.shape[1:]

# NOTE: `model` is rebound here. It previously named the feature extractor and
# from this point on names the classifier head.
model = create_mobilenet_model(input_shape)
# Labels are integer class ids (class_mode='sparse'), hence the sparse loss.
model.compile(optimizer=Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callbacks
model_save_path = r'C:\Users\nanda\OneDrive\Desktop\mobilenet_legit_compleltly_smote_model_900_classification_final_2.keras'
# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Write the model to disk only when val_loss improves.
checkpoint = ModelCheckpoint(model_save_path, monitor='val_loss', verbose=1, save_best_only=True)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, never below 1e-7.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)

# Train on the SMOTE-resampled training set; the validation data is not resampled.
# epochs=900 is an upper bound; early stopping normally ends training sooner.
history = model.fit(
    x_train_smote, y_train_smote,
    validation_data=(x_val, y_val),
    epochs=900,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Plot training and validation loss per epoch
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training vs. Validation Loss')
plt.legend()
plt.show()

# Evaluate the model on the held-out test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Save the final model
# NOTE: this is the same path as model_save_path, so the file written by
# ModelCheckpoint is overwritten with the model's current weights.
final_model_save_path = r'C:\Users\nanda\OneDrive\Desktop\mobilenet_legit_compleltly_smote_model_900_classification_final_2.keras'
model.save(final_model_save_path)

# Predict probabilities for each class (one row per test sample)
y_pred_prob = model.predict(x_test)

# Predicted class labels (argmax gives the class with highest probability)
y_pred = np.argmax(y_pred_prob, axis=1)

# Print confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

# Display visual confusion matrix
# NOTE(review): the tick labels assume the matrix has one row/column per entry
# of `classes`, i.e. every class occurs in y_test or y_pred - confirm.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Calculate and print classification report
print('Classification Report:')
print(classification_report(y_test, y_pred, target_names=classes))

# Calculate F1 score, precision, and recall (sensitivity) for each class;
# average=None yields one value per class instead of a single aggregate.
f1 = f1_score(y_test, y_pred, average=None)
precision = precision_score(y_test, y_pred, average=None)
recall = recall_score(y_test, y_pred, average=None)

# Calculate specificity for each class
def specificity_score(y_true, y_pred, class_label):
    """Return the one-vs-rest specificity TN / (TN + FP) for ``class_label``.

    The counts are taken directly from the label arrays instead of indexing a
    confusion matrix by class id. A confusion matrix built without an explicit
    label list only has rows for labels that occur in the data, so indexing it
    by class id fails or reads the wrong row when a class is missing.

    Args:
        y_true: Sequence of ground-truth class labels.
        y_pred: Sequence of predicted class labels, same length as ``y_true``.
        class_label: The label treated as the positive class.

    Returns:
        The specificity as a float, or NaN when every entry of ``y_true``
        equals ``class_label`` (there are no negatives to score).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Negatives are all samples whose true class is not the class of interest.
    negatives = y_true != class_label
    tn = int(np.count_nonzero(negatives & (y_pred != class_label)))
    fp = int(np.count_nonzero(negatives & (y_pred == class_label)))
    if tn + fp == 0:
        return float('nan')
    return tn / (tn + fp)

# One-vs-rest specificity for every class index
specificity = [specificity_score(y_test, y_pred, idx) for idx in range(len(classes))]

# Print results for each class
print("\nMetrics for each class:")
for i, class_name in enumerate(classes):
    report_lines = [
        f"\n{class_name}:",
        f"F1 Score: {f1[i]:.4f}",
        f"Precision: {precision[i]:.4f}",
        f"Sensitivity (Recall): {recall[i]:.4f}",
        f"Specificity: {specificity[i]:.4f}",
    ]
    print("\n".join(report_lines))

# Calculate Cohen's Kappa coefficient
kappa = cohen_kappa_score(y_test, y_pred)
print(f"\nCohen's Kappa Coefficient: {kappa:.4f}")

# Plot ROC curve and calculate AUC for each class (one-vs-rest: class i is the
# positive label, scored by its predicted probability column)
plt.figure(figsize=(10, 8))
for i, curve_name in enumerate(classes):
    class_scores = y_pred_prob[:, i]
    fpr, tpr, thresholds = roc_curve(y_test, class_scores, pos_label=i)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{curve_name} ROC curve (area = {roc_auc:.2f})')
# Diagonal reference line for a chance-level classifier
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()

In [None]:
#InceptionV3 classification
import numpy as np
import os
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout, Input
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras import regularizers
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report, roc_curve, auc, cohen_kappa_score
import seaborn as sns

# Define paths
train_val_dir = r"C:\Users\nanda\OneDrive\Desktop\HyperKvasir Classification"

# Data preparation: the ImageNet-pretrained InceptionV3 expects inputs prepared
# by its own preprocess_input (pixels mapped to [-1, 1]). A plain 1/255 rescale
# feeds the frozen backbone a value range it was not trained on.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Define classes (the order fixes the integer label assigned to each folder)
classes = ['Mayo 0','Mayo 1','Mayo 2','Mayo 3']

# Load data with class_mode='sparse' so labels are integer class ids
train_generator = datagen.flow_from_directory(
    train_val_dir,
    target_size=(224, 224),
    batch_size=16,
    class_mode='sparse',
    shuffle=True,
    classes=classes
)

# Print class indices and expected number of batches
print("Class indices:", train_generator.class_indices)
print("Expected number of batches:", len(train_generator))

# Load InceptionV3 model without the top layers; it is used only as a fixed
# feature extractor whose output is the last convolutional feature map.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
model = Model(inputs=base_model.input, outputs=base_model.output)

# Extract features
def extract_features(generator, model, num_augmentations=5):
    """Collect model outputs and labels over repeated full passes of a generator.

    Args:
        generator: Batch iterator exposing ``filenames``, ``batch_size`` and
            ``reset()``; ``next(generator)`` must yield ``(inputs, labels)``.
        model: Object whose ``predict(inputs)`` returns the features of a batch.
        num_augmentations: Number of complete passes over the generator.

    Returns:
        Tuple ``(features, labels)``: the per-batch predictions stacked along
        the first axis and the matching labels concatenated in the same order.
    """
    total_files = len(generator.filenames)
    # Ceiling division: a trailing partial batch still counts as one batch.
    whole_batches, leftover = divmod(total_files, generator.batch_size)
    batches_per_pass = whole_batches + (1 if leftover else 0)

    feature_chunks, label_chunks = [], []
    for _pass in range(num_augmentations):
        generator.reset()
        for _step in range(batches_per_pass):
            batch_inputs, batch_labels = next(generator)
            feature_chunks.append(model.predict(batch_inputs))
            label_chunks.append(batch_labels)

    return np.vstack(feature_chunks), np.concatenate(label_chunks)

# Run every image through the frozen backbone exactly once. The generator
# applies no random transforms, so additional passes would only produce exact
# duplicates of each image's features, and the random splits below would then
# place copies of the same image in the training, validation and test sets.
train_features, train_labels = extract_features(train_generator, model, num_augmentations=1)

# Troubleshooting: Check unique classes and their distribution
unique, counts = np.unique(train_labels, return_counts=True)
print("Unique classes:", unique)
print("Class distribution:", dict(zip(unique, counts)))

# Initial train-test split (10% held out). Stratified so each subset keeps the
# original class proportions, which matters for the imbalanced Mayo grades.
x_train_val, x_test, y_train_val, y_test = train_test_split(
    train_features, train_labels, test_size=0.1, random_state=42, stratify=train_labels
)

# Train-validation split: 0.2222 of the remaining 90% is about 20% of all
# samples, i.e. roughly a 70/20/10 train/validation/test partition.
x_train, x_val, y_train, y_val = train_test_split(
    x_train_val, y_train_val, test_size=0.2222, random_state=42, stratify=y_train_val
)

# Reshape features for SMOTE (only for training set): SMOTE works on 2-D
# (samples, features) input, so each feature map is flattened to one vector.
n_samples, x, y, z = x_train.shape
x_train_reshaped = x_train.reshape((n_samples, x * y * z))

# Apply SMOTE only on the training set so validation and test data stay real
if len(np.unique(y_train)) > 1:
    smote = SMOTE(random_state=42)
    x_train_smote, y_train_smote = smote.fit_resample(x_train_reshaped, y_train)
    # Reshape back to original feature shape
    x_train_smote = x_train_smote.reshape((-1, x, y, z))
else:
    print("Skipping SMOTE due to insufficient classes")
    x_train_smote = x_train
    y_train_smote = y_train

# Print shapes for verification
print("x_train shape:", x_train_smote.shape)
print("y_train shape:", y_train_smote.shape)
print("x_val shape:", x_val.shape)
print("y_val shape:", y_val.shape)
print("x_test shape:", x_test.shape)
print("y_test shape:", y_test.shape)

# Create InceptionV3 model
def create_inceptionv3_model(input_shape):
    """Build the 4-class classifier head that runs on extracted feature maps.

    Args:
        input_shape: Shape of one extracted feature map (without the batch axis).

    Returns:
        An uncompiled Keras ``Model``: global average pooling, a 1024-unit
        L2-regularized ReLU layer, dropout 0.7, and a 4-way softmax output.
    """
    feature_maps = Input(shape=input_shape)
    pooled = GlobalAveragePooling2D()(feature_maps)
    hidden = Dense(
        1024,
        activation='relu',
        kernel_regularizer=regularizers.l2(0.01),
    )(pooled)
    regularized = Dropout(0.7)(hidden)
    # 4 output neurons for 4 categories
    class_probs = Dense(4, activation='softmax')(regularized)
    return Model(inputs=feature_maps, outputs=class_probs)

# Get the shape of the extracted features (everything after the sample axis);
# this is the input shape of the classifier head.
input_shape = train_features.shape[1:]

# NOTE: `model` is rebound here. It previously named the feature extractor and
# from this point on names the classifier head.
model = create_inceptionv3_model(input_shape)
# Labels are integer class ids (class_mode='sparse'), hence the sparse loss.
model.compile(optimizer=Adam(learning_rate=1e-5), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Callbacks
model_save_path = r'C:\Users\nanda\OneDrive\Desktop\inceptionv3_legit_compleltly_smote_model_900_classification_final_1.keras'
# Stop after 5 epochs without val_loss improvement and roll back to the best weights.
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
# Write the model to disk only when val_loss improves.
checkpoint = ModelCheckpoint(model_save_path, monitor='val_loss', verbose=1, save_best_only=True)
# Multiply the learning rate by 0.2 after 3 stagnant epochs, never below 1e-7.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=1e-7, verbose=1)

# Train on the SMOTE-resampled training set; the validation data is not resampled.
# epochs=900 is an upper bound; early stopping normally ends training sooner.
history = model.fit(
    x_train_smote, y_train_smote,
    validation_data=(x_val, y_val),
    epochs=900,
    callbacks=[early_stop, checkpoint, reduce_lr]
)

# Plot training and validation loss per epoch
plt.figure(figsize=(10, 6))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training vs. Validation Loss')
plt.legend()
plt.show()

# Evaluate the model on the held-out test set
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f'Test accuracy: {test_acc:.4f}')

# Save the final model
# NOTE: this is the same path as model_save_path, so the file written by
# ModelCheckpoint is overwritten with the model's current weights.
final_model_save_path = r'C:\Users\nanda\OneDrive\Desktop\inceptionv3_legit_compleltly_smote_model_900_classification_final_1.keras'
model.save(final_model_save_path)

# Predict probabilities for each class (one row per test sample)
y_pred_prob = model.predict(x_test)

# Predicted class labels (argmax gives the class with highest probability)
y_pred = np.argmax(y_pred_prob, axis=1)

# Print confusion matrix (rows: actual class, columns: predicted class)
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:')
print(cm)

# Display visual confusion matrix
# NOTE(review): the tick labels assume the matrix has one row/column per entry
# of `classes`, i.e. every class occurs in y_test or y_pred - confirm.
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=classes, yticklabels=classes)
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.title('Confusion Matrix')
plt.show()

# Calculate and print classification report
print('Classification Report:')
print(classification_report(y_test, y_pred, target_names=classes))

# Calculate F1 score, precision, and recall (sensitivity) for each class;
# average=None yields one value per class instead of a single aggregate.
f1 = f1_score(y_test, y_pred, average=None)
precision = precision_score(y_test, y_pred, average=None)
recall = recall_score(y_test, y_pred, average=None)

# Calculate specificity for each class
def specificity_score(y_true, y_pred, class_label):
    """Return the one-vs-rest specificity TN / (TN + FP) for ``class_label``.

    The counts are taken directly from the label arrays instead of indexing a
    confusion matrix by class id. A confusion matrix built without an explicit
    label list only has rows for labels that occur in the data, so indexing it
    by class id fails or reads the wrong row when a class is missing.

    Args:
        y_true: Sequence of ground-truth class labels.
        y_pred: Sequence of predicted class labels, same length as ``y_true``.
        class_label: The label treated as the positive class.

    Returns:
        The specificity as a float, or NaN when every entry of ``y_true``
        equals ``class_label`` (there are no negatives to score).
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Negatives are all samples whose true class is not the class of interest.
    negatives = y_true != class_label
    tn = int(np.count_nonzero(negatives & (y_pred != class_label)))
    fp = int(np.count_nonzero(negatives & (y_pred == class_label)))
    if tn + fp == 0:
        return float('nan')
    return tn / (tn + fp)

# One-vs-rest specificity for every class index
specificity = [specificity_score(y_test, y_pred, idx) for idx in range(len(classes))]

# Print results for each class
print("\nMetrics for each class:")
for i, class_name in enumerate(classes):
    report_lines = [
        f"\n{class_name}:",
        f"F1 Score: {f1[i]:.4f}",
        f"Precision: {precision[i]:.4f}",
        f"Sensitivity (Recall): {recall[i]:.4f}",
        f"Specificity: {specificity[i]:.4f}",
    ]
    print("\n".join(report_lines))

# Calculate Cohen's Kappa coefficient
kappa = cohen_kappa_score(y_test, y_pred)
print(f"\nCohen's Kappa Coefficient: {kappa:.4f}")

# Plot ROC curve and calculate AUC for each class (one-vs-rest: class i is the
# positive label, scored by its predicted probability column)
plt.figure(figsize=(10, 8))
for i, curve_name in enumerate(classes):
    class_scores = y_pred_prob[:, i]
    fpr, tpr, thresholds = roc_curve(y_test, class_scores, pos_label=i)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, lw=2, label=f'{curve_name} ROC curve (area = {roc_auc:.2f})')
# Diagonal reference line for a chance-level classifier
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc="lower right")
plt.show()