In [1]:
import os
import cv2
import numpy as np
from skimage.feature import local_binary_pattern
from transformers import ViTFeatureExtractor, ViTModel
import torch
from tqdm import tqdm
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import random
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, roc_auc_score, roc_curve, auc, precision_recall_curve, log_loss, confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, roc_curve, precision_recall_curve, confusion_matrix
from transformers import ViTFeatureExtractor, ViTModel, ViTConfig
from skimage.feature import local_binary pattern
from tqdm import tqdm
import seaborn as sns
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Define additional transformer block
class ExtraTransformerBlock(nn.Module):
    def __init__(self, config):
        super(ExtraTransformerBlock, self).__init__()
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=config.hidden_size, nhead=config.num_attention_heads)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=1)  # Add one extra block

    def forward(self, hidden_states):
        return self.transformer_encoder(hidden_states)

# Modified ViT model with an additional Transformer block
class ModifiedViTModel(nn.Module):
    def __init__(self):
        super(ModifiedViTModel, self).__init__()
        # Load the pretrained ViT model
        self.vit = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
        config = ViTConfig.from_pretrained('google/vit-base-patch16-224-in21k')
        # Add extra transformer block
        self.extra_block = ExtraTransformerBlock(config)

    def forward(self, inputs):
        # Extract the original ViT features
        outputs = self.vit(**inputs)
        last_hidden_state = outputs.last_hidden_state

        # Pass through the additional transformer block
        extra_features = self.extra_block(last_hidden_state)
        return extra_features

# Initialize the ViT feature extractor and the modified ViT model
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224-in21k')
model = ModifiedViTModel()

# Function to extract ViT features
def extract_vit_features(img_path):
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    inputs = feature_extractor(images=img, return_tensors="pt")
    with torch.no_grad():
        outputs = model(inputs)
    return outputs.flatten().detach().numpy()

# Other functions remain unchanged
def extract_sift_features(img):
    sift = cv2.SIFT_create()
    keypoints, descriptors = sift.detectAndCompute(img, None)
    if descriptors is None:
        descriptors = np.zeros((1, 128))  # Assuming 128-dimensional descriptors
    return descriptors

def extract_lbp_features(img):
    lbp = local_binary_pattern(img, P=8, R=1, method='uniform')
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, 11), range=(0, 10))
    hist = hist.astype("float")
    hist /= (hist.sum() + 1e-6)
    return hist

def concatenate_features(sift_features, lbp_features, vit_features):
    sift_features = sift_features.flatten() if sift_features is not None else np.zeros(128)
    lbp_features = lbp_features.flatten() if lbp_features is not None else np.zeros(11)
    vit_features = vit_features.flatten() if vit_features is not None else np.zeros(768)

    combined_features = np.concatenate((sift_features, lbp_features, vit_features))

    fixed_length = 907  # Ensure the combined feature vector has a consistent size
    if len(combined_features) < fixed_length:
        combined_features = np.concatenate((combined_features, np.zeros(fixed_length - len(combined_features))))
    elif len(combined_features) > fixed_length:
        combined_features = combined_features[:fixed_length]

    return combined_features

# Process images and extract features
source_dir = './Cropped_Images/'
features, labels, label_map, reverse_label_map = process_images_in_directory(source_dir)

print(f"Feature vector length: {len(features[0])}")
print(len(features))




Feature vector length: 907
953


In [3]:
print("Label map:", label_map)

Label map: {'': 0, 'Person_001': 1, 'Person_002': 2, 'Person_003': 3, 'Person_004': 4, 'Person_005': 5, 'Person_006': 6, 'Person_007': 7, 'Person_008': 8, 'Person_009': 9, 'Person_010': 10, 'Person_011': 11, 'Person_012': 12, 'Person_013': 13, 'Person_014': 14, 'Person_015': 15, 'Person_016': 16, 'Person_017': 17, 'Person_018': 18, 'Person_019': 19, 'Person_020': 20, 'Person_021': 21, 'Person_022': 22, 'Person_023': 23, 'Person_024': 24, 'Person_025': 25, 'Person_026': 26, 'Person_027': 27, 'Person_028': 28, 'Person_029': 29, 'Person_030': 30, 'Person_031': 31, 'Person_032': 32, 'Person_033': 33, 'Person_034': 34, 'Person_035': 35, 'Person_036': 36, 'Person_037': 37, 'Person_038': 38, 'Person_039': 39, 'Person_040': 40, 'Person_041': 41, 'Person_042': 42, 'Person_043': 43, 'Person_044': 44, 'Person_045': 45, 'Person_046': 46, 'Person_047': 47, 'Person_048': 48, 'Person_049': 49, 'Person_050': 50, 'Person_051': 51, 'Person_052': 52, 'Person_053': 53, 'Person_054': 54, 'Person_055': 55, 

In [4]:
labels

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [5]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Example padding to a fixed length
fixed_length = 1000  # Define a fixed length
features_padded = pad_sequences(features, maxlen=fixed_length, padding='post', dtype='float32')


In [6]:
features = features_padded

In [7]:
# Convert lists to NumPy arrays
features = np.array(features)
labels = np.array(labels)

# Check the shapes again
print(features.shape)
print(labels.shape)

(953, 1000)
(953,)


In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, roc_auc_score, roc_curve, precision_recall_curve, confusion_matrix
from sklearn.preprocessing import LabelBinarizer
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
from sklearn.decomposition import KernelPCA
from sklearn.model_selection import train_test_split

# Perform Kernel PCA
kpca = KernelPCA(n_components=300, kernel='rbf')
features = kpca.fit_transform(features)

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.3, random_state=42)

In [9]:
# import os
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, cross_val_predict
# from sklearn.preprocessing import StandardScaler
# from sklearn.metrics import classification_report, roc_curve, auc, accuracy_score, precision_score, recall_score, f1_score
# from sklearn.decomposition import PCA, KernelPCA
# from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
# from sklearn.manifold import TSNE
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.svm import LinearSVC
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.linear_model import LogisticRegression
# from xgboost import XGBClassifier
# from sklearn.calibration import CalibratedClassifierCV
# from sklearn.metrics import RocCurveDisplay, DetCurveDisplay
# import matplotlib.pyplot as plt
# import seaborn as sns
# from sklearn.decomposition import PCA, KernelPCA, SparsePCA, TruncatedSVD, FastICA, FactorAnalysis, NMF, IncrementalPCA
# from sklearn.manifold import TSNE, MDS, Isomap, LocallyLinearEmbedding
# from sklearn.random_projection import GaussianRandomProjection
# import umap

# # Create a directory to save plots and metrics if it doesn't exist
# output_dir = "model_outputs"
# os.makedirs(output_dir, exist_ok=True)

# # Standardize the features
# scaler = StandardScaler()
# features = scaler.fit_transform(features)

# # Dimensionality Reduction and Feature Selection Techniques
# def apply_pca(X, n_components=0.95):
#     pca = PCA(n_components=n_components)
#     return pca.fit_transform(X)

# def apply_kernel_pca(X, n_components=300, kernel='rbf'):
#     kpca = KernelPCA(n_components=n_components, kernel=kernel)
#     return kpca.fit_transform(X)

# def apply_lda(X, y, n_components=1):
#     lda = LDA(n_components=n_components)
#     return lda.fit_transform(X, y)

# def apply_tsne(X, n_components=2):
#     tsne = TSNE(n_components=n_components)
#     return tsne.fit_transform(X)

# def apply_ipca(X, n_components=5, batch_size=500):
#     ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
#     return ipca.fit_transform(X)

# def apply_grp(X, n_components=10):
#     grp = GaussianRandomProjection(n_components=n_components)
#     return grp.fit_transform(X)

# def apply_umap(X, n_components=10):
#     umap_model = umap.UMAP(n_components=n_components)
#     return umap_model.fit_transform(X)


# # umap_model = umap.UMAP(n_components=4)
# # X_train = umap_model.fit_transform(X_train)



# # Feature Selection and Dimensionality Reduction
# X_pca = apply_pca(features)
# X_kpca = apply_kernel_pca(features)
# X_lda = apply_lda(features, labels)
# X_tsne = apply_tsne(features)
# X_ipca = apply_ipca(features)
# X_grp = apply_grp(features)
# X_umap = apply_umap(features)

# # Classification Algorithms with L1 Regularization
# models = {
#     'SVM (L1)': CalibratedClassifierCV(LinearSVC(penalty='l1', dual=False, max_iter=10000)),
#     'RandomForest': RandomForestClassifier(),
#     'KNN': KNeighborsClassifier(),
#     'LogisticRegression (L1)': LogisticRegression(penalty='l1', solver='liblinear'),
#     'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss')
# }

# # 5-Fold Cross Validation
# cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# def save_plot(fig, filename):
#     file_path = os.path.join(output_dir, filename)
#     fig.savefig(file_path)
#     plt.close(fig)

# def evaluate_model(X, y, method_name):
#     results = {}
#     metrics_list = []

#     for name, model in models.items():
#         y_pred = cross_val_predict(model, X, y, cv=cv, method='predict')
#         y_prob = cross_val_predict(model, X, y, cv=cv, method='predict_proba')[:, 1]

#         accuracy = accuracy_score(y, y_pred)
#         precision = precision_score(y, y_pred)
#         recall = recall_score(y, y_pred)
#         f1 = f1_score(y, y_pred)

#         # Print and collect metrics
#         print(f'{method_name} - {name} - Accuracy: {accuracy}')
#         print(f'{method_name} - {name} - Precision: {precision}')
#         print(f'{method_name} - {name} - Recall: {recall}')
#         print(f'{method_name} - {name} - F1 Score: {f1}')
        
#         metrics_list.append({
#             'Method': method_name,
#             'Model': name,
#             'Accuracy': accuracy,
#             'Precision': precision,
#             'Recall': recall,
#             'F1 Score': f1
#         })

#         # Storing Results
#         results[name] = {
#             'model': model,
#             'y_pred': y_pred,
#             'y_prob': y_prob
#         }

#         # ROC Curve
#         fpr, tpr, _ = roc_curve(y, y_prob)
#         roc_auc = auc(fpr, tpr)
#         fig, ax = plt.subplots()
#         ax.plot(fpr, tpr, label=f'{name} (AUC = {roc_auc:.2f})')
#         ax.plot([0, 1], [0, 1], 'k--')
#         ax.set_xlim([0.0, 1.0])
#         ax.set_ylim([0.0, 1.05])
#         ax.set_xlabel('False Positive Rate')
#         ax.set_ylabel('True Positive Rate')
#         ax.set_title(f'{method_name} - {name} ROC Curve')
#         ax.legend(loc="lower right")
#         save_plot(fig, f'{method_name}_{name}_ROC_Curve.png')

#     # Save metrics to CSV
#     metrics_df = pd.DataFrame(metrics_list)
#     metrics_df.to_csv(os.path.join(output_dir, f'{method_name}_metrics.csv'), index=False)

#     return results

# # Plot Accuracy and Loss
# def plot_metrics(name, y, y_pred):
#     fig, axes = plt.subplots(1, 2, figsize=(14, 5))

#     # Accuracy Plot
#     sns.barplot(x=['Accuracy'], y=[accuracy_score(y, y_pred)], ax=axes[0])
#     axes[0].set_title(f'{name} Accuracy')

#     # Loss Plot (1 - Accuracy)
#     sns.barplot(x=['Loss'], y=[1 - accuracy_score(y, y_pred)], ax=axes[1])
#     axes[1].set_title(f'{name} Loss')

#     save_plot(fig, f'{name}_Accuracy_Loss.png')

# # Plot Equal Error Rate (EER)
# def plot_eer(y, y_prob, name):
#     display = DetCurveDisplay.from_predictions(y, y_prob)
#     fpr, fnr = display.fpr, display.fnr
#     eer = fpr[np.nanargmin(np.abs(fpr - fnr))]
#     fig = plt.figure()
#     plt.title(f'{name} DET Curve (EER = {eer:.2f})')
#     display.plot()
#     save_plot(fig, f'{name}_DET_Curve_EER.png')
#     print(f'{name} Equal Error Rate (EER): {eer:.2f}')

# # Generate Plots for Each Model
# def generate_plots_for_all_models(results, method_name):
#     for name, result in results.items():
#         plot_metrics(f'{method_name} - {name}', labels, result['y_pred'])
#         plot_eer(labels, result['y_prob'], f'{method_name} - {name}')

# # Evaluate and Plot for Each Dimensionality Reduction Method
# pca_results = evaluate_model(X_pca, labels, 'PCA')
# generate_plots_for_all_models(pca_results, 'PCA')

# kpca_results = evaluate_model(X_kpca, labels, 'Kernel PCA')
# generate_plots_for_all_models(kpca_results, 'Kernel PCA')

# lda_results = evaluate_model(X_lda, labels, 'LDA')
# generate_plots_for_all_models(lda_results, 'LDA')

# tsne_results = evaluate_model(X_tsne, labels, 't-SNE')
# generate_plots_for_all_models(tsne_results, 't-SNE')


# ipca_results = evaluate_model(X_ipca, labels, 'IPCA')
# generate_plots_for_all_models(ipca_results, 'IPCA')

# grp_results = evaluate_model(X_grp, labels, 'GRP')
# generate_plots_for_all_models(grp_results, 'GRP')

# umap_results = evaluate_model(X_umap, labels, 'UMAP')
# generate_plots_for_all_models(umap_results, 'UMAP')




In [10]:
# import os
# import numpy as np
# import pandas as pd
# from sklearn.model_selection import train_test_split, StratifiedKFold
# from sklearn.preprocessing import StandardScaler
# from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
# from sklearn.decomposition import KernelPCA, IncrementalPCA
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.neighbors import KNeighborsClassifier
# from xgboost import XGBClassifier
# from sklearn.calibration import CalibratedClassifierCV
# from sklearn.random_projection import GaussianRandomProjection
# import umap
# from openpyxl import Workbook

# # Create a directory to save plots and metrics if it doesn't exist
# output_dir = "model_outputs"
# os.makedirs(output_dir, exist_ok=True)

# # Standardize the features
# scaler = StandardScaler()
# features = scaler.fit_transform(features)

# # Dimensionality Reduction and Feature Selection Techniques
# def apply_kernel_pca(X, n_components=300, kernel='rbf'):
#     kpca = KernelPCA(n_components=n_components, kernel=kernel)
#     return kpca.fit_transform(X)

# def apply_ipca(X, n_components=5, batch_size=500):
#     ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
#     return ipca.fit_transform(X)

# # def apply_grp(X, n_components=10):
# #     grp = GaussianRandomProjection(n_components=n_components)
# #     return grp.fit_transform(X)

# def apply_umap(X, n_components=10):
#     umap_model = umap.UMAP(n_components=n_components)
#     return umap_model.fit_transform(X)

# # Dimensionality Reduction
# # X_kpca = apply_kernel_pca(features)
# X_ipca = apply_ipca(features)
# # X_grp = apply_grp(features)
# X_umap = apply_umap(features)

# # Classification Algorithms
# models = {
#     'RandomForest': RandomForestClassifier(),
#     'KNN': KNeighborsClassifier(),
#     'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss')
# }

# # 5-Fold Cross Validation
# cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)

# def save_plot(fig, filename):
#     file_path = os.path.join(output_dir, filename)
#     fig.savefig(file_path)
#     plt.close(fig)

# def calculate_eer(fpr, tpr):
#     # Calculate the point where FPR and TPR are closest to the diagonal (EER)
#     eer = fpr[np.nanargmin(np.absolute((1 - tpr) - fpr))]
#     return eer

# def evaluate_model(X, y, method_name):
#     results = {}
#     metrics_list = []

#     # Split the data into train and test sets
#     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

#     for name, model in models.items():
#         # Train the model and predict on both train and test sets
#         model.fit(X_train, y_train)
#         y_train_pred = model.predict(X_train)
#         y_test_pred = model.predict(X_test)

#         # Calculate metrics for train set
#         train_accuracy = accuracy_score(y_train, y_train_pred)
#         train_precision = precision_score(y_train, y_train_pred)
#         train_recall = recall_score(y_train, y_train_pred)
#         train_f1 = f1_score(y_train, y_train_pred)

#         # Calculate metrics for test set
#         test_accuracy = accuracy_score(y_test, y_test_pred)
#         test_precision = precision_score(y_test, y_test_pred)
#         test_recall = recall_score(y_test, y_test_pred)
#         test_f1 = f1_score(y_test, y_test_pred)

#         # Calculate ROC curve and AUC for test set
#         fpr, tpr, _ = roc_curve(y_test, model.predict_proba(X_test)[:, 1])
#         roc_auc = auc(fpr, tpr)

#         # Calculate EER for test set
#         eer = calculate_eer(fpr, tpr)

#         # Print and collect metrics
#         print(f'{method_name} - {name} - Train Accuracy: {train_accuracy}')
#         print(f'{method_name} - {name} - Train Precision: {train_precision}')
#         print(f'{method_name} - {name} - Train Recall: {train_recall}')
#         print(f'{method_name} - {name} - Train F1 Score: {train_f1}')
#         print(f'{method_name} - {name} - Test Accuracy: {test_accuracy}')
#         print(f'{method_name} - {name} - Test Precision: {test_precision}')
#         print(f'{method_name} - {name} - Test Recall: {test_recall}')
#         print(f'{method_name} - {name} - Test F1 Score: {test_f1}')
#         print(f'{method_name} - {name} - Test AUC: {roc_auc}')
#         print(f'{method_name} - {name} - Test EER: {eer}')

#         metrics_list.append({
#             'Method': method_name,
#             'Model': name,
#             'Dimensionality Reduction Method': method_name,
#             'Train Accuracy': train_accuracy,
#             'Train Precision': train_precision,
#             'Train Recall': train_recall,
#             'Train F1 Score': train_f1,
#             'Test Accuracy': test_accuracy,
#             'Test Precision': test_precision,
#             'Test Recall': test_recall,
#             'Test F1 Score': test_f1,
#             'Test AUC': roc_auc,
#             'Test EER': eer
#         })

#         # Storing Results
#         results[name] = {
#             'model': model,
#             'y_train_pred': y_train_pred,
#             'y_test_pred': y_test_pred
#         }

#     # Save metrics to Excel
#     metrics_df = pd.DataFrame(metrics_list)
#     metrics_df.to_excel(os.path.join(output_dir, f'{method_name}_metrics.xlsx'), index=False)

#     return results

# # Evaluate for each Dimensionality Reduction Method
# # kpca_results = evaluate_model(X_kpca, labels, 'Kernel PCA')
# ipca_results = evaluate_model(X_ipca, labels, 'IPCA')
# # grp_results = evaluate_model(X_grp, labels, 'GRP')
# umap_results = evaluate_model(X_umap, labels, 'UMAP')


In [11]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_curve, auc
from sklearn.decomposition import IncrementalPCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.random_projection import GaussianRandomProjection
import umap

# Create a directory to save plots and metrics if it doesn't exist
output_dir = "model_outputs"
os.makedirs(output_dir, exist_ok=True)

# Standardize the features
scaler = StandardScaler()
features = scaler.fit_transform(features)

# Dimensionality Reduction and Feature Selection Techniques
def apply_ipca(X, n_components=5, batch_size=500):
    ipca = IncrementalPCA(n_components=n_components, batch_size=batch_size)
    return ipca.fit_transform(X)

def apply_umap(X, n_components=10):
    umap_model = umap.UMAP(n_components=n_components)
    return umap_model.fit_transform(X)

# Dimensionality Reduction
X_ipca = apply_ipca(features)
X_umap = apply_umap(features)

# Classification Algorithms
models = {
    'RandomForest': RandomForestClassifier(),
    'KNN': KNeighborsClassifier(),
    'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss')
}

# 5-Fold Cross Validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

def save_plot(fig, filename):
    file_path = os.path.join(output_dir, filename)
    fig.savefig(file_path)
    plt.close(fig)

def calculate_eer(fpr, tpr):
    # Calculate the point where FPR and TPR are closest to the diagonal (EER)
    eer = fpr[np.nanargmin(np.absolute((1 - tpr) - fpr))]
    return eer

def plot_metrics(train_metrics, test_metrics, metric_name, method_name, model_name):
    metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score']
    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(metrics, train_metrics, marker='o', linestyle='-', color='blue', label='Train')
    ax.plot(metrics, test_metrics, marker='o', linestyle='-', color='green', label='Test')

    ax.set_title(f'{method_name} - {model_name} - {metric_name}')
    ax.set_xlabel('Metrics')
    ax.set_ylabel('Score')
    ax.legend()
    ax.grid(True)

    save_plot(fig, f'{method_name}_{model_name}_{metric_name}.png')

def plot_roc_curve(fpr, tpr, roc_auc, method_name, model_name):
    fig, ax = plt.subplots(figsize=(10, 6))

    ax.plot(fpr, tpr, color='blue', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    ax.plot([0, 1], [0, 1], color='gray', linestyle='--')

    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_title(f'{method_name} - {model_name} - ROC Curve')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.legend(loc="lower right")

    save_plot(fig, f'{method_name}_{model_name}_ROC.png')

def evaluate_model(X, y, method_name):
    results = {}
    metrics_list = []

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

    for name, model in models.items():
        # Train the model and predict on both train and test sets
        model.fit(X_train, y_train)
        y_train_pred = model.predict(X_train)
        y_test_pred = model.predict(X_test)

        # Calculate metrics for train set
        train_accuracy = accuracy_score(y_train, y_train_pred)
        train_precision = precision_score(y_train, y_train_pred)
        train_recall = recall_score(y_train, y_train_pred)
        train_f1 = f1_score(y_train, y_train_pred)

        # Calculate metrics for test set
        test_accuracy = accuracy_score(y_test, y_test_pred)
        test_precision = precision_score(y_test, y_test_pred)
        test_recall = recall_score(y_test, y_test_pred)
        test_f1 = f1_score(y_test, y_test_pred)

        # Calculate ROC curve and AUC for test set
        fpr, tpr, _ = roc_curve(y_test, model.predict_proba(X_test)[:, 1])
        roc_auc = auc(fpr, tpr)

        # Calculate EER for test set
        eer = calculate_eer(fpr, tpr)

        # Plotting Metrics
        plot_metrics([train_accuracy, train_precision, train_recall, train_f1],
                     [test_accuracy, test_precision, test_recall, test_f1],
                     "Metrics", method_name, name)

        # Plot ROC Curve
        plot_roc_curve(fpr, tpr, roc_auc, method_name, name)

        # Print and collect metrics
        print(f'{method_name} - {name} - Train Accuracy: {train_accuracy}')
        print(f'{method_name} - {name} - Train Precision: {train_precision}')
        print(f'{method_name} - {name} - Train Recall: {train_recall}')
        print(f'{method_name} - {name} - Train F1 Score: {train_f1}')
        print(f'{method_name} - {name} - Test Accuracy: {test_accuracy}')
        print(f'{method_name} - {name} - Test Precision: {test_precision}')
        print(f'{method_name} - {name} - Test Recall: {test_recall}')
        print(f'{method_name} - {name} - Test F1 Score: {test_f1}')
        print(f'{method_name} - {name} - Test AUC: {roc_auc}')
        print(f'{method_name} - {name} - Test EER: {eer}')

        metrics_list.append({
            'Method': method_name,
            'Model': name,
            'Dimensionality Reduction Method': method_name,
            'Train Accuracy': train_accuracy,
            'Train Precision': train_precision,
            'Train Recall': train_recall,
            'Train F1 Score': train_f1,
            'Test Accuracy': test_accuracy,
            'Test Precision': test_precision,
            'Test Recall': test_recall,
            'Test F1 Score': test_f1,
            'Test AUC': roc_auc,
            'Test EER': eer
        })

        # Storing Results
        results[name] = {
            'model': model,
            'y_train_pred': y_train_pred,
            'y_test_pred': y_test_pred
        }

    # Save metrics to Excel
    metrics_df = pd.DataFrame(metrics_list)
    metrics_df.to_excel(os.path.join(output_dir, f'{method_name}_metrics.xlsx'), index=False)

    return results

# Evaluate for each Dimensionality Reduction Method
ipca_results = evaluate_model(X_ipca, labels, 'IPCA')
umap_results = evaluate_model(X_umap, labels, 'UMAP')


IPCA - RandomForest - Train Accuracy: 1.0
IPCA - RandomForest - Train Precision: 1.0
IPCA - RandomForest - Train Recall: 1.0
IPCA - RandomForest - Train F1 Score: 1.0
IPCA - RandomForest - Test Accuracy: 0.9615384615384616
IPCA - RandomForest - Test Precision: 0.9925373134328358
IPCA - RandomForest - Test Recall: 0.9300699300699301
IPCA - RandomForest - Test F1 Score: 0.9602888086642599
IPCA - RandomForest - Test AUC: 0.9865274585554306
IPCA - RandomForest - Test EER: 0.04895104895104895
IPCA - KNN - Train Accuracy: 0.9310344827586207
IPCA - KNN - Train Precision: 0.9140401146131805
IPCA - KNN - Train Recall: 0.9522388059701492
IPCA - KNN - Train F1 Score: 0.9327485380116959
IPCA - KNN - Test Accuracy: 0.9475524475524476
IPCA - KNN - Test Precision: 0.9444444444444444
IPCA - KNN - Test Recall: 0.951048951048951
IPCA - KNN - Test F1 Score: 0.9477351916376306
IPCA - KNN - Test AUC: 0.9621741894469167
IPCA - KNN - Test EER: 0.055944055944055944
IPCA - XGBoost - Train Accuracy: 1.0
IPCA - 