# Libraries

In [4]:

import os
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, cohen_kappa_score, confusion_matrix
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from xgboost import XGBClassifier



# Data Directories

In [5]:
# Locations of the three dataset splits, relative to the notebook's working directory.
train_dir = "Final/train"
test_dir = "Final/test"
valid_dir = "Final/valid"

# Names of the per-class sub-folders inside every split; load_data() also uses
# these strings as the label attached to each image.
classes = ["Benign", "Normal", "Cancer"]

# Size passed to PIL's Image.resize for every loaded image.
image_size = (128, 128)


# DataLoader

In [6]:
def load_data(folder_path):
    """Load every non-mask ``.jpg`` image of one dataset split into arrays.

    For each name in the module-level ``classes`` list, the sub-folder
    ``folder_path/<class_name>`` is scanned. Every ``.jpg`` file whose name
    does not contain ``"_Mask"`` is resized to the module-level ``image_size``
    and its pixel values are divided by 255.0.

    Args:
        folder_path: Directory of one split, holding one sub-folder per class.

    Returns:
        Tuple ``(x_data, y_data)`` of NumPy arrays: the stacked image arrays
        and, in the same order, the class-name string of each image.

    Raises:
        FileNotFoundError: If a class sub-folder does not exist (from
            ``os.listdir``).
    """
    x_data = []
    y_data = []
    for class_name in classes:
        class_folder = os.path.join(folder_path, class_name)
        # os.listdir() returns entries in arbitrary order; sorting makes the
        # sample order (and everything derived from it) reproducible.
        for filename in sorted(os.listdir(class_folder)):
            # Keep only .jpg files that are not "_Mask" files (presumably the
            # segmentation masks shipped next to the images - TODO confirm).
            if not filename.endswith(".jpg") or "_Mask" in filename:
                continue
            img_path = os.path.join(class_folder, filename)
            # The context manager closes the image file as soon as the resized
            # copy has been produced instead of leaving it to the garbage collector.
            with Image.open(img_path) as img:
                # NOTE(review): images are not converted to a common mode here;
                # this assumes all files share one mode (all RGB or all
                # grayscale), otherwise the per-image shapes differ - confirm.
                img_array = np.array(img.resize(image_size)) / 255.0
            x_data.append(img_array)
            y_data.append(class_name)
    return np.array(x_data), np.array(y_data)

# Run the same loader over the train, test and valid folders (in that order);
# each call yields an (images, labels) pair.
(x_train, y_train), (x_test, y_test), (x_valid, y_valid) = (
    load_data(split_dir) for split_dir in (train_dir, test_dir, valid_dir)
)



## Reshape image data from 3D to 2D
### This is because the images are 3D, and the classifiers need 2D (samples × features) input.


In [7]:
# Flatten each sample into a single feature row: the first axis (one entry per
# image) is kept, all remaining axes are collapsed into one.
x_train_reshaped = x_train.reshape(len(x_train), -1)
x_test_reshaped = x_test.reshape(len(x_test), -1)
x_valid_reshaped = x_valid.reshape(len(x_valid), -1)

# Turn the class-name strings into integer codes. The encoder is fitted on the
# training labels only and then reused for every split, so all three splits
# share the same name -> code mapping.
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
y_valid_encoded = label_encoder.transform(y_valid)



# Classifiers we need

In [8]:
# Fixed seed for the randomized estimators so that the reported metrics are
# the same on every Restart & Run All.
SEED = 42

# Models to compare, all with default hyper-parameters apart from the seed.
classifiers = [
    KNeighborsClassifier(),                     # k-nearest neighbours
    SVC(),                                      # support vector classifier
    RandomForestClassifier(random_state=SEED),  # random forest
    AdaBoostClassifier(random_state=SEED),      # adaptive boosting (boosting technique, ensemble learning)
    XGBClassifier(random_state=SEED),           # XGBoost
]
  


# Evaluate classifiers for train, test, and valid sets

In [9]:

# (display name, flattened features, encoded labels) for every split to report on.
datasets = [('Train', x_train_reshaped, y_train_encoded),
            ('Test', x_test_reshaped, y_test_encoded),
            ('Valid', x_valid_reshaped, y_valid_encoded)]

# Fit every classifier exactly once, on the training split only. Fitting inside
# the per-dataset loop below would retrain each model three times and, for the
# randomized models, report Train/Test/Valid metrics of three different fits.
for classifier in classifiers:
    classifier.fit(x_train_reshaped, y_train_encoded)

# Report the same set of metrics for every (split, fitted classifier) pair.
for dataset_name, x_data, y_data in datasets:
    print(f"Evaluating on {dataset_name} dataset:")

    for classifier in classifiers:
        classifier_name = type(classifier).__name__
        print(f"------------------------{classifier_name} Metrics:------------------------")

        y_pred = classifier.predict(x_data)

        # 'weighted' averages the per-class scores weighted by class support,
        # which accounts for the unequal class sizes.
        accuracy = accuracy_score(y_data, y_pred)
        precision = precision_score(y_data, y_pred, average='weighted')
        recall = recall_score(y_data, y_pred, average='weighted')
        f1 = f1_score(y_data, y_pred, average='weighted')
        kappa = cohen_kappa_score(y_data, y_pred)
        confusion = confusion_matrix(y_data, y_pred)

        print(f"Accuracy: {accuracy:.2f}")
        print(f"Precision: {precision:.2f}")
        print(f"Recall: {recall:.2f}")
        print(f"F1 Score: {f1:.2f}")
        print(f"Cohen's Kappa: {kappa:.2f}")
        print("Confusion Matrix:")
        print(confusion)
        print("\n")

Evaluating on Train dataset:
------------------------KNeighborsClassifier Metrics:------------------------
Accuracy: 0.75
Precision: 0.76
Recall: 0.75
F1 Score: 0.76
Cohen's Kappa: 0.62
Confusion Matrix:
[[ 734  145  102]
 [ 310  607   82]
 [ 156  104 1425]]


------------------------SVC Metrics:------------------------
Accuracy: 0.74
Precision: 0.74
Recall: 0.74
F1 Score: 0.74
Cohen's Kappa: 0.60
Confusion Matrix:
[[ 586  242  153]
 [ 208  651  140]
 [ 113   88 1484]]


------------------------RandomForestClassifier Metrics:------------------------
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1 Score: 1.00
Cohen's Kappa: 1.00
Confusion Matrix:
[[ 981    0    0]
 [   0  999    0]
 [   0    0 1685]]


------------------------AdaBoostClassifier Metrics:------------------------
Accuracy: 0.72
Precision: 0.71
Recall: 0.72
F1 Score: 0.72
Cohen's Kappa: 0.56
Confusion Matrix:
[[ 566  281  134]
 [ 305  575  119]
 [ 108   87 1490]]


------------------------XGBClassifier Metrics:--------------