In [1]:
import os
import shutil
import numpy as np
from PIL import Image

<h3>SVM</h3>

In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog, local_binary_pattern
from scipy.spatial.distance import euclidean
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC


def extract_hog_features(image, orientations=9):
    hog_features, hog_image = hog(image, pixels_per_cell=(8, 8), cells_per_block=(2, 2), 
                                   block_norm='L2-Hys', visualize=True, feature_vector=True)
    return hog_features

def extract_lbp_features(image):
    # Set the parameters for LBP
    radius = 1
    n_points = 8 * radius
    lbp = local_binary_pattern(image, n_points, radius, method="uniform")
    
    # Compute the histogram of LBP
    (hist, _) = np.histogram(lbp.ravel(), bins=np.arange(0, n_points + 3), range=(0, n_points + 2))
    # Normalize the histogram
    hist = hist.astype("float")
    hist /= hist.sum()
    
    return hist

def crop_center(image, crop_size=128):
    h, w = image.shape
    center_h, center_w = h // 2, w // 2
    half_crop = crop_size // 2
    cropped_image = image[center_h - half_crop:center_h + half_crop, center_w - half_crop:center_w + half_crop]
    return cropped_image

X = []
y = []
parent_path = "archive/similar/inpainting"

for folder_name in os.listdir(parent_path):
    folder_path = os.path.join(parent_path, folder_name)
    if os.path.isdir(folder_path):
        real_img = None
        forged_img = None
        print("reading folder " + folder_name)
        for image in os.listdir(folder_path):
            image_path = os.path.join(folder_path, image)
            if os.path.isfile(image_path):
                img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

                if img is not None:
                    h, w = img.shape

                    # Skip images with height or width smaller than 256px
                    if h < 128 or w < 128:
                        print(f"Skipping image {image_path} (size: {h}x{w})")
                        continue  # Skip this image

                    if image.startswith("original"):
                        real_img = img
                    elif image.startswith("inpainting"):
                        forged_img = img

        if real_img is not None and forged_img is not None:
            real_img_cropped = crop_center(real_img)
            forged_img_cropped = crop_center(forged_img)

            real_hog = extract_hog_features(real_img_cropped)
            forged_hog = extract_hog_features(forged_img_cropped)

            real_lbp = extract_lbp_features(real_img_cropped)
            forged_lbp = extract_lbp_features(forged_img_cropped)

            combined_features = np.hstack((real_hog, real_lbp))
            X.append(combined_features)  
            y.append(0)  

            combined_features = np.hstack((forged_hog, forged_lbp))
            X.append(combined_features)
            y.append(1)  

X = np.array(X)
y = np.array(y)

In [None]:
import os
import cv2
import numpy as np
from skimage.feature import hog, local_binary_pattern
from scipy.spatial.distance import euclidean
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
model_map = {"svm_linear": SVC(kernel = "linear"),
             "svm_rbf": SVC(kernel = "rbf"),
            "svm_poly": SVC(kernel = "poly")}

parameters_map = { 
    "C": [0.01, 0.1, 1, 10, 100],
    "gamma": [0.001, 0.01, 0.1, 1],
    "degree": [2, 3, 4, 5] 
}

def fine_tune(model: str, param_grid: dict, folds:int = 3):
    return HalvingGridSearchCV(model_map[model], param_grid, n_jobs = -1, refit = True, cv = folds, verbose = 3, factor = 3, resource = "n_samples")

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

In [None]:
# linear
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, HalvingGridSearchCV
linear_svm = fine_tune("svm_linear", parameters_map)
linear_svm.fit(X_train, y_train)

In [None]:
print(f"Grid search linear SVM best parameters: {linear_svm.best_params_}")
print(f"Cross validated score of the best linear SVM model: {linear_svm.best_score_}")

In [None]:
y_pred = linear_svm.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

accuracy = accuracy_score(y_test, y_pred)  
print(f"Accuracy: {accuracy * 100:.2f}%")

conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

In [None]:
# polynomial
poly_svm = fine_tune("svm_poly", parameters_map)
poly_svm.fit(X_train, y_train)

In [None]:
print(f"Grid search linear SVM best parameters: {poly_svm.best_params_}")
print(f"Cross validated score of the best linear SVM model: {poly_svm.best_score_}")

In [None]:
y_pred_poly = poly_svm.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred_poly)  
print(f"Accuracy: {accuracy * 100:.2f}%")

conf_matrix = confusion_matrix(y_test, y_pred_poly)
print("Confusion Matrix:")
print(conf_matrix)

precision = precision_score(y_test, y_pred_poly)
recall = recall_score(y_test, y_pred_poly)
f1 = f1_score(y_test, y_pred_poly)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

In [None]:
# rbf
rbf_svm = fine_tune("svm_rbf", parameters_map)
rbf_svm.fit(X_train, y_train)

In [None]:
print(f"Grid search linear SVM best parameters: {rbf_svm.best_params_}")
print(f"Cross validated score of the best linear SVM model: {rbf_svm.best_score_}")

In [None]:
y_pred_rbf = rbf_svm.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test, y_pred_rbf)  
print(f"Accuracy: {accuracy * 100:.2f}%")

conf_matrix = confusion_matrix(y_test, y_pred_rbf)
print("Confusion Matrix:")
print(conf_matrix)

precision = precision_score(y_test, y_pred_rbf)
recall = recall_score(y_test, y_pred_rbf)
f1 = f1_score(y_test, y_pred_rbf)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

<h3>CNN-SVM</h3>

In [2]:
# Define the source directory
source_dir = 'archive/similar/inpainting'

# Define the destination directories
original_dest = 'extracted_images/original'
inpainting_dest = 'extracted_images/inpainting'

# Create destination directories if they don't exist
os.makedirs(original_dest, exist_ok=True)
os.makedirs(inpainting_dest, exist_ok=True)

# Counter for naming files
counter = 1

# Walk through the source directory
for root, dirs, files in os.walk(source_dir):
    original_file = os.path.join(root, 'original.png')
    inpainting_file = os.path.join(root, 'inpainting.png')
    
    # Check if both files exist in the current directory
    if os.path.exists(original_file) and os.path.exists(inpainting_file):
        # Copy original image
        shutil.copy2(original_file, os.path.join(original_dest, f'original_{counter}.png'))
        
        # Copy inpainting image
        shutil.copy2(inpainting_file, os.path.join(inpainting_dest, f'inpainting_{counter}.png'))
        
        counter += 1

print(f"Extracted {counter-1} pairs of images.")

Extracted 5063 pairs of images.


In [3]:
from concurrent.futures import ThreadPoolExecutor
import tqdm

# Define directories
source_dir = 'archive/similar/inpainting'
original_dest = 'mask_images/original'
inpainting_dest = 'mask_images/inpainting'

# Create destination directories
os.makedirs(original_dest, exist_ok=True)
os.makedirs(inpainting_dest, exist_ok=True)

def apply_mask(image_path, mask_path):
    image = Image.open(image_path).convert('RGBA')
    mask = Image.open(mask_path).convert('L')
    
    # Convert images to numpy arrays
    image_array = np.array(image)
    mask_array = np.array(mask)
    
    # Create an all transparent image
    result = np.zeros_like(image_array)
    
    # Copy only the pixels where the mask is white (255)
    result[mask_array == 255] = image_array[mask_array == 255]
    
    return Image.fromarray(result)

def process_folder(folder_path):
    folder_name = os.path.basename(folder_path)
    original_file = os.path.join(folder_path, 'original.png')
    inpainting_file = os.path.join(folder_path, 'inpainting.png')
    mask_file = os.path.join(folder_path, 'mask.png')
    
    if all(os.path.exists(f) for f in [original_file, inpainting_file, mask_file]):
        # Apply mask to original image and save
        masked_original = apply_mask(original_file, mask_file)
        masked_original.save(os.path.join(original_dest, f'masked_original_{folder_name}.png'))
        
        # Apply mask to inpainting image and save
        masked_inpainting = apply_mask(inpainting_file, mask_file)
        masked_inpainting.save(os.path.join(inpainting_dest, f'masked_inpainting_{folder_name}.png'))
        
        return True
    return False

# Get list of folders
folders = [os.path.join(source_dir, d) for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))]

# Use ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor() as executor:
    results = list(tqdm.tqdm(executor.map(process_folder, folders), total=len(folders)))

processed_count = sum(results)
print(f"Extracted and masked {processed_count} pairs of images.")

100%|███████████████████████████████████████| 5063/5063 [22:35<00:00,  3.74it/s]

Extracted and masked 5063 pairs of images.





In [None]:
import cv2
def load_image_paths_from_folder(folder, label):
    image_paths = []
    labels = []
    for filename in os.listdir(folder):
        if filename.endswith((".png", ".jpg", ".jpeg")):
            img_path = os.path.join(folder, filename)
            image_paths.append(img_path)
            labels.append(label)
    return image_paths, labels

# Load real image paths (label = 1)
# real_folder = "extracted_images/original"
real_folder = "mask_images/original"
real_image_paths, real_labels = load_image_paths_from_folder(real_folder, label=1)

# Load fake image paths (label = 0)
# fake_folder = "extracted_images/inpainting"
fake_folder = "mask_images/inpainting"
fake_image_paths, fake_labels = load_image_paths_from_folder(fake_folder, label=0)

# Combine real and fake image paths and labels
image_paths = real_image_paths + fake_image_paths
labels = real_labels + fake_labels

# Convert labels to NumPy array
labels = np.array(labels)

# Check number of images loaded
print(f"Number of real images: {len(real_image_paths)}")
print(f"Number of fake images: {len(fake_image_paths)}")
print(f"Total number of images: {len(image_paths)}")

# Function to load and preprocess images (to be used when training the model)
def load_and_preprocess_image(image_path, target_size=(256, 256), preserve_texture=True):
    img = Image.open(image_path)
    img = img.convert('RGB')  # Convert to RGB
    if preserve_texture:
        # Convert to numpy array for OpenCV processing
        img_array = np.array(img)
        
        # Use Lanczos resampling which better preserves high-frequency details
        img_array = cv2.resize(
            img_array, 
            target_size, 
            interpolation=cv2.INTER_LANCZOS4
        )
    else:
        img = img.resize(target_size)  # Resize the image
        img_array = np.array(img)

    img_array = img_array.astype(np.float32)
    img_array /= 255.0  # Normalize to [0, 1]
    return img_array

In [7]:
from sklearn.model_selection import train_test_split
# Perform train-test split
train_paths, test_paths, train_labels, test_labels = train_test_split(
    image_paths, labels, test_size=0.2, random_state=42, stratify=labels
)

# Print the shapes of the resulting datasets
print("\nAfter train-test split:")
print(f"Train set size: {len(train_paths)}")
print(f"Train labels shape: {train_labels.shape}")
print(f"Test set size: {len(test_paths)}")
print(f"Test labels shape: {test_labels.shape}")

# Print the distribution of labels in train and test sets
print(f"\nTrain set - Real: {np.sum(train_labels == 1)}, Fake: {np.sum(train_labels == 0)}")
print(f"Test set - Real: {np.sum(test_labels == 1)}, Fake: {np.sum(test_labels == 0)}")


After train-test split:
Train set size: 8100
Train labels shape: (8100,)
Test set size: 2026
Test labels shape: (2026,)

Train set - Real: 4050, Fake: 4050
Test set - Real: 1013, Fake: 1013


In [8]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, Flatten
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.models import load_model

In [10]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Activation, MaxPooling2D, GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.regularizers import l2

def improved_deepfake_detection_model(input_shape=(256, 256, 3)):
    inputs = Input(shape=input_shape)
    
    # Convolutional layers
    x = Conv2D(32, (3, 3), padding='same', kernel_regularizer=l2(1e-4))(inputs)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Conv2D(64, (3, 3), padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Conv2D(128, (3, 3), padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    # Additional convolutional layer
    x = Conv2D(256, (3, 3), padding='same', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    # Global Average Pooling
    x = GlobalAveragePooling2D()(x)
    
    # Dense layers
    x = Dense(256, activation='relu', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.5)(x)
    
    x = Dense(128, activation='relu', kernel_regularizer=l2(1e-4))(x)
    x = BatchNormalization()(x)
    x = Dropout(0.6)(x)
    
    # Final layer
    outputs = Dense(1, activation='sigmoid')(x)
    
    model = Model(inputs=inputs, outputs=outputs)
    return model

# Create the model
model = improved_deepfake_detection_model()

# Print model summary
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d_7 (Conv2D)           (None, 256, 256, 32)      896       
                                                                 
 batch_normalization_8 (Bat  (None, 256, 256, 32)      128       
 chNormalization)                                                
                                                                 
 activation_9 (Activation)   (None, 256, 256, 32)      0         
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 128, 128, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 128, 128, 64)      18496 

In [11]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d_7 (Conv2D)           (None, 256, 256, 32)      896       
                                                                 
 batch_normalization_8 (Bat  (None, 256, 256, 32)      128       
 chNormalization)                                                
                                                                 
 activation_9 (Activation)   (None, 256, 256, 32)      0         
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 128, 128, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 128, 128, 64)      18496 

In [16]:
# view keys for 0.60 weights
import h5py

with h5py.File('model_weights_cnn/model_weights_epoch_05_val_loss_0.60.weights.h5', 'r') as h5file:
    print("Keys in the file:")
    for key in h5file.keys():
        print(key)

Keys in the file:
activation_4
activation_5
activation_6
activation_7
batch_normalization_10
batch_normalization_11
batch_normalization_6
batch_normalization_7
batch_normalization_8
batch_normalization_9
conv2d_4
conv2d_5
conv2d_6
conv2d_7
dense_3
dense_4
dense_5
dropout_2
dropout_3
global_average_pooling2d_1
input_2
max_pooling2d_4
max_pooling2d_5
max_pooling2d_6
max_pooling2d_7
top_level_model_weights


In [17]:
# view keys for 0.57 weights
import h5py

with h5py.File('model_weights_cnn/model_weights_epoch_04_val_loss_0.57.weights.h5', 'r') as h5file:
    print("Keys in the file:")
    for key in h5file.keys():
        print(key)

Keys in the file:
activation_12
activation_13
activation_14
activation_15
batch_normalization_18
batch_normalization_19
batch_normalization_20
batch_normalization_21
batch_normalization_22
batch_normalization_23
conv2d_12
conv2d_13
conv2d_14
conv2d_15
dense_10
dense_11
dense_9
dropout_6
dropout_7
global_average_pooling2d_3
input_4
max_pooling2d_12
max_pooling2d_13
max_pooling2d_14
max_pooling2d_15
top_level_model_weights


In [18]:
# loading weights

with h5py.File('model_weights_cnn/model_weights_epoch_04_val_loss_0.57.weights.h5', 'r') as h5file:
    weights_group = h5file['top_level_model_weights']
    
    # Iterate over the layers in the model and set weights
    for layer in model.layers:
        if layer.name in weights_group:
            layer_weights = [weights_group[layer.name][w] for w in weights_group[layer.name]]
            layer.set_weights(layer_weights)

print("Weights loaded successfully!")

Weights loaded successfully!


In [19]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 conv2d_7 (Conv2D)           (None, 256, 256, 32)      896       
                                                                 
 batch_normalization_8 (Bat  (None, 256, 256, 32)      128       
 chNormalization)                                                
                                                                 
 activation_9 (Activation)   (None, 256, 256, 32)      0         
                                                                 
 max_pooling2d_5 (MaxPoolin  (None, 128, 128, 32)      0         
 g2D)                                                            
                                                                 
 conv2d_8 (Conv2D)           (None, 128, 128, 64)      18496 

<b>Testing global_average_pooling2d layer</b>

In [20]:
# Function to extract features using the CNN
def extract_features(image_paths, model):
    features = []
    for img_path in image_paths:
        img_array = load_and_preprocess_image(img_path, target_size=(256, 256))
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Extract features
        feature = model.predict(img_array)
        features.append(feature.flatten()) 
    return np.array(features)

# Extract features from convolutional layer
# Change this accordingly to the conv layer u wan to extract
feature_extractor = Model(inputs=model.input, outputs=model.get_layer('global_average_pooling2d').output)

# Extract features
features = extract_features(image_paths, feature_extractor)



































KeyboardInterrupt: 

In [None]:
np.save('global_average_pooling2d.npy', features)  # Saves as a .npy file
print("saved")

In [None]:
# can try PCA afterwards, run above without PCA first // can adjust variance %

# import numpy as np
# from sklearn import svm
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.metrics import classification_report, accuracy_score


# train_paths, test_paths, train_labels, test_labels = train_test_split(
#     features, labels, test_size=0.2, random_state=42, stratify=labels
# )

# # Step 2: Standardize the features
# scaler = StandardScaler()
# train_paths = scaler.fit_transform(train_paths)
# test_paths = scaler.transform(test_paths)

# # Step 3: Apply PCA
# pca = PCA(n_components=0.85)  # Retain 85% of the variance
# train_paths = pca.fit_transform(train_paths)
# test_paths = pca.transform(test_paths)

In [17]:
from sklearn.svm import SVC

model_map = {"svm_linear": SVC(kernel = "linear"),
             "svm_rbf": SVC(kernel = "rbf"),
            "svm_poly": SVC(kernel = "poly")}

parameters_map = { 
    "C": [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],  # Further expanded range
    "gamma": [0.0001, 0.001, 0.01, 0.1, 1, 10],  # Further expanded range
    "degree": [2, 3, 4, 5, 6]  # Added more degrees for poly kernel
}


def fine_tune(model: str, param_grid: dict, folds:int = 3):
    return HalvingGridSearchCV(model_map[model], param_grid, n_jobs = -1, refit = True, cv = folds, verbose = 3, factor = 3, resource = "n_samples")

In [232]:
# linear

from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, HalvingGridSearchCV

linear_svm = fine_tune("svm_linear", parameters_map)
linear_svm.fit(train_paths, train_labels)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 100
max_resources_: 8100
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 240
n_resources: 100
Fitting 3 folds for each of 240 candidates, totalling 720 fits


python(77522) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(77523) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(77524) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(77525) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(77526) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(77527) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(77528) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
python(77529) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


KeyboardInterrupt: 

In [179]:
print(f"Grid search linear SVM best parameters: {linear_svm.best_params_}")
print(f"Cross validated score of the best linear SVM model: {linear_svm.best_score_}")

Grid search linear SVM best parameters: {'C': 1, 'degree': 4, 'gamma': 0.1}
Cross validated score of the best linear SVM model: 0.6830864197530865


In [180]:
y_pred = linear_svm.predict(test_paths)

In [182]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred)
recall = recall_score(test_labels, y_pred)
f1 = f1_score(test_labels, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 66.49%
Confusion Matrix:
[[704 309]
 [370 643]]
Precision: 0.68
Recall: 0.63
F1 Score: 0.65


In [201]:
# polynomial

poly_svm = fine_tune("svm_poly", parameters_map)
poly_svm.fit(train_paths, train_labels)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 100
max_resources_: 8100
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 240
n_resources: 100
Fitting 3 folds for each of 240 candidates, totalling 720 fits
----------
iter: 1
n_candidates: 80
n_resources: 300
Fitting 3 folds for each of 80 candidates, totalling 240 fits
----------
iter: 2
n_candidates: 27
n_resources: 900
Fitting 3 folds for each of 27 candidates, totalling 81 fits
----------
iter: 3
n_candidates: 9
n_resources: 2700
Fitting 3 folds for each of 9 candidates, totalling 27 fits
----------
iter: 4
n_candidates: 3
n_resources: 8100
Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV 1/3] END C=0.0001, degree=2, gamma=0.01;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 3/3] END C=0.0001, degree=2, gamma=0.01;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 2/3] END C=0.0001, degree=2, gamma=1;, score=(train=0.621, test=0.455) total time=  

[CV 1/3] END C=0.0001, degree=2, gamma=0.001;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 1/3] END C=0.0001, degree=2, gamma=1;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 3/3] END C=0.0001, degree=3, gamma=10;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 3/3] END C=0.0001, degree=4, gamma=0.0001;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.0001, degree=4, gamma=1;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=4, gamma=1;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 2/3] END C=0.0001, degree=5, gamma=0.001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.0001, degree=5, gamma=0.001;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.001, degree=6, gamma=0.01;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.001, degree=6, gamma=0.01;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.001, degree=6, gamma=

[CV 1/3] END C=0.0001, degree=2, gamma=0.0001;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 3/3] END C=0.0001, degree=2, gamma=1;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 2/3] END C=0.0001, degree=3, gamma=0.0001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 2/3] END C=0.0001, degree=3, gamma=0.001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 1/3] END C=0.0001, degree=3, gamma=0.1;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 3/3] END C=0.0001, degree=3, gamma=1;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.0001, degree=4, gamma=0.01;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=4, gamma=0.01;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 1/3] END C=0.0001, degree=6, gamma=0.0001;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=6, gamma=0.0001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.0001, degre

[CV 2/3] END C=0.0001, degree=2, gamma=0.0001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 2/3] END C=0.0001, degree=3, gamma=1;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 2/3] END C=0.0001, degree=4, gamma=0.001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.0001, degree=4, gamma=0.001;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 3/3] END C=0.0001, degree=4, gamma=1;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.0001, degree=4, gamma=10;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 1/3] END C=0.0001, degree=5, gamma=0.0001;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=5, gamma=0.0001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 1/3] END C=0.0001, degree=5, gamma=1;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=5, gamma=1;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 2/3] END C=0.0001, degree=6, gam

[CV 2/3] END C=0.0001, degree=2, gamma=0.01;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.0001, degree=2, gamma=0.1;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.0001, degree=2, gamma=10;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 1/3] END C=0.01, degree=2, gamma=1;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.01, degree=2, gamma=1;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.01, degree=2, gamma=1;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.01, degree=2, gamma=10;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.01, degree=2, gamma=10;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.01, degree=2, gamma=10;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.01, degree=3, gamma=0.0001;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.01, degree=3, gamma=0.0001;, score=(train=0

[CV 2/3] END C=0.0001, degree=2, gamma=0.001;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 1/3] END C=0.0001, degree=2, gamma=0.1;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=2, gamma=10;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.0001, degree=3, gamma=0.0001;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.0001, degree=3, gamma=0.01;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=3, gamma=0.1;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 2/3] END C=0.0001, degree=4, gamma=0.1;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.0001, degree=4, gamma=0.1;, score=(train=0.606, test=0.576) total time=   0.0s
[CV 1/3] END C=0.0001, degree=5, gamma=0.01;, score=(train=0.591, test=0.333) total time=   0.0s
[CV 2/3] END C=0.0001, degree=5, gamma=0.01;, score=(train=0.621, test=0.455) total time=   0.0s
[CV 3/3] END C=0.0001, degree=5, 

In [202]:
print(f"Grid search poly SVM best parameters: {poly_svm.best_params_}")
print(f"Cross validated score of the best poly SVM model: {poly_svm.best_score_}")

Grid search poly SVM best parameters: {'C': 0.001, 'degree': 3, 'gamma': 1}
Cross validated score of the best poly SVM model: 0.675679012345679


In [203]:
y_pred_poly = poly_svm.predict(test_paths)

In [205]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred_poly)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred_poly)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred_poly)
recall = recall_score(test_labels, y_pred_poly)
f1 = f1_score(test_labels, y_pred_poly)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 66.04%
Confusion Matrix:
[[734 279]
 [409 604]]
Precision: 0.68
Recall: 0.60
F1 Score: 0.64


In [183]:
# rbf

rbf_svm = fine_tune("svm_rbf", parameters_map)
rbf_svm.fit(train_paths, train_labels)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 100
max_resources_: 8100
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 240
n_resources: 100
Fitting 3 folds for each of 240 candidates, totalling 720 fits
----------
iter: 1
n_candidates: 80
n_resources: 300
Fitting 3 folds for each of 80 candidates, totalling 240 fits
----------
iter: 2
n_candidates: 27
n_resources: 900
Fitting 3 folds for each of 27 candidates, totalling 81 fits
----------
iter: 3
n_candidates: 9
n_resources: 2700
Fitting 3 folds for each of 9 candidates, totalling 27 fits
----------
iter: 4
n_candidates: 3
n_resources: 8100
Fitting 3 folds for each of 3 candidates, totalling 9 fits


In [184]:
# Check the best parameters
print("Best parameters found:", rbf_svm.best_params_)

# Check the kernel of the best estimator
print("Kernel used:", rbf_svm.best_estimator_.kernel)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 100
max_resources_: 8100
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 240
n_resources: 100
Fitting 3 folds for each of 240 candidates, totalling 720 fits
----------
iter: 1
n_candidates: 80
n_resources: 300
Fitting 3 folds for each of 80 candidates, totalling 240 fits
----------
iter: 2
n_candidates: 27
n_resources: 900
Fitting 3 folds for each of 27 candidates, totalling 81 fits
----------
iter: 3
n_candidates: 9
n_resources: 2700
Fitting 3 folds for each of 9 candidates, totalling 27 fits
----------
iter: 4
n_candidates: 3
n_resources: 8100
Fitting 3 folds for each of 3 candidates, totalling 9 fits
Best parameters found: {'C': 100, 'degree': 5, 'gamma': 0.001}
Kernel used: rbf


In [185]:
y_pred_rbf = rbf_svm.predict(test_paths)

In [207]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred_rbf)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred_rbf)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred_rbf)
recall = recall_score(test_labels, y_pred_rbf)
f1 = f1_score(test_labels, y_pred_rbf)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")

Accuracy: 70.34%
Confusion Matrix:
[[779 234]
 [367 646]]
Precision: 0.73
Recall: 0.64
F1 Score: 0.68


In [135]:
# testing out ensemble model (catboost + rbf) // 69.79% compared to just rbf of 70.34%

from sklearn.experimental import enable_halving_search_cv  # noqa
from sklearn.model_selection import HalvingGridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score

# Define the parameter grid for SVC
svc_parameters_map = {
    'C': [0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
    'gamma': [0.001, 0.01, 0.1, 1],
    'degree': [2, 3, 4, 5],
    'probability': [True]  
}

rbf_svm = SVC(kernel='rbf')

rbf_svm = HalvingGridSearchCV(
    estimator=rbf_svm,
    param_grid=svc_parameters_map,
    n_jobs=-1,
    refit=True,
    cv=3,
    verbose=3,
    factor=3,
    resource='n_samples'
)

rbf_svm.fit(train_paths, train_labels)

catboost_parameters_map = {
    'iterations': [100, 200],
    'depth': [4, 6, 8],
    'learning_rate': [0.01, 0.1],
    'l2_leaf_reg': [1, 3, 5]
}

cat_clf = CatBoostClassifier(verbose=0, random_state=42)

catboost_search = HalvingGridSearchCV(
    estimator=cat_clf,
    param_grid=catboost_parameters_map,
    n_jobs=-1,
    refit=True,
    cv=3,
    verbose=3,
    factor=3,
    resource='n_samples'
)

catboost_search.fit(train_paths, train_labels)

# Create the ensemble model (Voting Classifier)
voting_clf = VotingClassifier(
    estimators=[
        ('cat', catboost_search.best_estimator_),
        ('rbf', rbf_svm.best_estimator_)
    ],
    voting='soft'
)

# Train the ensemble model
voting_clf.fit(train_paths, train_labels)

# Make predictions
y_pred = voting_clf.predict(test_paths)

# Evaluate the ensemble model
accuracy = accuracy_score(test_labels, y_pred)
print(f"Ensemble Model Accuracy: {accuracy}")

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 100
max_resources_: 8100
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 112
n_resources: 100
Fitting 3 folds for each of 112 candidates, totalling 336 fits
----------
iter: 1
n_candidates: 38
n_resources: 300
Fitting 3 folds for each of 38 candidates, totalling 114 fits
----------
iter: 2
n_candidates: 13
n_resources: 900
Fitting 3 folds for each of 13 candidates, totalling 39 fits
----------
iter: 3
n_candidates: 5
n_resources: 2700
Fitting 3 folds for each of 5 candidates, totalling 15 fits
----------
iter: 4
n_candidates: 2
n_resources: 8100
Fitting 3 folds for each of 2 candidates, totalling 6 fits
n_iterations: 4
n_required_iterations: 4
n_possible_iterations: 4
min_resources_: 300
max_resources_: 8100
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 36
n_resources: 300
Fitting 3 folds for each of 36 candidates, totalling 108 fits
----------
iter: 1
n

In [None]:
from sklearn import svm
from sklearn.ensemble import VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
# Best parameters found: {'C': 1000, 'degree': 4, 'gamma': 0.0001}
#     {'C': 1, 'degree': 3, 'gamma': 0.01}
#     {'C': 1, 'degree': 4, 'gamma': 0.0001}

svm_rbf = svm.SVC(kernel='rbf', C=1000, degree=4, gamma=0.0001, probability=True)
svm_linear = svm.SVC(kernel='linear', C=1.0, degree=4, gamma=0.001, probability=True)
svm_poly = svm.SVC(kernel='poly', C=1.0, degree=4, gamma=0.0001, probability=True)

# Create an ensemble using VotingClassifier
ensemble_model = VotingClassifier(estimators=[
    ('rbf', svm_rbf),
    ('linear', svm_linear),
    ('poly', svm_poly)],
    voting='soft'  # Use 'soft' for probability-based voting
)

# Train the ensemble model
ensemble_model.fit(train_paths, train_labels)


# Evaluate the ensemble model
accuracy = ensemble_model.score(test_paths, test_labels)
print(f"Accuracy: {accuracy * 100:.2f}%")

<b>Testing conv2d_10 layer</b>

In [None]:
from tensorflow.keras.models import Model

# Function to extract features using the CNN
def extract_features(image_paths, model):
    features = []
    for img_path in image_paths:
        img_array = load_and_preprocess_image(img_path, target_size=(256, 256))
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Extract features
        feature = model.predict(img_array)
        features.append(feature.flatten())  # Flatten if necessary
    return np.array(features)

# extract features, from conv2d_9 layer
feature_extractor = Model(inputs=model.input, outputs=model.get_layer('conv2d_10').output)

# Extract features
features = extract_features(image_paths, feature_extractor)

In [None]:
np.save('features_conv2d_10.npy', features)  # Saves as a .npy file
print("saved")

In [15]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

train_paths, test_paths, train_labels, test_labels = train_test_split(
    features_dense2, labels, test_size=0.2, random_state=42, stratify=labels
)

scaler = StandardScaler()

train_paths = scaler.fit_transform(train_paths)
test_paths = scaler.transform(test_paths)

In [None]:
# can try PCA afterwards, run above without PCA first // can adjust variance %

# import numpy as np
# from sklearn import svm
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.metrics import classification_report, accuracy_score


# train_paths, test_paths, train_labels, test_labels = train_test_split(
#     features, labels, test_size=0.2, random_state=42, stratify=labels
# )

# # Step 2: Standardize the features
# scaler = StandardScaler()
# train_paths = scaler.fit_transform(train_paths)
# test_paths = scaler.transform(test_paths)

# # Step 3: Apply PCA
# pca = PCA(n_components=0.85)  # Retain 85% of the variance
# train_paths = pca.fit_transform(train_paths)
# test_paths = pca.transform(test_paths)


In [None]:
# linear

from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, HalvingGridSearchCV

linear_svm = fine_tune("svm_linear", parameters_map)
linear_svm.fit(train_paths, train_labels)

In [None]:
print(f"Grid search linear SVM best parameters: {linear_svm.best_params_}")
print(f"Cross validated score of the best linear SVM model: {linear_svm.best_score_}")

In [None]:
y_pred = linear_svm.predict(test_paths)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred)
recall = recall_score(test_labels, y_pred)
f1 = f1_score(test_labels, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


In [None]:
# polynomial

poly_svm = fine_tune("svm_poly", parameters_map)
poly_svm.fit(train_paths, train_labels)

In [None]:
print(f"Grid search poly SVM best parameters: {poly_svm.best_params_}")
print(f"Cross validated score of the best poly SVM model: {poly_svm.best_score_}")

In [None]:
y_pred_poly = poly_svm.predict(test_paths)

In [None]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred_poly)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred_poly)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred_poly)
recall = recall_score(test_labels, y_pred_poly)
f1 = f1_score(test_labels, y_pred_poly)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


In [None]:
# rbf

rbf_svm = fine_tune("svm_rbf", parameters_map)
rbf_svm.fit(train_paths, train_labels)

In [None]:
# Check the best parameters
print("Best parameters found:", rbf_svm.best_params_)

# Check the kernel of the best estimator
print("Kernel used:", rbf_svm.best_estimator_.kernel)

In [None]:
y_pred_rbf = rbf_svm.predict(test_paths)

In [None]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred_rbf)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred_rbf)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred_rbf)
recall = recall_score(test_labels, y_pred_rbf)
f1 = f1_score(test_labels, y_pred_rbf)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


<b>Testing conv2d_9 layer</b>

In [None]:
from tensorflow.keras.models import Model

# Function to extract features using the CNN
def extract_features(image_paths, model):
    features = []
    for img_path in image_paths:
        img_array = load_and_preprocess_image(img_path, target_size=(256, 256))
        img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension

        # Extract features
        feature = model.predict(img_array)
        features.append(feature.flatten())  # Flatten if necessary
    return np.array(features)

# extract features, from conv2d_8 layer
feature_extractor = Model(inputs=model.input, outputs=model.get_layer('conv2d_9').output)

# Extract features
features = extract_features(image_paths, feature_extractor)

In [None]:
np.save('features_conv2d_9.npy', features)  # Saves as a .npy file
print("saved")

In [217]:
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

train_paths, test_paths, train_labels, test_labels = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)

scaler = StandardScaler()
train_paths = scaler.fit_transform(train_paths)
test_paths = scaler.transform(test_paths)

In [None]:
# can try PCA afterwards, run above without PCA first // can adjust variance %

# import numpy as np
# from sklearn import svm
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler
# from sklearn.decomposition import PCA
# from sklearn.metrics import classification_report, accuracy_score


# train_paths, test_paths, train_labels, test_labels = train_test_split(
#     features, labels, test_size=0.2, random_state=42, stratify=labels
# )

# # Step 2: Standardize the features
# scaler = StandardScaler()
# train_paths = scaler.fit_transform(train_paths)
# test_paths = scaler.transform(test_paths)

# # Step 3: Apply PCA
# pca = PCA(n_components=0.85)  # Retain 85% of the variance
# train_paths = pca.fit_transform(train_paths)
# test_paths = pca.transform(test_paths)


In [219]:
# linear

from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import train_test_split, HalvingGridSearchCV

linear_svm = fine_tune("svm_linear", parameters_map)
linear_svm.fit(train_paths, train_labels)

n_iterations: 5
n_required_iterations: 5
n_possible_iterations: 5
min_resources_: 100
max_resources_: 8100
aggressive_elimination: False
factor: 3
----------
iter: 0
n_candidates: 240
n_resources: 100
Fitting 3 folds for each of 240 candidates, totalling 720 fits
----------
iter: 1
n_candidates: 80
n_resources: 300
Fitting 3 folds for each of 80 candidates, totalling 240 fits
----------
iter: 2
n_candidates: 27
n_resources: 900
Fitting 3 folds for each of 27 candidates, totalling 81 fits


KeyboardInterrupt: 

In [None]:
print(f"Grid search linear SVM best parameters: {linear_svm.best_params_}")
print(f"Cross validated score of the best linear SVM model: {linear_svm.best_score_}")

In [None]:
y_pred = linear_svm.predict(test_paths)

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score

# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred)
recall = recall_score(test_labels, y_pred)
f1 = f1_score(test_labels, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
 


In [None]:
# polynomial

poly_svm = fine_tune("svm_poly", parameters_map)
poly_svm.fit(train_paths, train_labels)


In [None]:
print(f"Grid search poly SVM best parameters: {poly_svm.best_params_}")
print(f"Cross validated score of the best poly SVM model: {poly_svm.best_score_}")

In [None]:
y_pred_poly = poly_svm.predict(test_paths)

In [None]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred_poly)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred_poly)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred_poly)
recall = recall_score(test_labels, y_pred_poly)
f1 = f1_score(test_labels, y_pred_poly)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")


In [None]:
# rbf

rbf_svm = fine_tune("svm_rbf", parameters_map)
rbf_svm.fit(train_paths, train_labels)


In [None]:
# Check the best parameters
print("Best parameters found:", rbf_svm.best_params_)

# Check the kernel of the best estimator
print("Kernel used:", rbf_svm.best_estimator_.kernel)


In [None]:
y_pred_rbf = rbf_svm.predict(test_paths)

In [None]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, y_pred_rbf)  
print(f"Accuracy: {accuracy * 100:.2f}%")

# Calculate confusion matrix
conf_matrix = confusion_matrix(test_labels, y_pred_rbf)
print("Confusion Matrix:")
print(conf_matrix)

# Calculate precision, recall, and F1 score
precision = precision_score(test_labels, y_pred_rbf)
recall = recall_score(test_labels, y_pred_rbf)
f1 = f1_score(test_labels, y_pred_rbf)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
