In [None]:
# Connect Google Drive
# Mounts Drive at /content/drive; later cells read the dataset from, and write
# model files to, paths under /content/drive/MyDrive/.
# The google.colab module is Colab-specific, so this cell is expected to run
# only inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf
from keras.layers import Dense, Dropout
from keras.models import Sequential, load_model
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.model_selection import KFold
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
import os

import numpy as np

In [None]:
# Load the transformed dataset (CSV export) from the mounted Drive
data = pd.read_csv(
    '/content/drive/MyDrive/Colab Notebooks/Data/transformed_data.csv'
)
# Wrap the loaded table in its own DataFrame object
df = pd.DataFrame(data=data)

# Split the table into model inputs and label.
# NOTE(review): the feature slice keeps every column except the LAST one; this
# only excludes the label if 'DeepFake' is the final column -- confirm.
ftr = df.iloc[:, :df.shape[1] - 1]
target = df.loc[:, 'DeepFake']  # label column

In [None]:
# Conventional ML aliases: X is the feature table, y the label series.
X, y = ftr, target

In [None]:
# Define the K-fold Cross Validator.
# shuffle=True randomises which rows land in which fold; pinning random_state
# makes that assignment identical on every run, so per-fold scores can be
# compared across kernel restarts. (Network weight initialisation is not
# seeded here, so training itself can still vary between runs.)
num_folds = 5
kfold = KFold(n_splits=num_folds, shuffle=True, random_state=42)

# Initialize the variables to track the best model and performance across folds
best_accuracy = 0    # highest held-out accuracy seen so far (fraction, not %)
best_model = None    # model object that achieved best_accuracy
results = []         # one {'fold', 'loss', 'accuracy'} dict per fold
acc_per_fold = []    # held-out accuracy per fold, in percent
loss_per_fold = []   # held-out loss per fold


In [None]:
# Train one freshly initialised feed-forward network per cross-validation fold,
# score it on that fold's held-out rows, and keep the best-scoring model.
fold_no = 1
results_file_path = '/content/drive/MyDrive/Colab Notebooks/FF_Neural_Network_ML/Models/'
filename = 'new_best_model_FFNN_extended_ftr.keras'

# Reset the trackers here so that re-running this cell starts from a clean
# slate instead of appending to lists (and comparing against a best_accuracy)
# left over from a previous run.
best_accuracy = 0
best_model = None
results = []
acc_per_fold = []
loss_per_fold = []

# Make sure the output directory exists before anything tries to write to it.
os.makedirs(results_file_path, exist_ok=True)

for train, test in kfold.split(X, y):
    # Define the model architecture (re-initialized for each fold)
    model = Sequential([
        Dense(128, input_dim=X.shape[1], activation='relu'),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Each fold checkpoints to its own file. A new ModelCheckpoint starts with
    # no memory of earlier folds, so sharing one path would let a later (and
    # possibly worse) fold overwrite an earlier fold's checkpoint.
    fold_checkpoint_path = os.path.join(results_file_path, f'fold_{fold_no}_{filename}')

    # Define callbacks
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    # min_lr must sit below the optimizer's starting learning rate, otherwise
    # the scheduler has no room to reduce it (Adam's default is 0.001).
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=1e-6)
    model_checkpoint = ModelCheckpoint(fold_checkpoint_path, save_best_only=True, monitor='val_accuracy', mode='max')

    print(f'Training for fold {fold_no} ...')

    # Fit data to model.
    # NOTE(review): the held-out fold is used both as validation_data (driving
    # early stopping / LR schedule / checkpointing) and for the final score
    # below, so the reported metrics are optimistic. Consider carving a
    # separate validation split out of the training rows.
    history = model.fit(X.iloc[train], y.iloc[train],
                        batch_size=32,
                        epochs=150,
                        verbose=1,
                        validation_data=(X.iloc[test], y.iloc[test]),
                        callbacks=[early_stopping, reduce_lr, model_checkpoint])

    # Evaluate the model on the held-out fold: scores[0] is loss, scores[1] accuracy
    scores = model.evaluate(X.iloc[test], y.iloc[test], verbose=0)
    print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
    acc_per_fold.append(scores[1] * 100)
    loss_per_fold.append(scores[0])
    results.append({'fold': fold_no, 'loss': scores[0], 'accuracy': scores[1]})

    # Check if the current model is the best model; if so, also persist it
    # under the shared filename so that file always holds the best fold's model.
    if scores[1] > best_accuracy:
        best_accuracy = scores[1]
        best_model = model
        best_model.save(os.path.join(results_file_path, filename))
    fold_no += 1

In [None]:
# Report the cross-validation outcome: one line per fold, then the aggregate.
rule = '------------------------------------------------------------------------'
report = [rule, 'Score per fold']
for fold_idx, fold_acc in enumerate(acc_per_fold):
    # loss_per_fold is indexed in step with acc_per_fold
    report.append(rule)
    report.append(f'> Fold {fold_idx+1} - Loss: {loss_per_fold[fold_idx]} - Accuracy: {fold_acc}%')
report += [
    rule,
    'Average scores for all folds:',
    f'> Accuracy: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})',
    f'> Loss: {np.mean(loss_per_fold)}',
    rule,
]
# Emit each collected line on its own row, exactly as individual prints would.
print(*report, sep='\n')



In [None]:
# Print the best model's score.
# best_accuracy is the highest per-fold accuracy recorded by the training loop,
# stored as a fraction; it is scaled by 100 and shown with two decimals.
print(f"Best Model's Accuracy: {best_accuracy * 100:.2f}%")

Best Model's Accuracy: 83.57%


In [None]:
# Persist the model object the training loop kept in `best_model`.
# best_model starts as None and is only assigned when a fold's accuracy exceeds
# the running best, so this line raises AttributeError if no fold did.
# NOTE(review): the '.h5' suffix presumably selects Keras' HDF5 format, whereas
# the training-loop checkpoints use '.keras' -- confirm which format consumers expect.
best_model.save('/content/drive/MyDrive/Colab Notebooks/FF_Neural_Network_ML/Models/new_best_model_FFNN_extended_ftr.h5')