In [1]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
import joblib

In [5]:
#Feature groups stacked into the design matrix; extend the list with 'hog', 'color', or 'gabor'.
features_to_use = ['lbp']

#Candidate classifiers keyed by display name, all built with the same fixed random_state.
models = dict([
    ('Random Forest', RandomForestClassifier(random_state=1)),
    ('Gradient Boosting', GradientBoostingClassifier(random_state=1)),
    ('SVM', SVC(random_state=1)),
])

In [19]:
#These functions help set a loop/pipeline for experimenting with different features/models.
def _load_feature_matrix(path, features_to_use):
    #Read the selected arrays from one .npz archive and join them column-wise.
    #np.load keeps an .npz archive open until it is closed, so the context
    #manager releases the file handle as soon as the arrays have been read.
    with np.load(path) as archive:
        return np.concatenate([archive[feature] for feature in features_to_use], axis=1)


def load_and_combine_features(real_path, fake_path, features_to_use):
    """Load real and fake feature archives and build one labelled dataset.

    Parameters
    ----------
    real_path : str
        Path to the .npz archive holding the features of the real images.
    fake_path : str
        Path to the .npz archive holding the features of the fake images.
    features_to_use : iterable of str
        Names of the arrays to read from each archive. The selected arrays
        are concatenated along axis 1, in the order given.

    Returns
    -------
    X : numpy.ndarray
        Real rows followed by fake rows.
    y : numpy.ndarray
        Float labels aligned with X: 0 for every real row, 1 for every fake row.

    Raises
    ------
    ValueError
        If features_to_use is empty.
    """
    #Materialise once so a one-shot iterable can be reused for both archives.
    features_to_use = list(features_to_use)
    if not features_to_use:
        raise ValueError("features_to_use must contain at least one feature name")

    #We extract and concatenate selected features.
    real_features = _load_feature_matrix(real_path, features_to_use)
    fake_features = _load_feature_matrix(fake_path, features_to_use)

    #We combine real and fake images.
    X = np.vstack([real_features, fake_features])
    y = np.hstack([np.zeros(len(real_features)), np.ones(len(fake_features))])

    return X, y

def print_metrics(y_true, y_pred, dataset_name, model_name):
    """Print accuracy, F1 score and the confusion matrix for one model on one split.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by the model.
    dataset_name : str
        Name of the split shown in the header (e.g. "Train", "Valid", "Test").
    model_name : str
        Name of the model shown in the header. This used to be misspelled
        in the signature, so the header silently read a global variable
        instead of the argument.

    Returns
    -------
    None
        The metrics are only printed.
    """
    #We choose the following evaluation metrics.
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred)

    print(f"\n{model_name} - {dataset_name} Results:")
    print(f" Accuracy: {acc:.4f}")
    print(f" F1 Score: {f1:.4f}")
    print(" Confusion Matrix:")
    #Indent every row (not only the first) so the matrix columns line up.
    for row in str(cm).splitlines():
        print(f"   {row}")

In [20]:
#Load the train / validation / test splits from their (real, fake) archive pairs.
print("Loading Features...")

train_files = ('train_real_all_features.npz', 'train_fake_all_features.npz')
valid_files = ('valid_real_all_features.npz', 'valid_fake_all_features.npz')
test_files = ('test_real_all_features.npz', 'test_fake_all_features.npz')

X_train, y_train = load_and_combine_features(*train_files, features_to_use)
X_valid, y_valid = load_and_combine_features(*valid_files, features_to_use)
X_test, y_test = load_and_combine_features(*test_files, features_to_use)

#Quick sanity check of the resulting matrix shapes.
print(f"Train set: {X_train.shape}, Valid set: {X_valid.shape}, Test set: {X_test.shape}")

Loading Features...
Train set: (10000, 10), Valid set: (2000, 10), Test set: (2000, 10)


In [21]:
#Scaling is recommended for LBP features; the scaler is fitted on the training split only
#and then applied unchanged to the validation and test splits.
print("\nScaling features...")
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_valid_scaled, X_test_scaled = (scaler.transform(split) for split in (X_valid, X_test))

#Persist the fitted scaler to disk.
joblib.dump(scaler, 'scaler.joblib')
print("Scaler saved to 'scaler.joblib'")


Scaling features...
Scaler saved to 'scaler.joblib'


In [24]:
#Fit every candidate model, report its metrics on each split, and persist it to disk.
trained_models = {}

for model_name, model in models.items():
    print(f"\n{'+'*50}")
    print(f"Training {model_name} ...")

    #Fit on the scaled training data.
    model.fit(X_train_scaled, y_train)

    #Predict on train, validation and test in that order.
    y_train_pred, y_valid_pred, y_test_pred = (
        model.predict(features)
        for features in (X_train_scaled, X_valid_scaled, X_test_scaled)
    )

    #Report the metrics split by split.
    evaluations = (
        ("Train", y_train, y_train_pred),
        ("Valid", y_valid, y_valid_pred),
        ("Test", y_test, y_test_pred),
    )
    for split_label, split_truth, split_pred in evaluations:
        print_metrics(split_truth, split_pred, split_label, model_name)

    #File name is derived from the display name: lower-cased, spaces replaced by underscores.
    model_filename = f"{model_name.lower().replace(' ', '_')}_model.joblib"
    joblib.dump(model, model_filename)
    print(f"\nModel saved to '{model_filename}'")

    #Keep the fitted estimator available under its display name.
    trained_models[model_name] = model

print("\n" + "+"*50)
print("All models trained and saved.")


++++++++++++++++++++++++++++++++++++++++++++++++++
Training Random Forest ...

Random Forest - Train Results:
 Accuracy: 1.0000
 F1 Score: 1.0000
 Confusion Matrix:
   [[5000    0]
 [   0 5000]]

Random Forest - Valid Results:
 Accuracy: 0.6240
 F1 Score: 0.6259
 Confusion Matrix:
   [[619 381]
 [371 629]]

Random Forest - Test Results:
 Accuracy: 0.6310
 F1 Score: 0.6411
 Confusion Matrix:
   [[603 397]
 [341 659]]

Model saved to 'random_forest_model.joblib'

++++++++++++++++++++++++++++++++++++++++++++++++++
Training Gradient Boosting ...

Gradient Boosting - Train Results:
 Accuracy: 0.6769
 F1 Score: 0.6965
 Confusion Matrix:
   [[3062 1938]
 [1293 3707]]

Gradient Boosting - Valid Results:
 Accuracy: 0.6180
 F1 Score: 0.6430
 Confusion Matrix:
   [[548 452]
 [312 688]]

Gradient Boosting - Test Results:
 Accuracy: 0.6210
 F1 Score: 0.6428
 Confusion Matrix:
   [[560 440]
 [318 682]]

Model saved to 'gradient_boosting_model.joblib'

+++++++++++++++++++++++++++++++++++++++++++++++