In [15]:
# Required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import cv2
import os
from PIL import Image

In [16]:
def load_cifake_data(data_path):
    """Load CIFAKE dataset from test set"""
    real_images = []
    fake_images = []
    
    # Load real images from test set
    real_path = os.path.join(data_path, 'test', 'REAL')
    for img_file in os.listdir(real_path):
        img = Image.open(os.path.join(real_path, img_file))
        real_images.append(np.array(img))
    
    # Load fake images from test set
    fake_path = os.path.join(data_path, 'test', 'FAKE')
    for img_file in os.listdir(fake_path):
        img = Image.open(os.path.join(fake_path, img_file))
        fake_images.append(np.array(img))
    
    # Create labels (0 = REAL, 1 = FAKE)
    X = np.array(real_images + fake_images)
    y = np.array([0] * len(real_images) + [1] * len(fake_images))
    
    # Normalize pixel values to 0-1 range
    X = X.astype('float32') / 255.0
    
    # Flatten images for traditional ML algorithms
    X_flattened = X.reshape(X.shape[0], -1)
    
    return X, X_flattened, y

In [17]:
# Set data path to CIFAKE directory
data_path = os.path.join(os.getcwd(), 'data', 'CIFAKE')

# Load and split data
X, X_flat, y = load_cifake_data(data_path)
X_train, X_test, y_train, y_test = train_test_split(X_flat, y, test_size=0.2, random_state=42, stratify=y)

# For neural networks, keep original shape
X_train_cnn, X_test_cnn, _, _ = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [18]:
from sklearn.linear_model import LogisticRegression

# Implementation
lr_model = LogisticRegression(random_state=42, max_iter=1000)
lr_model.fit(X_train, y_train)
lr_pred = lr_model.predict(X_test)

# Evaluation
lr_accuracy = accuracy_score(y_test, lr_pred)
print(f"Logistic Regression Accuracy: {lr_accuracy:.4f}")

Logistic Regression Accuracy: 0.6555


In [19]:
from sklearn.neighbors import KNeighborsClassifier

# Try different k values
k_values = [3, 5, 7, 9, 11]
best_k = 5  # You can optimize this

knn_model = KNeighborsClassifier(n_neighbors=best_k)
knn_model.fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)

In [20]:
from sklearn.svm import SVC

# Use RBF kernel
svm_model = SVC(kernel='rbf', random_state=42)
svm_model.fit(X_train, y_train)
svm_pred = svm_model.predict(X_test)

In [21]:
from sklearn.naive_bayes import GaussianNB

nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_pred = nb_model.predict(X_test)

In [22]:
from sklearn.tree import DecisionTreeClassifier

dt_model = DecisionTreeClassifier(random_state=42, max_depth=10)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)

In [23]:
from sklearn.ensemble import RandomForestClassifier

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

In [10]:
from xgboost import XGBClassifier

xgb_model = XGBClassifier(random_state=42, eval_metric='logloss')
xgb_model.fit(X_train, y_train)
xgb_pred = xgb_model.predict(X_test)

In [24]:
from sklearn.ensemble import AdaBoostClassifier

ada_model = AdaBoostClassifier(n_estimators=100, random_state=42)
ada_model.fit(X_train, y_train)
ada_pred = ada_model.predict(X_test)

In [25]:
# Using Keras/TensorFlow
mlp_model = Sequential([
    Dense(512, activation='relu', input_shape=(3072,)),  # 32*32*3 = 3072
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

mlp_model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='binary_crossentropy',
                 metrics=['accuracy'])

mlp_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
mlp_pred = (mlp_model.predict(X_test) > 0.5).astype(int).flatten()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step


In [None]:
def evaluate_model(y_true, y_pred, model_name):
    """Comprehensive evaluation function"""
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    
    return {
        'Model': model_name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1
    }

In [None]:
# Create comparison dataframe
results = []
results.append(evaluate_model(y_test, lr_pred, 'Logistic Regression'))
results.append(evaluate_model(y_test, knn_pred, 'KNN'))
results.append(evaluate_model(y_test, svm_pred, 'SVM'))
results.append(evaluate_model(y_test, nb_pred, 'Naive Bayes'))
results.append(evaluate_model(y_test, dt_pred, 'Decision Tree'))
results.append(evaluate_model(y_test, rf_pred, 'Random Forest'))
results.append(evaluate_model(y_test, xgb_pred, 'XGBoost'))
results.append(evaluate_model(y_test, ada_pred, 'AdaBoost'))
results.append(evaluate_model(y_test, mlp_pred, 'MLP'))

results_df = pd.DataFrame(results)

In [None]:
# Display Results DataFrame
print("="*80)
print("MODEL COMPARISON RESULTS")
print("="*80)
print("\nDetailed Performance Metrics:\n")
print(results_df.to_string(index=False))
print("\n" + "="*80)

In [None]:
# Visualize Model Accuracies
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# 1. Accuracy Comparison Bar Chart
ax1 = axes[0, 0]
models = results_df['Model'].values
accuracies = results_df['Accuracy'].values
colors = plt.cm.viridis(np.linspace(0, 1, len(models)))
bars = ax1.bar(models, accuracies, color=colors, edgecolor='black', linewidth=1.5)
ax1.set_ylabel('Accuracy', fontsize=12, fontweight='bold')
ax1.set_title('Model Accuracy Comparison', fontsize=14, fontweight='bold')
ax1.set_ylim([0, 1])
ax1.grid(axis='y', alpha=0.3)
for i, (bar, acc) in enumerate(zip(bars, accuracies)):
    ax1.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, 
             f'{acc:.3f}', ha='center', va='bottom', fontweight='bold')
plt.setp(ax1.xaxis.get_majorticklabels(), rotation=45, ha='right')

# 2. Precision Comparison
ax2 = axes[0, 1]
precisions = results_df['Precision'].values
bars = ax2.bar(models, precisions, color=colors, edgecolor='black', linewidth=1.5)
ax2.set_ylabel('Precision', fontsize=12, fontweight='bold')
ax2.set_title('Model Precision Comparison', fontsize=14, fontweight='bold')
ax2.set_ylim([0, 1])
ax2.grid(axis='y', alpha=0.3)
for i, (bar, prec) in enumerate(zip(bars, precisions)):
    ax2.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, 
             f'{prec:.3f}', ha='center', va='bottom', fontweight='bold')
plt.setp(ax2.xaxis.get_majorticklabels(), rotation=45, ha='right')

# 3. Recall Comparison
ax3 = axes[1, 0]
recalls = results_df['Recall'].values
bars = ax3.bar(models, recalls, color=colors, edgecolor='black', linewidth=1.5)
ax3.set_ylabel('Recall', fontsize=12, fontweight='bold')
ax3.set_title('Model Recall Comparison', fontsize=14, fontweight='bold')
ax3.set_ylim([0, 1])
ax3.grid(axis='y', alpha=0.3)
for i, (bar, rec) in enumerate(zip(bars, recalls)):
    ax3.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, 
             f'{rec:.3f}', ha='center', va='bottom', fontweight='bold')
plt.setp(ax3.xaxis.get_majorticklabels(), rotation=45, ha='right')

# 4. F1-Score Comparison
ax4 = axes[1, 1]
f1_scores = results_df['F1-Score'].values
bars = ax4.bar(models, f1_scores, color=colors, edgecolor='black', linewidth=1.5)
ax4.set_ylabel('F1-Score', fontsize=12, fontweight='bold')
ax4.set_title('Model F1-Score Comparison', fontsize=14, fontweight='bold')
ax4.set_ylim([0, 1])
ax4.grid(axis='y', alpha=0.3)
for i, (bar, f1) in enumerate(zip(bars, f1_scores)):
    ax4.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.02, 
             f'{f1:.3f}', ha='center', va='bottom', fontweight='bold')
plt.setp(ax4.xaxis.get_majorticklabels(), rotation=45, ha='right')

plt.tight_layout()
plt.show()

print("\n✅ Visualizations displayed successfully!")

In [None]:
# Best Model Summary
print("\n" + "="*80)
print("🏆 BEST MODEL ANALYSIS")
print("="*80)

# Find best model by each metric
best_accuracy_idx = results_df['Accuracy'].idxmax()
best_precision_idx = results_df['Precision'].idxmax()
best_recall_idx = results_df['Recall'].idxmax()
best_f1_idx = results_df['F1-Score'].idxmax()

print(f"\n🥇 Best Accuracy: {results_df.loc[best_accuracy_idx, 'Model']}")
print(f"   Score: {results_df.loc[best_accuracy_idx, 'Accuracy']:.4f}")

print(f"\n🥇 Best Precision: {results_df.loc[best_precision_idx, 'Model']}")
print(f"   Score: {results_df.loc[best_precision_idx, 'Precision']:.4f}")

print(f"\n🥇 Best Recall: {results_df.loc[best_recall_idx, 'Model']}")
print(f"   Score: {results_df.loc[best_recall_idx, 'Recall']:.4f}")

print(f"\n🥇 Best F1-Score: {results_df.loc[best_f1_idx, 'Model']}")
print(f"   Score: {results_df.loc[best_f1_idx, 'F1-Score']:.4f}")

# Overall best model (by accuracy)
overall_best = results_df.loc[best_accuracy_idx]
print(f"\n{'='*80}")
print(f"✨ OVERALL BEST MODEL: {overall_best['Model']}")
print(f"{'='*80}")
print(f"Accuracy:  {overall_best['Accuracy']:.4f}")
print(f"Precision: {overall_best['Precision']:.4f}")
print(f"Recall:    {overall_best['Recall']:.4f}")
print(f"F1-Score:  {overall_best['F1-Score']:.4f}")
print(f"{'='*80}\n")

In [None]:
# Create a detailed heatmap of all metrics
fig, ax = plt.subplots(figsize=(12, 8))

# Prepare data for heatmap
heatmap_data = results_df[['Accuracy', 'Precision', 'Recall', 'F1-Score']].values

# Create heatmap
im = ax.imshow(heatmap_data, cmap='RdYlGn', aspect='auto', vmin=0, vmax=1)

# Set ticks and labels
ax.set_xticks(np.arange(len(['Accuracy', 'Precision', 'Recall', 'F1-Score'])))
ax.set_yticks(np.arange(len(results_df)))
ax.set_xticklabels(['Accuracy', 'Precision', 'Recall', 'F1-Score'], fontweight='bold')
ax.set_yticklabels(results_df['Model'], fontweight='bold')

# Add colorbar
cbar = plt.colorbar(im, ax=ax)
cbar.set_label('Score', rotation=270, labelpad=20, fontweight='bold')

# Add text annotations
for i in range(len(results_df)):
    for j in range(len(['Accuracy', 'Precision', 'Recall', 'F1-Score'])):
        text = ax.text(j, i, f'{heatmap_data[i, j]:.3f}',
                      ha="center", va="center", color="black", fontweight='bold', fontsize=10)

ax.set_title('Model Performance Heatmap - All Metrics', fontsize=14, fontweight='bold', pad=20)
plt.tight_layout()
plt.show()

print("✅ Heatmap visualization displayed!")

In [None]:
# Model Rankings by Different Metrics
print("\n" + "="*80)
print("📊 MODEL RANKINGS")
print("="*80)

# Rank by Accuracy
print("\n🏆 RANKINGS BY ACCURACY:")
print("-" * 80)
acc_ranking = results_df.sort_values('Accuracy', ascending=False).reset_index(drop=True)
for idx, row in acc_ranking.iterrows():
    medal = ['🥇', '🥈', '🥉', '4️⃣', '5️⃣', '6️⃣', '7️⃣', '8️⃣', '9️⃣'][idx]
    print(f"{medal} {idx+1}. {row['Model']:25} → Accuracy: {row['Accuracy']:.4f}")

# Rank by F1-Score
print("\n\n🏆 RANKINGS BY F1-SCORE:")
print("-" * 80)
f1_ranking = results_df.sort_values('F1-Score', ascending=False).reset_index(drop=True)
for idx, row in f1_ranking.iterrows():
    medal = ['🥇', '🥈', '🥉', '4️⃣', '5️⃣', '6️⃣', '7️⃣', '8️⃣', '9️⃣'][idx]
    print(f"{medal} {idx+1}. {row['Model']:25} → F1-Score: {row['F1-Score']:.4f}")

print("\n" + "="*80)