In [2]:
# Required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
import cv2
import os
from PIL import Image

In [3]:
def load_cifake_data(data_path):
    """Load the REAL and FAKE images of the CIFAKE ``test`` folder into arrays.

    Parameters
    ----------
    data_path : str
        Root directory of the CIFAKE dataset, i.e. the folder that contains
        ``test/REAL`` and ``test/FAKE``. If those folders are not found under
        ``data_path``, a warning is issued and the location this notebook was
        originally written against is used instead, so existing calls keep
        working.

    Returns
    -------
    X : np.ndarray
        float32 image array with pixel values divided by 255, REAL images
        first, then FAKE.
    X_flattened : np.ndarray
        ``X`` reshaped to ``(n_images, -1)`` for estimators that expect
        one feature vector per sample.
    y : np.ndarray
        Labels aligned with ``X``: 0 for REAL, 1 for FAKE.

    Raises
    ------
    ValueError
        If neither folder contains any image file.
    """
    import warnings

    # Location hard-coded by the original notebook; kept only as a fallback.
    legacy_root = '/Users/akhileshkumaravel/ML Prj CIFAKE/data/CIFAKE'

    def _class_dir(root, label):
        """Return the folder holding the images of one class."""
        return os.path.join(root, 'test', label)

    def _load_dir(directory):
        """Read every regular, non-hidden file in ``directory`` as an RGB array."""
        images = []
        # Sorted so sample order (and any seeded split built on it) does not
        # depend on the filesystem's listing order.
        for file_name in sorted(os.listdir(directory)):
            file_path = os.path.join(directory, file_name)
            # Skip hidden entries (e.g. .DS_Store) and sub-directories.
            if file_name.startswith('.') or not os.path.isfile(file_path):
                continue
            # Context manager closes the file handle once pixels are copied.
            with Image.open(file_path) as img:
                # Force three channels so all arrays stack to one shape.
                images.append(np.array(img.convert('RGB')))
        return images

    root = data_path
    has_both_classes = (
        os.path.isdir(_class_dir(root, 'REAL'))
        and os.path.isdir(_class_dir(root, 'FAKE'))
    )
    if not has_both_classes:
        warnings.warn(
            f"No test/REAL and test/FAKE folders under {data_path!r}; "
            f"falling back to {legacy_root!r}."
        )
        root = legacy_root

    real_images = _load_dir(_class_dir(root, 'REAL'))
    fake_images = _load_dir(_class_dir(root, 'FAKE'))

    if not real_images and not fake_images:
        raise ValueError(f"No image files found under {root!r}")

    # Stack REAL then FAKE and build the matching label vector.
    X = np.array(real_images + fake_images)
    y = np.array([0] * len(real_images) + [1] * len(fake_images))

    # Normalize pixel values
    X = X.astype('float32') / 255.0

    # Flatten images for traditional ML algorithms
    X_flattened = X.reshape(X.shape[0], -1)

    return X, X_flattened, y

In [4]:
# Dataset root: override with the CIFAKE_DIR environment variable; the default
# is the location this notebook was originally written against.
DATA_DIR = os.environ.get(
    'CIFAKE_DIR', '/Users/akhileshkumaravel/ML Prj CIFAKE/data/CIFAKE'
)

# Load images (original shape and flattened) with their labels.
X, X_flat, y = load_cifake_data(DATA_DIR)

# Split the flattened and image-shaped views in ONE call so both are
# partitioned by the same stratified shuffle and share y_train / y_test.
# X_train / X_test feed the flat-feature models; X_train_cnn / X_test_cnn
# keep the original image shape for neural networks.
X_train, X_test, X_train_cnn, X_test_cnn, y_train, y_test = train_test_split(
    X_flat, X, y, test_size=0.2, random_state=42, stratify=y
)

In [5]:
from sklearn.linear_model import LogisticRegression

# Logistic-regression baseline on the flattened pixels: fit on the training
# split, then predict the held-out split.
lr_model = LogisticRegression(max_iter=1000, random_state=42)
lr_pred = lr_model.fit(X_train, y_train).predict(X_test)

# Report hold-out accuracy.
lr_accuracy = accuracy_score(y_test, lr_pred)
print(f"Logistic Regression Accuracy: {lr_accuracy:.4f}")

Logistic Regression Accuracy: 0.6567


In [6]:
from sklearn.neighbors import KNeighborsClassifier

# Candidate neighbourhood sizes to compare.
k_values = [3, 5, 7, 9, 11]

# Pick k by 3-fold cross-validated accuracy on the training split only, so the
# held-out test split plays no part in model selection.
knn_cv_scores = {
    k: cross_val_score(
        KNeighborsClassifier(n_neighbors=k), X_train, y_train,
        cv=3, scoring='accuracy'
    ).mean()
    for k in k_values
}
# On ties, max() keeps the first (smallest) k in k_values.
best_k = max(knn_cv_scores, key=knn_cv_scores.get)
print(f"Best k by 3-fold CV: {best_k} (accuracy {knn_cv_scores[best_k]:.4f})")

# Refit on the full training split with the selected k and predict the test split.
knn_model = KNeighborsClassifier(n_neighbors=best_k)
knn_model.fit(X_train, y_train)
knn_pred = knn_model.predict(X_test)

In [7]:
from sklearn.svm import SVC

# Support-vector classifier with an RBF kernel: fit on the training split,
# then predict the held-out split.
svm_model = SVC(random_state=42, kernel='rbf')
svm_pred = svm_model.fit(X_train, y_train).predict(X_test)

In [8]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes with default settings: fit on the training split,
# then predict the held-out split.
nb_model = GaussianNB()
nb_pred = nb_model.fit(X_train, y_train).predict(X_test)

In [9]:
from sklearn.tree import DecisionTreeClassifier

# Single decision tree capped at depth 10: fit on the training split,
# then predict the held-out split.
dt_model = DecisionTreeClassifier(max_depth=10, random_state=42)
dt_pred = dt_model.fit(X_train, y_train).predict(X_test)

In [10]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees: fit on the training split,
# then predict the held-out split.
rf_model = RandomForestClassifier(random_state=42, n_estimators=100)
rf_pred = rf_model.fit(X_train, y_train).predict(X_test)

In [None]:
from xgboost import XGBClassifier

# Gradient-boosted trees scored with log-loss during training.
xgb_model = XGBClassifier(random_state=42, eval_metric='logloss')
# Fit on the training split (the original line had a stray space inside the
# name `X_train`, which made the whole cell a syntax error).
xgb_model.fit(X_train, y_train)
# Predictions on the held-out split; consumed by the results table.
xgb_pred = xgb_model.predict(X_test)

In [12]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost ensemble of 100 estimators: fit on the training split,
# then predict the held-out split.
ada_model = AdaBoostClassifier(random_state=42, n_estimators=100)
ada_pred = ada_model.fit(X_train, y_train).predict(X_test)

In [13]:
# Using Keras/TensorFlow
# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation and
# batch shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(42)

# Width of one flattened sample (3072 when images are 32x32 RGB).
n_features = X_train.shape[1]

# Fully connected binary classifier. The input is declared with an explicit
# Input object rather than `input_shape=` on the first Dense layer, which
# recent Keras versions warn about.
mlp_model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(512, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')  # single probability output
])

mlp_model.compile(optimizer=Adam(learning_rate=0.001),
                 loss='binary_crossentropy',
                 metrics=['accuracy'])

# 20% of the training split is held back by Keras for validation.
mlp_model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)

# Threshold the sigmoid output at 0.5 and flatten (n, 1) -> (n,) so the
# predictions line up with y_test.
mlp_pred = (mlp_model.predict(X_test) > 0.5).astype(int).flatten()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [14]:
def evaluate_model(y_true, y_pred, model_name):
    """Score one model's predictions and return the metrics as a dict.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned with ``y_true``.
    model_name : str
        Display name stored under the ``'Model'`` key.

    Returns
    -------
    dict
        Keys ``'Model'``, ``'Accuracy'``, ``'Precision'``, ``'Recall'`` and
        ``'F1-Score'``; each metric is computed by the matching
        ``sklearn.metrics`` function with its default arguments.
    """
    # (column label, scoring function) pairs, in output order.
    scorers = (
        ('Accuracy', accuracy_score),
        ('Precision', precision_score),
        ('Recall', recall_score),
        ('F1-Score', f1_score),
    )

    summary = {'Model': model_name}
    for label, scorer in scorers:
        summary[label] = scorer(y_true, y_pred)
    return summary

In [15]:
# Create comparison dataframe
# (display name, test-split predictions) for every fitted model, in table order.
model_predictions = [
    ('Logistic Regression', lr_pred),
    ('KNN', knn_pred),
    ('SVM', svm_pred),
    ('Naive Bayes', nb_pred),
    ('Decision Tree', dt_pred),
    ('Random Forest', rf_pred),
    ('XGBoost', xgb_pred),
    ('AdaBoost', ada_pred),
    ('MLP', mlp_pred),
]

# One metrics dict per model, all scored against the same y_test.
results = [
    evaluate_model(y_test, predictions, name)
    for name, predictions in model_predictions
]

results_df = pd.DataFrame(results)