In [1]:
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
from skimage import feature, io
from skimage.transform import resize
from skimage.util import img_as_ubyte
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from fuzzywuzzy import fuzz, process


In [2]:

# --------------------- Feature Extraction ---------------------
class FeatureExtraction():
    def __init__(self):
        self.image_size = (240, 240)

    def extract_labels(self):
        paths = []
        non_valid_files = ['810199515_real_none_jungle_10', '810199515_real_none_jungle_1', 'desktop.ini', '810100473_real_none_sea_4']
        for image_path in os.listdir(self.image_dir):
            if not any(nvf in image_path for nvf in non_valid_files):
                paths.append(image_path)
        return pd.DataFrame({0: paths})

    def lbp(self, path):
        try:
            img = io.imread(os.path.join(self.image_dir, path))
            if img.ndim == 3:
                from skimage.color import rgb2gray
                img = rgb2gray(img)
            img = resize(img, self.image_size)
            img = img_as_ubyte(img)
            lbp = feature.local_binary_pattern(img, 8, 1, method='uniform')
            hist, _ = np.histogram(lbp.ravel(), bins=np.arange(0, 59))
            return hist
        except:
            return np.zeros(58)

    def fft(self, path):
        try:
            img = io.imread(os.path.join(self.image_dir, path))
            if img.ndim == 3:
                from skimage.color import rgb2gray
                img = rgb2gray(img)
            img = resize(img, self.image_size)
            fft_img = np.fft.fft2(img)
            fft_shift = np.fft.fftshift(fft_img)
            magnitude = np.log(1 + np.abs(fft_shift))
            return magnitude.flatten()
        except:
            return np.zeros(self.image_size[0]*self.image_size[1])

    def extract(self):
        self.df = self.extract_labels()
        tqdm.pandas()
        self.df['lbp'] = self.df[0].progress_apply(self.lbp)
        self.df['fft'] = self.df[0].progress_apply(self.fft)
        lbp_df = pd.DataFrame(self.df['lbp'].tolist())
        fft_df = pd.DataFrame(self.df['fft'].tolist())
        return self.df, pd.concat([lbp_df, fft_df], axis=1)

    def pca(self, features_df):
        scaled = StandardScaler().fit_transform(features_df)
        reduced = PCA(n_components=512).fit_transform(scaled)
        return pd.DataFrame(reduced)

    def run(self, real_dir, fake_dir):
        self.image_dir = real_dir
        real_labels, real_features = self.extract()

        self.image_dir = fake_dir
        fake_labels, fake_features = self.extract()

        features = pd.concat([real_features, fake_features], axis=0)
        labels = pd.concat([real_labels, fake_labels], axis=0)
        return self.pca(features), labels.drop(['lbp', 'fft'], axis=1)


In [3]:

# --------------------- Preprocessing ---------------------
class Preprocessing():
    def __init__(self, features_file, labels_file):
        self.features_df = features_file
        self.labels_df = labels_file[0].str.replace('-', '_').str.split('_', expand=True)
        self.labels_df = self.labels_df.drop([0, 4], axis=1)
        self.labels_df.columns = ["class", "generator", "category"]

    def fix_category_names(self):
        self.labels_df["category"] = self.labels_df["category"].replace({"forest": "jungle"})
        valid = ['sea', 'mountain', 'jungle']
        self.labels_df["category"] = self.labels_df["category"].apply(lambda x: process.extractOne(x.lower(), valid, scorer=fuzz.token_set_ratio)[0])

    def fix_class_names(self):
        self.labels_df["class"] = self.labels_df["class"].str.lower()

    def fix_generator_names(self):
        valid = ["none", "stable", "dalle", "dream", "midjourney", "craiyon"]
        self.labels_df["generator"] = self.labels_df["generator"].apply(lambda x: process.extractOne(x.lower(), valid, scorer=fuzz.token_set_ratio)[0])

    def normalize(self):
        self.features_df = pd.DataFrame(StandardScaler().fit_transform(self.features_df))

    def preprocess(self):
        self.fix_category_names()
        self.fix_generator_names()
        self.fix_class_names()
        self.normalize()

    def get_dataframes(self):
        return self.labels_df, self.features_df

In [4]:

# --------------------- Pipeline ---------------------

# Update your real and fake image directory paths
real_dir = r"C:\Desktop\MLAssignment\real"
fake_dir = r"C:\Desktop\MLAssignment\fake"

# Step 1: Extract handcrafted features (LBP + FFT)
feature_extraction = FeatureExtraction()
h_features, h_labels = feature_extraction.run(real_dir, fake_dir)

# Step 2: Preprocess features and labels
preprocessor = Preprocessing(h_features, h_labels)
preprocessor.preprocess()
labels_handcrafted, features_handcrafted = preprocessor.get_dataframes()

100%|██████████████████████████████████████████████████████████████████████████████| 1707/1707 [45:50<00:00,  1.61s/it]
100%|██████████████████████████████████████████████████████████████████████████████| 1707/1707 [31:27<00:00,  1.11s/it]
100%|██████████████████████████████████████████████████████████████████████████████| 1710/1710 [03:06<00:00,  9.16it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 1710/1710 [02:45<00:00, 10.33it/s]


In [None]:

# Step 3: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(features_handcrafted, labels_handcrafted["class"], test_size=0.3, random_state=42)


In [None]:
# Step 4: Train SVM
svm_model = SVC(kernel='rbf', C=1, gamma='scale')
svm_model.fit(X_train, y_train)

In [None]:
# Step 5: Evaluate
y_pred = svm_model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))