In [1]:
import os
import numpy as np
import pandas as pd
from fuzzywuzzy import fuzz, process
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [2]:

# ===============================
# 1. Preprocessing Class
# ===============================
class Preprocessing():
    """Clean filename-derived labels and standardize the matching feature rows.

    Every entry of column ``0`` in ``labels_file`` is treated as a filename
    whose parts are separated by ``_`` (``-`` is accepted as an alternative
    separator), e.g. ``810101213_fake_stable_mountain_2.png``.  Parts 1, 2 and
    3 become the ``class``, ``generator`` and ``category`` label columns.

    Parameters
    ----------
    features_file : pandas.DataFrame
        Feature rows; rows are matched to ``labels_file`` through the index.
    labels_file : pandas.DataFrame
        Frame whose column ``0`` holds the filenames.
    """

    def __init__(self, features_file, labels_file):
        self.features_file = features_file
        self.labels_file = labels_file
        # Both are populated by create_label_df() / preprocess().
        self.labels_df = None
        self.features_df = None

    @staticmethod
    def _match_to_valid(values, valid_values):
        """Map every entry of ``values`` onto its best fuzzy match in ``valid_values``.

        The match is computed once per distinct value and then broadcast back
        with ``Series.map`` instead of re-scoring identical strings row by row.
        """
        best_match = {
            raw: process.extractOne(raw.lower(), valid_values, scorer=fuzz.token_set_ratio)[0]
            for raw in values.unique()
        }
        return values.map(best_match)

    def create_label_df(self):
        """Split the filenames into label columns and drop unparseable rows.

        Sets ``self.labels_df`` (columns ``class``, ``generator``, ``category``)
        and ``self.features_df`` (``features_file`` without the rows whose
        filename did not provide all three label parts).
        """
        parts = (
            self.labels_file[0]
            .str.replace('-', '_', regex=False)
            .str.split('_', expand=True)
        )
        # Select parts 1-3 by position label rather than dropping columns 0 and 4:
        # the number of columns produced by ``expand=True`` depends on the longest
        # filename in the data, so a fixed drop list fails as soon as any filename
        # has fewer or more than five parts.  Missing parts come back as NaN.
        self.labels_df = parts.reindex(columns=[1, 2, 3])
        self.labels_df.columns = ["class", "generator", "category"]

        incomplete = self.labels_df.index[self.labels_df.isnull().any(axis=1)]
        self.labels_df = self.labels_df.drop(incomplete)
        self.features_df = self.features_file.drop(incomplete)

    def fix_category_names(self):
        """Normalize ``category`` to one of ``sea``, ``mountain`` or ``jungle``.

        ``forest`` is treated as an alias of ``jungle``; every other spelling
        is mapped to its closest valid value by fuzzy matching.
        """
        valid_values = ['sea', 'mountain', 'jungle']
        # Lowercase/strip *before* applying the alias so that "Forest" or
        # " forest" are recognised too (the replacement is an exact match).
        category = (
            self.labels_df["category"]
            .str.strip()
            .str.lower()
            .replace({"forest": "jungle"})
        )
        self.labels_df["category"] = self._match_to_valid(category, valid_values)

    def fix_generator_names(self):
        """Normalize ``generator`` to the closest known generator name.

        For example ``dallemini`` becomes ``dalle``.
        """
        valid_values = ["none", "stable", "dalle", "dream", "midjourney", "craiyon"]
        self.labels_df["generator"] = self._match_to_valid(
            self.labels_df["generator"], valid_values)

    def fix_class_names(self):
        """Lowercase the ``class`` column."""
        self.labels_df["class"] = self.labels_df["class"].str.lower()

    def normalize(self):
        """Standardize every feature column with ``StandardScaler``.

        The original index and column labels are kept so the features stay
        index-aligned with ``self.labels_df`` after rows have been dropped.

        NOTE(review): the scaler is fit on all rows held by this object.  If
        the result is split into train/test afterwards, the test rows have
        contributed to the scaling statistics.
        """
        scaled = StandardScaler().fit_transform(self.features_df)
        self.features_df = pd.DataFrame(
            scaled,
            index=self.features_df.index,
            columns=self.features_df.columns,
        )

    def preprocess(self):
        """Run the whole pipeline: parse labels, fix names, scale features."""
        self.create_label_df()
        self.fix_category_names()
        self.fix_generator_names()
        self.fix_class_names()
        self.normalize()

    def get_dataframes(self):
        """Return ``(labels_df, features_df)`` as currently stored."""
        return self.labels_df, self.features_df



In [3]:

# ===============================
# 2. Load Deep Features & Labels
# ===============================
# Directory holding the exported CSVs.  Set the ML_ASSIGNMENT_DIR environment
# variable to run the notebook on another machine; the default is the original
# location.
data_dir = os.environ.get("ML_ASSIGNMENT_DIR", r"C:\Desktop\MLAssignment")
features_path = os.path.join(data_dir, "features.csv")
labels_path = os.path.join(data_dir, "labels.csv")

# header=None: the first line of each file is read as data and the columns are
# labelled 0..N-1.
features_df = pd.read_csv(features_path, header=None)
labels_df = pd.read_csv(labels_path, header=None)

# The preprocessing step pairs features with labels by row index, so the two
# files must describe the same number of samples.
if len(features_df) != len(labels_df):
    raise ValueError(
        f"features.csv has {len(features_df)} rows but labels.csv has {len(labels_df)}"
    )

In [4]:
# Display the raw feature table exactly as read from features_path (no preprocessing yet).
features_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,0.522970,0.299877,1.854656,-0.127650,0.813650,-0.148206,0.086093,-0.099617,-0.060342,2.059088,...,-0.105705,-0.139349,-0.058845,2.431875,0.062066,-0.245895,0.086882,-0.126205,0.218028,0.169009
1,0.013961,0.122444,-0.132735,-0.118628,0.173108,-0.136272,-0.084335,-0.131051,0.161909,0.824182,...,-0.113773,-0.165884,-0.113729,0.221492,-0.142989,-0.146476,0.226548,-0.138845,0.201534,0.249506
2,0.538563,0.509407,1.775910,-0.127933,1.709609,-0.140871,0.127283,-0.092119,-0.083881,1.833654,...,-0.083452,-0.139409,-0.090185,1.966274,-0.121026,-0.134771,0.824233,-0.107500,0.530300,0.368836
3,-0.074457,-0.116068,-0.196760,-0.011271,-0.168376,1.241772,-0.116697,-0.029380,-0.040485,-0.107858,...,0.860665,-0.084137,0.326265,-0.254838,-0.131203,0.494691,-0.008144,-0.004061,-0.208142,-0.168128
4,-0.051129,-0.174848,-0.028430,1.820420,-0.089466,2.193471,0.114552,-0.029080,3.765160,-0.053691,...,5.197541,1.984923,1.948692,-0.195730,-0.029222,3.375637,0.439649,0.881086,-0.012821,-0.026665
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3413,0.188845,0.181740,0.000457,-0.133928,0.575991,-0.167212,0.093093,-0.111037,-0.112299,0.514472,...,-0.075440,-0.164179,-0.101755,0.973446,0.586366,-0.112277,-0.200143,-0.107301,0.880406,0.564792
3414,0.069198,-0.091497,0.124390,-0.169842,0.235213,-0.221722,-0.084336,-0.168707,-0.071297,0.257700,...,-0.099730,-0.154313,-0.100462,0.484658,0.865008,-0.115266,-0.130826,-0.148095,0.845961,0.496394
3415,0.220922,-0.025789,0.154959,-0.162007,0.248210,-0.193766,-0.148826,-0.153745,-0.106148,1.394486,...,-0.100166,-0.174926,-0.096002,0.072601,-0.136642,-0.139678,0.284274,-0.143948,0.323482,0.430027
3416,-0.099168,-0.126265,-0.089775,0.245519,-0.126568,1.240803,-0.120931,-0.040557,1.584138,-0.139105,...,1.359986,0.430271,0.511444,-0.260776,-0.101833,1.298561,0.006774,0.390692,-0.125927,-0.158199


In [5]:
# Display the raw labels: a single column (0) holding one filename per sample.
labels_df

Unnamed: 0,0
0,810101213_fake_stable_mountain_2.png
1,810701006_fake_stable_jungle_2.jpeg
2,810199494_fake_stable_mountain_4.jpeg
3,810199567_real_none_jungle_6.jpeg
4,810101149_real_none_jungle_1.jpg
...,...
3413,810101201_fake_dallemini_jungle_5.jpeg
3414,810198375_fake_stable_mountain_4.jpg
3415,810199427_fake_stable_mountain_5.jpeg
3416,810197636_real_none_jungle_7.jpeg


In [6]:

# ===============================
# 3. Preprocess Deep Features
# ===============================
# Run the whole Preprocessing pipeline (label parsing, name fixes, feature
# scaling) on the raw tables, then fetch the cleaned labels and scaled features.
prep = Preprocessing(features_df, labels_df)
prep.preprocess()
labels, features = prep.get_dataframes()


In [7]:
# Display the cleaned label table (columns: class, generator, category).
labels

Unnamed: 0,class,generator,category
0,fake,stable,mountain
1,fake,stable,jungle
2,fake,stable,mountain
3,real,none,jungle
4,real,none,jungle
...,...,...,...
3413,fake,dalle,jungle
3414,fake,stable,mountain
3415,fake,stable,mountain
3416,real,none,jungle


In [8]:
# Display the feature table after Preprocessing.normalize() has standardized it.
features

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1270,1271,1272,1273,1274,1275,1276,1277,1278,1279
0,2.873741,0.125029,2.311658,-0.496390,0.772084,-0.638401,-0.214220,-0.392080,-0.529741,2.037122,...,-0.578911,-0.569512,-0.442987,2.217323,-0.208017,-0.790129,-0.071660,-0.632013,0.028489,0.308658
1,-0.043403,-0.147687,-0.595956,-0.472900,-0.145721,-0.614686,-0.506408,-0.574083,-0.238171,0.635097,...,-0.588962,-0.611699,-0.606316,-0.048074,-0.432206,-0.655222,0.261084,-0.666814,-0.004990,0.588534
2,2.963107,0.447080,2.196451,-0.497126,2.055862,-0.623826,-0.143602,-0.348666,-0.560621,1.781180,...,-0.551189,-0.569608,-0.536250,1.740134,-0.408193,-0.639337,1.685031,-0.580515,0.662347,1.003422
3,-0.550129,-0.514283,-0.689627,-0.193364,-0.635018,2.123565,-0.561890,0.014594,-0.503691,-0.423074,...,0.624958,-0.481733,0.703064,-0.536260,-0.419320,0.214822,-0.298054,-0.295719,-0.836560,-0.863506
4,-0.416433,-0.604628,-0.443354,4.575988,-0.521952,4.014644,-0.165429,0.016330,4.488897,-0.361577,...,6.027683,2.807767,5.531244,-0.475681,-0.307823,4.124170,0.768783,2.141315,-0.440092,-0.371667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3413,0.958861,-0.056549,-0.401092,-0.512737,0.431552,-0.676167,-0.202219,-0.458205,-0.597903,0.283475,...,-0.541208,-0.608988,-0.570683,0.722595,0.365205,-0.608815,-0.755480,-0.579967,1.373000,1.684726
3414,0.273164,-0.476517,-0.219774,-0.606251,-0.056734,-0.784480,-0.506410,-0.792115,-0.544112,-0.008045,...,-0.571468,-0.593303,-0.566833,0.221642,0.669846,-0.612870,-0.590337,-0.692283,1.303083,1.446917
3415,1.142695,-0.375522,-0.175051,-0.585849,-0.038110,-0.728930,-0.616973,-0.705486,-0.589833,1.282580,...,-0.572011,-0.626074,-0.553561,-0.200671,-0.425266,-0.645996,0.398613,-0.680866,0.242542,1.216172
3416,-0.691748,-0.529955,-0.533103,0.475267,-0.575113,2.121638,-0.569148,-0.050121,1.627636,-0.458550,...,1.246994,0.336100,1.254137,-0.542346,-0.387209,1.305646,-0.262513,0.791136,-0.669677,-0.828985


In [9]:

# ===============================
# 4. Train-Test Split
# ===============================
# Hold out 30% of the samples for testing; the target is the real/fake "class"
# column and random_state pins the shuffle so the split is repeatable.
# NOTE(review): `features` was already standardized over *all* rows by
# Preprocessing.normalize(), so the held-out rows influenced the scaling
# statistics. The split is also not stratified on the target.
X_train, X_test, y_train, y_test = train_test_split(features, labels['class'], test_size=0.3, random_state=42)


In [10]:

# ===============================
# 5. Encode Labels
# ===============================
# Learn the string -> integer mapping from the training labels only, then apply
# that one mapping to both splits.
encoder = LabelEncoder()
encoder.fit(y_train)
y_train_encoded = encoder.transform(y_train)
y_test_encoded = encoder.transform(y_test)


In [11]:

# ===============================
# 6. Train SVM Model
# ===============================
# Fit a linear-kernel support vector classifier (regularization C=1.0) on the
# training split.
# NOTE(review): per the scikit-learn docs, SVC's random_state only matters when
# probability estimates are enabled, which is not the case here — confirm
# before relying on it for reproducibility.
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train_encoded)


In [12]:

# ===============================
# 7. Evaluate Model
# ===============================
# Predict encoded class labels for the held-out samples.
y_pred = svm_model.predict(X_test)

In [13]:

# Per-class precision / recall / F1 on the test split; target_names turns the
# encoded integers back into the original class strings for display.
print("✅ Classification Report:\n")
print(classification_report(y_test_encoded, y_pred, target_names=encoder.classes_))


✅ Classification Report:

              precision    recall  f1-score   support

        fake       1.00      1.00      1.00       519
        real       1.00      1.00      1.00       507

    accuracy                           1.00      1026
   macro avg       1.00      1.00      1.00      1026
weighted avg       1.00      1.00      1.00      1026



In [14]:

# Confusion matrix on the test split: rows are true classes, columns are
# predicted classes, both in encoded-label order (i.e. encoder.classes_ order).
print("✅ Confusion Matrix:\n")
print(confusion_matrix(y_test_encoded, y_pred))

✅ Confusion Matrix:

[[518   1]
 [  0 507]]


In [15]:

# Overall fraction of test samples whose predicted class equals the true class.
print("✅ Accuracy Score:", accuracy_score(y_test_encoded, y_pred))

✅ Accuracy Score: 0.9990253411306043
