### SVM classifier trained on images (real and generated ones from 4 generators) from ELSA compressed in jpg with quality 40 and coming. The classifier is tested on images from synthbuster

In [1]:
%load_ext autoreload
%autoreload 2

In [76]:
import sys
sys.path.append("../tools")
from utils import load_data_features, load_synthbuster_balanced, map_synthbuster_classes
from sklearn.svm import LinearSVC
from datasets import load_from_disk
import open_clip
from IPython.display import clear_output
import numpy as np

In [3]:
model, _, preprocess = open_clip.create_model_and_transforms('hf-hub:laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K',device="cuda")

model.eval()
clear_output()

In [4]:
def label_conversion(e):
    e["label"] = 1 if e["label"] == "real" else 0
    return e

In [95]:
X_sb, y_sb = load_synthbuster_balanced("../../data/synthbuster_test",
                                       binary_classification=True)

In [17]:
X_40, y_40 = load_data_features("../../data/big_QF_40_features","train")
X_h, y_h = load_data_features("../../data/holistic","train")

### Train on big_QF_40_features (1 generator) / Test on holistic (4 generators)

In [84]:
clf = LinearSVC(dual="auto")
clf.fit(X_40,y_40).score(X_h,y_h)

0.641

### Train on holistic (4 generators) / Test on big_QF_40_features (1 generator)

In [19]:
clf = LinearSVC(dual="auto")
clf.fit(X_h,y_h).score(X_40,y_40)

0.9426

### Train on holistic (4 generators) / Test on synthbuster

In [20]:
clf.fit(X_h,y_h).score(X_sb,y_sb)

0.6444199116820554

### Train on synthbuster (9 generators) / Test on holistic (4 generators)

In [97]:
clf.fit(X_sb, y_sb).score(X_h,y_h)

0.6202

### Train and test on synthbuster

In [50]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_sb,y_sb,shuffle=True,test_size=.2)
print(X_train.shape)
clf.fit(X_train,y_train).score(X_test,y_test)

(7971, 768)


0.982940291018565

### Train and test on big_QF_40 (1 generator)

In [53]:
X_train, X_test, y_train, y_test = train_test_split(X_40,y_40,shuffle=True,test_size=.2)

clf.fit(X_train,y_train).score(X_test,y_test)

0.983

# Train on concatenation of synthbuster + holistic and test on holistic

## train on synthbuster + holistic

In [70]:
X_sb_train, X_sb_test, y_sb_train, y_sb_test = train_test_split(X_sb, y_sb,test_size=.2)
X_h_train, y_h_train = load_data_features("../../data/holistic",split="train")
X_h_test, y_h_test = load_data_features("../../data/holistic/",split="test")

X_train = np.vstack((X_sb_train,X_h_train))
y_train = np.hstack((y_sb_train,y_h_train))

clf.fit(X_train,y_train)

## test on holistic

In [61]:
clf.score(X_h_test,y_h_test)

0.9501

## test on synthbuster

In [62]:
clf.score(X_sb_test,y_sb_test)

0.963371801304566

## test on big_QF_40

In [63]:
X_40_test, y_40_test = load_data_features("../../data/big_QF_40_features",split="test")
clf.score(X_40,y_40)

0.9445

# Multi class classifier

In [106]:
from sklearn.multiclass import OneVsOneClassifier, OneVsRestClassifier

ovo = OneVsOneClassifier(LinearSVC(dual="auto"))
ovr = OneVsRestClassifier(LinearSVC(dual="auto"))
X_sb, y_sb = load_synthbuster_balanced("../../data/synthbuster_test",
                                       binary_classification=False)

In [108]:
X_train, X_test, y_train, y_test = train_test_split(X_sb, 
                                                    y_sb, 
                                                    shuffle=True,
                                                    random_state=7, 
                                                    test_size=.2)

ovo.fit(X_train,y_train).score(X_test, y_test)

0.802

In [109]:
ovr.fit(X_train, y_train).score(X_test,y_test)

0.793