In [1]:
# Reload imported modules automatically before each cell runs, so edits to the
# project's .py files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [27]:
import os
import sys

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import LinearSVC
from torch.utils.data import DataLoader, random_split

# Project-local modules: the extra search paths must be registered before the
# imports that rely on them.
sys.path.append(".")
sys.path.append("../tools")
from MLP_classifier import MultiClassClassifier
from dataset import DeepFakeDatasetFastLoad, OOD, DeepFakeTest
from constants import PATH_TO_DATA, SEED, PATH_TO_DATA4, GEN_TO_INT, INT_TO_GEN, GEN_TO_GEN

In [None]:
# --- Multi-class experiment: model, optimisation settings and data splits ---
device = "cuda:0"
model = MultiClassClassifier(n_classes=len(GEN_TO_INT)).to(device)

# Hyper-parameters
lr = 1e-3
batch_size = 64
epochs = 5

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

data = DeepFakeDatasetFastLoad(PATH_TO_DATA4 + "df_34000_V2.pt")

# Seeded 70% / 20% / 10% split into train / test / validation subsets.
rng = torch.Generator().manual_seed(SEED)
train_data, test_data, validation_data = random_split(
    data, [0.7, 0.2, 0.1], generator=rng
)

# Training is mini-batched; the test and validation subsets are each served
# as one batch holding the whole subset.
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=len(test_data))
val_loader = DataLoader(validation_data, shuffle=True, batch_size=len(validation_data))

model.train()

## Train for multi-class classification

In [None]:
# Give the model both generator mappings (name -> int and int -> name).
model.set_generators_maps(
    gen_to_int=GEN_TO_INT,
    int_to_gen=INT_TO_GEN,
)

In [None]:
# Train the multi-class model. Every 10th epoch, record the loss of the epoch's
# last mini-batch and the binary accuracy on the validation split.
n_epochs = 1000
val_accuracy = []
loss_history = []
for epoch in range(1, n_epochs + 1):
    for batch in train_loader:
        # prediction and loss
        pred = model(batch["features"].to(device))
        loss = loss_fn(pred, batch["generator"].to(device=device, dtype=torch.long))

        # backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Keep only the scalar value; `loss` is reused later in the checkpoint name.
    loss = loss.item()
    if epoch % 10 == 0:
        loss_history.append(loss)
        # Validation is pure inference: switch to eval mode and disable autograd
        # so no graph is built for the full validation batch.
        model.eval()
        with torch.no_grad():
            for v in val_loader:
                val_accuracy.append(
                    model.get_model_accuracy_binary(v["features"],
                                                    v["label"],
                                                    device))
        model.train()
        print(f"loss: {loss:>7f}  [{epoch:>5d}/{n_epochs:>5d}]")

In [None]:
# Loss and validation accuracy are logged at epochs 10, 20, ..., so the x-axis
# has to start at 10 and step by 10 to line up with the logged values (and to
# have the same number of points when n_epochs is not a multiple of 10).
logged_epochs = range(10, n_epochs + 1, 10)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6))
ax1.set_title("loss")
ax2.set_title("accuracy on validation")
ax1.plot(logged_epochs, loss_history)
ax2.plot(logged_epochs, val_accuracy)
ax2.set_xlabel("epoch")
ax1.grid()
ax2.grid()
plt.show()

## Test for binary classification

In [None]:
import torch.types

# Binary accuracy of the multi-class network on the test split, computed in
# eval mode with autograd disabled.
model.eval()
with torch.no_grad():
    for e in test_loader:
        accuracy = model.get_model_accuracy_binary(
            features=e["features"],
            true_labels=e["label"],
            device=device,
        )
print(accuracy)

### Comparison with SVM

In [None]:
from sklearn.multiclass import OneVsOneClassifier
from sklearn.svm import LinearSVC

# Linear SVM baseline wrapped in a one-vs-one multi-class scheme.
clf = OneVsOneClassifier(
    estimator=LinearSVC(dual="auto"),
)

In [None]:
# Materialise the whole training subset as a single batch for scikit-learn.
train_loader_all = DataLoader(train_data, batch_size=len(train_data))
train_batch = next(iter(train_loader_all))
X_train, gen_train, label_train = (
    train_batch["features"],
    train_batch["generator"],
    train_batch["label"],
)

# The test loader also yields one batch containing the full test subset.
test_batch = next(iter(test_loader))
X_test, gen_test, label_test = (
    test_batch["features"],
    test_batch["generator"],
    test_batch["label"],
)

# Train on the multi-class (generator) targets.
clf.fit(X_train, gen_train)

In [None]:
import numpy as np

# Binary classification performance of the SVM: predicted classes are passed
# through data.class_to_label and compared with the test labels.
# NOTE(review): class_to_label presumably collapses generator ids to real/fake
# labels - confirm in dataset.py.
pred = data.class_to_label(clf.predict(X_test))
np.equal(label_test.numpy(), pred.numpy()).mean()

## Test on multi-class classification

### Neural Network

In [None]:
# Multi-class accuracy of the network on the test split.
# `device` already holds a complete device string (e.g. "cuda:0"); prefixing it
# with "cuda:" again would produce the invalid "cuda:cuda:0".
model.eval()
with torch.no_grad():
    for e in test_loader:
        accuracy = model.get_model_accuracy_multiclass(features=e["features"],
                                                       true_classes=e["generator"],
                                                       device=device)
print(accuracy)

### SVM

In [None]:
# Multi-class accuracy of the SVM on the test split. `clf` was already fitted
# on (X_train, gen_train) in the comparison section, so score that same model
# instead of retraining it from scratch.
clf.score(X_test, gen_test)

# Saving the model

In [None]:
# Save the trained weights; the file name records the epoch count and last loss.
path = f"./checkpoints/multiclass_epochs={n_epochs}_loss={loss}.pt"
# torch.save does not create missing folders, so make sure the target exists.
os.makedirs(os.path.dirname(path), exist_ok=True)
torch.save(model.state_dict(), path)

# Loading the model

In [None]:
# Build a fresh network with one output per generator class and restore the
# weights stored at `path`.
model2 = MultiClassClassifier(n_classes=len(GEN_TO_INT))
saved_weights = torch.load(path)
model2.load_state_dict(saved_weights)

In [None]:
# Check the reloaded model: multi-class accuracy on the test split.
model2.eval().to(device)
with torch.no_grad():
    for e in test_loader:
        logits = model2(e["features"].to(device))
        pred = logits.argmax(dim=1)
        acc = (e["generator"].to(device) == pred).float().mean().item()
        print(acc)

# Train on data3, test on Synthbuster (SB)

In [None]:
from utils import load_synthbuster_balanced

# Load the Synthbuster test set with binary targets and the
# real/fake balancing option enabled.
synthbuster_dir = PATH_TO_DATA4 + "synthbuster_test"
X_sb, y_sb = load_synthbuster_balanced(
    synthbuster_dir,
    binary_classification=True,
    balance_real_fake=True,
)

In [None]:
# Binary accuracy of the multi-class model on Synthbuster; both arrays are
# converted to float tensors and moved to the model's device first.
model.get_model_accuracy_binary(
    torch.Tensor(X_sb).to(device),
    torch.Tensor(y_sb).to(device),
    device,
)

# Test on OOD

In [None]:
# Build the OOD set from the raw folder (not from a preprocessed file).
# `device` already holds a complete device string (e.g. "cuda:0"); prefixing it
# with "cuda:" again would produce the invalid "cuda:cuda:0".
ood = OOD("../../data/ood", load_preprocessed=False, device=device)

In [None]:
# ood.save("../../data/ood.pt")

In [None]:
# Reload the OOD set from its preprocessed file.
ood2 = OOD(
    PATH_TO_DATA4 + "ood.pt",
    load_preprocessed=True,
)

In [None]:
# Serve the whole OOD set as one shuffled batch.
loader_test = DataLoader(
    ood2,
    shuffle=True,
    batch_size=len(ood2),
)

In [None]:
# Binary accuracy of the multi-class model on the OOD set (inference only).
model.eval()
with torch.no_grad():
    for e in loader_test:
        accuracy = model.get_model_accuracy_binary(e["features"], e["label"], device)

print(accuracy)

# Training a binary classifier

In [4]:
# --- Binary experiment: same architecture with two outputs, same data splits ---
device = "cuda:1"
clf_binary = MultiClassClassifier(n_classes=2).to(device)

# Hyper-parameters
lr = 1e-3
batch_size = 128
n_epochs = 1000

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(clf_binary.parameters(), lr=lr)

data = DeepFakeDatasetFastLoad(PATH_TO_DATA4 + "df_34000_V2.pt")

# Seeded 70% / 20% / 10% split into train / test / validation subsets.
rng = torch.Generator().manual_seed(SEED)
train_data, test_data, validation_data = random_split(
    data, [0.7, 0.2, 0.1], generator=rng
)

# Training is mini-batched; the test and validation subsets are each served
# as one batch holding the whole subset.
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=True, batch_size=len(test_data))
val_loader = DataLoader(validation_data, shuffle=True, batch_size=len(validation_data))

clf_binary.train()

MultiClassClassifier(
  (fc1): Linear(in_features=768, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=2, bias=True)
  (act): ReLU()
)

In [None]:
# Train the binary classifier. Every 10th epoch, record the loss of the epoch's
# last mini-batch and the accuracy on the validation split.
val_accuracy = []
loss_history = []
for epoch in range(1, n_epochs + 1):
    for batch in train_loader:
        # prediction and loss
        pred = clf_binary(batch["features"].to(device))
        loss = loss_fn(pred, batch["label"].to(device=device, dtype=torch.long))

        # backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # Keep only the scalar value of the last mini-batch loss.
    loss = loss.item()
    if epoch % 10 == 0:
        loss_history.append(loss)
        # Validation is pure inference: switch to eval mode and disable autograd
        # so no graph is built for the full validation batch.
        clf_binary.eval()
        with torch.no_grad():
            for v in val_loader:
                val_pred = torch.argmax(clf_binary(v["features"].to(device)), dim=1)
                val_accuracy.append(
                    torch.mean(torch.eq(v["label"].to(device), val_pred).float()).item())
        clf_binary.train()
        print(f"loss: {loss:>7f}  [{epoch:>5d}/{n_epochs:>5d}]")

In [None]:
# Loss and validation accuracy are logged at epochs 10, 20, ..., so the x-axis
# has to start at 10 and step by 10 to line up with the logged values (and to
# have the same number of points when n_epochs is not a multiple of 10).
logged_epochs = range(10, n_epochs + 1, 10)

fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 6))
ax1.set_title("loss")
ax2.set_title("accuracy on validation")
ax1.plot(logged_epochs, loss_history)
ax2.plot(logged_epochs, val_accuracy)
ax2.set_xlabel("epoch")
ax1.grid()
ax2.grid()
plt.show()

In [None]:
# path = f"./checkpoints/binary_epochs={n_epochs}_loss={loss}.pt"
# torch.save(clf_binary.state_dict(),path)

## Binary classifier Test VS MultiClass classifier

In [6]:
# Restore the saved binary-classifier weights and switch to inference mode.
binary_checkpoint = "./checkpoints/binary_epochs=1000_loss=0.02289319783449173.pt"
clf_binary.load_state_dict(torch.load(binary_checkpoint))
clf_binary.eval()

<All keys matched successfully>

In [16]:
# Rebuild the multi-class model on the CPU and restore its trained weights.
# map_location="cpu" remaps the stored tensors onto the CPU, so the checkpoint
# loads even if it was saved from a CUDA model and that GPU is unavailable.
model = MultiClassClassifier(n_classes=len(GEN_TO_INT.keys()))
model.eval()
model.load_state_dict(
    torch.load("./checkpoints/multiclass_epochs=1000_loss=0.0692238137125969.pt",
               map_location="cpu"))
model.set_generators_maps(GEN_TO_INT, INT_TO_GEN)

In [17]:
# Binary accuracy of both models on the test split: the binary classifier is
# evaluated on `device`, the multi-class model on the CPU.
with torch.no_grad():
    for e in test_loader:
        acc_binary = clf_binary.get_model_accuracy_binary(
            e["features"], e["label"], device, binary_model=True
        )
        print("Accuracy with binary classifier:     ", acc_binary)
        acc_multi = model.get_model_accuracy_binary(
            e["features"], e["label"], "cpu", binary_model=False
        )
        print("Accuracy with multi-class classifier:", acc_multi)

Accuracy with binary classifier:      0.9754411578178406
Accuracy with multi-class classifier: 0.9752941131591797


## Test on AID_TEST

In [47]:
# Load the AID_TEST set and compare the binary accuracy of both models on it.
data_test = DeepFakeTest(PATH_TO_DATA4 + "df_test_60000.pt", load_from_disk=True)

with torch.no_grad():
    acc_binary = clf_binary.get_model_accuracy_binary(
        data_test.features, data_test.label, device, binary_model=True
    )
    print("Accuracy with binary classifier:     ", acc_binary)

    acc_multi = model.get_model_accuracy_binary(
        data_test.features, data_test.label, "cpu", binary_model=False
    )
    print("Accuracy with multi-class classifier:", acc_multi)

Accuracy with binary classifier:      0.7139333486557007
Accuracy with multi-class classifier: 0.7297499775886536


# Accuracy per class

In [74]:
# The test loader yields one batch that contains the whole test split.
test_batch = next(iter(test_loader))
features = test_batch["features"]
label = test_batch["label"]
gen = test_batch["generator"]

# For every generator, keep the features/labels of its samples in both the
# AID test split (d_AID) and the AID_TEST set (d_AID_TEST).
d_AID, d_AID_TEST = {}, {}
for key in GEN_TO_GEN:
    class_id = GEN_TO_INT[key]
    in_aid = gen == class_id
    in_aid_test = data_test.gen == class_id

    d_AID[key] = {
        "features": features[in_aid],
        "label": label[in_aid],
    }
    d_AID_TEST[key] = {
        "features": data_test.features[in_aid_test],
        "label": data_test.label[in_aid_test],
    }

In [77]:
# Count how many samples carry each value of data.gen.
data.gen.bincount()

tensor([17000,  3000,  2000,  1000,  1000,  1000,  1000,  1000,  1000,  1000,
         1000,  1000,  2000,     0,     0,  1000])

In [79]:
# Same count for AID_TEST; the ids are cast to an integer dtype first.
data_test.gen.int().bincount()

tensor([30000,  6000,  3000,  2000,  2000,  1000,  1000,  1000,  1000,  1000,
         1000,  3000,  4000,  1000,  1000,  2000], device='cuda:0')

In [81]:
# Per-generator accuracy of the multi-class model, evaluated on the CPU.
accuracy_AID = {"binary": {}, "multi": {}}       # test split from the same folder as the training data
accuracy_AID_TEST = {"binary": {}, "multi": {}}  # data from AID_TEST (can be seen as OOD)

with torch.no_grad():
    for g in d_AID:
        # Same two measurements for both datasets, AID first and AID_TEST second.
        for subsets, scores in ((d_AID, accuracy_AID), (d_AID_TEST, accuracy_AID_TEST)):
            feats = subsets[g]["features"]
            labels = subsets[g]["label"]

            scores["binary"][g] = model.get_model_accuracy_binary(
                feats, labels, "cpu", binary_model=False
            )
            # Every sample in this subset belongs to generator g, so the
            # expected class is the constant GEN_TO_INT[g].
            scores["multi"][g] = model.get_model_accuracy_multiclass(
                feats, torch.ones_like(labels) * GEN_TO_INT[g], "cpu"
            )

display("binary accuracy on test from AID:", accuracy_AID["binary"])
display("binary accuracy on AID_TEST:     ", accuracy_AID_TEST["binary"])
display("multi-class accuracy on test from AID:", accuracy_AID["multi"])
display("multi-class accuracy on AID_TEST:", accuracy_AID_TEST["multi"])

'binary accuracy on test from AID:'

{'null': 0.9822364449501038,
 'Stable diffusion': 0.9640522599220276,
 'Kandisky': 0.9897435903549194,
 'DF_XL': 0.9519230723381042,
 'dreamlike': 0.9402984976768494,
 'gigaGan': 0.8612716794013977,
 'GlideUP': 0.9902912378311157,
 'LafitteUP': 0.9952830076217651,
 'LCM_Dreamshaper': 0.994535505771637,
 'megaDallEUP': 0.9892473220825195,
 'miniDallEUP': 0.9948186278343201,
 'pixart': 0.9947368502616882,
 'playground': 0.9410377144813538,
 'styleGan2': nan,
 'styleGan3': nan,
 'animagineXL': 0.9893617033958435}

'binary accuracy on AID_TEST:     '

{'null': 0.562666654586792,
 'Stable diffusion': 0.9446666836738586,
 'Kandisky': 0.8793333172798157,
 'DF_XL': 0.8870000243186951,
 'dreamlike': 0.7275000214576721,
 'gigaGan': 0.8100000023841858,
 'GlideUP': 0.9940000176429749,
 'LafitteUP': 0.9980000257492065,
 'LCM_Dreamshaper': 0.9800000190734863,
 'megaDallEUP': 0.9909999966621399,
 'miniDallEUP': 0.9929999709129333,
 'pixart': 0.890666663646698,
 'playground': 0.8812500238418579,
 'styleGan2': 0.7480000257492065,
 'styleGan3': 0.6819999814033508,
 'animagineXL': 0.9884999990463257}

'multi-class accuracy on test from AID:'

{'null': 0.9822364449501038,
 'Stable diffusion': 0.8235294222831726,
 'Kandisky': 0.8487179279327393,
 'DF_XL': 0.870192289352417,
 'dreamlike': 0.6368159055709839,
 'gigaGan': 0.7398843765258789,
 'GlideUP': 0.9368932247161865,
 'LafitteUP': 0.9905660152435303,
 'LCM_Dreamshaper': 0.9234972596168518,
 'megaDallEUP': 0.8655914068222046,
 'miniDallEUP': 0.9430052042007446,
 'pixart': 0.699999988079071,
 'playground': 0.7429245114326477,
 'styleGan2': nan,
 'styleGan3': nan,
 'animagineXL': 0.9042553305625916}

'multi-class accuracy on AID_TEST:'

{'null': 0.562666654586792,
 'Stable diffusion': 0.6356666684150696,
 'Kandisky': 0.6086666584014893,
 'DF_XL': 0.07699999958276749,
 'dreamlike': 0.09099999815225601,
 'gigaGan': 0.2809999883174896,
 'GlideUP': 0.9430000185966492,
 'LafitteUP': 0.9950000047683716,
 'LCM_Dreamshaper': 0.8920000195503235,
 'megaDallEUP': 0.9100000262260437,
 'miniDallEUP': 0.9169999957084656,
 'pixart': 0.07599999755620956,
 'playground': 0.10724999755620956,
 'styleGan2': 0.0,
 'styleGan3': 0.0,
 'animagineXL': 0.4230000078678131}

In [84]:
import pandas as pd

# Build the per-generator summary table directly from the accuracy dictionaries
# computed in the previous cell, rather than from hand-copied printed numbers,
# so the table always reflects the current results.
# The table lives under its own name so that the dataset variable `data`
# is not overwritten by a plain dict.
generators = list(accuracy_AID["binary"])
accuracy_table = {
    'Generator': generators,
    'Binary Accuracy on Test from AID': [accuracy_AID["binary"][g] for g in generators],
    'Binary Accuracy on AID_TEST': [accuracy_AID_TEST["binary"][g] for g in generators],
    'Multi-class Accuracy on Test from AID': [accuracy_AID["multi"][g] for g in generators],
    'Multi-class Accuracy on AID_TEST': [accuracy_AID_TEST["multi"][g] for g in generators],
}

# Creating the pandas DataFrame
df = pd.DataFrame(accuracy_table)

In [85]:
# Display the per-generator accuracy table.
df

Unnamed: 0,Generator,Binary Accuracy on Test from AID,Binary Accuracy on AID_TEST,Multi-class Accuracy on Test from AID,Multi-class Accuracy on AID_TEST
0,,0.982236,0.562667,0.982236,0.562667
1,Stable diffusion,0.964052,0.944667,0.823529,0.635667
2,Kandisky,0.989744,0.879333,0.848718,0.608667
3,DF_XL,0.951923,0.887,0.870192,0.077
4,dreamlike,0.940298,0.7275,0.636816,0.091
5,gigaGan,0.861272,0.81,0.739884,0.281
6,GlideUP,0.990291,0.994,0.936893,0.943
7,LafitteUP,0.995283,0.998,0.990566,0.995
8,LCM_Dreamshaper,0.994536,0.98,0.923497,0.892
9,megaDallEUP,0.989247,0.991,0.865591,0.91


In [87]:
# Write the summary table to the docs folder, without the index column.
csv_path = "../../docs/binary_multi_acc_AID_AID_TEST_neural_network.csv"
df.to_csv(csv_path, index=False)