In [43]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [44]:
from MLP_classifier import MultiClassClassifier
from torch.utils.data import DataLoader
from dataset import DeepFakeDatasetFastLoad
import torch.nn as nn
import torch
import sys
sys.path.append("../tools")
from constants import PATH_TO_DATA, SEED
from sklearn.model_selection import train_test_split
from torch.utils.data import random_split

In [39]:
device = 0
model = MultiClassClassifier().cuda(device=device)

lr = 1e-3
batch_size = 64
epochs = 5

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr)

data = DeepFakeDatasetFastLoad("../../data/df_34000.pt")

rng = torch.Generator().manual_seed(SEED)
train_data, test_data = random_split(data,[0.8,0.2],generator=rng)

train_loader = DataLoader(train_data,batch_size=batch_size,shuffle=True)
test_loader  = DataLoader(test_data,batch_size=len(test_data),shuffle=True)

model.train()

MultiClassClassifier(
  (fc1): Linear(in_features=768, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=18, bias=True)
  (act): ReLU()
)

## Train for multi-class classification

In [46]:
model.set_generators_maps(gen_to_int=data.gen_to_int,int_to_gen=data.int_to_gen)

In [47]:
n_epochs = 1000
for epoch in range(1,n_epochs+1):
    for idx, batch in enumerate(train_loader):
        # prediction and loss
        pred = model(batch["features"].cuda(device))
        loss = loss_fn(pred,batch["generator"].cuda(device))

        # backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
    loss, current = loss.item(), idx*batch_size + len(batch["features"])
    if epoch%10 == 0 and epoch > 0:
        print(f"loss: {loss:>7f}  [{epoch:>5d}/{n_epochs:>5d}]")

loss: 0.098827  [   10/ 1000]
loss: 0.065564  [   20/ 1000]
loss: 0.070788  [   30/ 1000]
loss: 0.056254  [   40/ 1000]
loss: 0.088859  [   50/ 1000]
loss: 0.142530  [   60/ 1000]
loss: 0.052894  [   70/ 1000]
loss: 0.063017  [   80/ 1000]
loss: 0.087643  [   90/ 1000]
loss: 0.041732  [  100/ 1000]
loss: 0.062522  [  110/ 1000]
loss: 0.089441  [  120/ 1000]
loss: 0.060966  [  130/ 1000]
loss: 0.051340  [  140/ 1000]
loss: 0.070046  [  150/ 1000]
loss: 0.070173  [  160/ 1000]
loss: 0.027125  [  170/ 1000]
loss: 0.080948  [  180/ 1000]
loss: 0.049271  [  190/ 1000]
loss: 0.040883  [  200/ 1000]
loss: 0.078709  [  210/ 1000]
loss: 0.032503  [  220/ 1000]
loss: 0.066351  [  230/ 1000]
loss: 0.047431  [  240/ 1000]
loss: 0.038480  [  250/ 1000]
loss: 0.045655  [  260/ 1000]
loss: 0.064606  [  270/ 1000]
loss: 0.062703  [  280/ 1000]
loss: 0.047547  [  290/ 1000]
loss: 0.090837  [  300/ 1000]
loss: 0.040277  [  310/ 1000]
loss: 0.049093  [  320/ 1000]
loss: 0.071460  [  330/ 1000]
loss: 0.08

## Test for binary classification

In [56]:
import torch.types

with torch.no_grad():
    model.eval()
    for e in test_loader:
        accuracy = model.get_model_accuracy_binary(features=e["features"],
                                                   true_labels=e["label"],
                                                   device="cuda:" + str(device))
print(accuracy)

0.9730882048606873


### Comparison with SVM

In [7]:
from sklearn.svm import LinearSVC
from sklearn.multiclass import OneVsOneClassifier

clf = OneVsOneClassifier(LinearSVC(dual="auto"))

In [8]:
train_loader_all = DataLoader(train_data,batch_size=len(train_data))
for e in train_loader_all:
    X_train = e["features"]
    gen_train = e["generator"]
    label_train = e["label"]
for e in test_loader:
    X_test = e["features"]
    gen_test = e["generator"]
    label_test = e["label"]

clf.fit(X_train, gen_train) # train on multi-class classification

In [9]:
import numpy as np
pred = data.class_to_label(clf.predict(X_test))
np.mean(label_test.numpy() == pred.numpy()) # binary classification performance

0.97

## Test on multi-class classification

### Neural Network

In [60]:
import torch.types

with torch.no_grad():
    model.eval()
    for e in test_loader:
        accuracy = model.get_model_accuracy_multiclass(features=e["features"],
                                            true_classes=e["generator"],
                                            device="cuda:"+str(device))
print(accuracy)

tensor([ 0,  4, 12,  ...,  0,  9,  1])
0.8789705634117126


### SVM

In [11]:
clf.fit(X_train,gen_train).score(X_test,gen_test)

0.8730882352941176

# Saving the model

In [16]:
# torch.save(model.state_dict(),"./checkpoints/multiclass_1000epochs_0.08loss.pt")

# Loading the model

In [17]:
model2 = MultiClassClassifier()
model2.load_state_dict(torch.load("./checkpoints/multiclass_1000epochs_0.08loss.pt"))

<All keys matched successfully>

In [30]:
model2.eval().cuda(device)
with torch.no_grad():
    for e in test_loader:
        pred = torch.argmax(model2(e["features"].cuda(device)),dim=1)
        acc = torch.mean(torch.eq(e["generator"].cuda(device),pred).float()).item()
        print(acc)

0.8783823251724243


# Train on data3 test on SB

In [32]:
from utils import load_synthbuster_balanced
X_sb, y_sb = load_synthbuster_balanced("../../data/synthbuster_test",
                                       binary_classification=True,
                                       balance_real_fake=True)

In [61]:
model.get_model_accuracy_binary(torch.Tensor(X_sb).cuda(device),
                                torch.Tensor(y_sb).cuda(device),
                                "cuda:"+str(device))

0.38087114691734314