In [3]:
import torch
import os

import torchvision
from torch import nn
from torch.utils.data import Dataset
import matplotlib.pyplot as plt
from torchvision.io import read_image
from torch.utils.data import DataLoader
import torch.optim as optim

import optuna
from optuna.trial import TrialState


In [4]:
class ImageDatasetFullyRAM(Dataset):  # loads the WHOLE dataset into RAM
    def __init__(self, data_dir, transform=None):
        self.data_dir = data_dir
        self.files = os.listdir(os.path.join(self.data_dir, 'fake'))
        self.files.extend(os.listdir(os.path.join(self.data_dir, 'real')))
        self.items = []
        for file in self.files:
            label = 0 if 'real' in file else 1  # 0=REAL, 1=FAKE
            if label == 1:
                image = read_image(os.path.join(self.data_dir, 'fake', file))
            else:
                image = read_image(os.path.join(self.data_dir, 'real', file))

            if transform:
                image = transform(image)

            self.items.append((image, label))

    def __len__(self):
        return len(self.files)

    def __getitem__(self, idx):
        return self.items[idx]



In [5]:
transform = torchvision.transforms.Compose([torchvision.transforms.ToPILImage(), torchvision.transforms.ToTensor(),
                                            torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                             std=[0.229, 0.224, 0.225]), ])

training_data = ImageDatasetFullyRAM('..\data\\training', transform=transform)
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)

validation_data = ImageDatasetFullyRAM('..\data\\validation', transform=transform)
validation_dataloader = DataLoader(validation_data, batch_size=64, shuffle=True)


In [6]:
resnet50 = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)

for param in resnet50.parameters():
    param.requires_grad = False

fc_inputs = resnet50.fc.in_features
resnet50.fc = nn.Sequential(
    nn.Linear(fc_inputs, 512),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(512, 128),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(128, 1),
    nn.Sigmoid()
)

Using cache found in C:\Users\odmen/.cache\torch\hub\pytorch_vision_v0.10.0


In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

total_step = len(train_dataloader)
criterion = nn.BCELoss() 
num_epochs = 30


cuda


In [8]:
def objective(trial):
    model = resnet50
    model = nn.DataParallel(model)
    model.to(device)
    optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    loss_list = []
    acc_list = []

    for epoch in range(num_epochs):
        # training
        for i, (images, labels) in enumerate(train_dataloader):
            if device.type == 'cuda':
                images = images.cuda()
                labels = labels.cuda()
            # feed forward 
            outputs = model(images)
            labels = labels.unsqueeze(1).float()
            loss = criterion(outputs, labels)

            # back propagation 
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # accuracy 
            total = labels.size(0)
            predicted = torch.max(outputs.data, 1) 
            correct = (predicted == labels).sum().item()

            if i % 16 == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                      .format(epoch + 1, num_epochs, i + 1, total_step, loss.item(),
                              (correct / total) * 100))
                loss_list.append(loss.item())
                acc_list.append(correct / total)

        # validation
        correct_count = 0
        for i, (images, labels) in enumerate(validation_dataloader):
            if device.type == 'cuda':
                images = images.cuda()
                labels = labels.cuda()
            # feed forward 
            outputs = model(images)
            labels = labels.unsqueeze(1).float()

            predicted = torch.max(outputs.data, 1) 
            correct_count += (predicted == labels).sum().item()

        accuracy = correct_count / len(validation_data)
        trial.report(accuracy, epoch)
        print(f'Validated accuracy {accuracy}')
        if trial.should_prune():
            print('PRUNED')
            raise optuna.exceptions.TrialPruned()

    return accuracy


In [11]:
import numpy as np
from bokeh.io import show
from bokeh.plotting import figure
from bokeh.models import LinearAxis, Range1d

model = resnet50
model = nn.DataParallel(model)
model.to(device)
optimizer_name = "Adam"
lr = 1e-2
optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

loss_list = []
acc_list = []

for epoch in range(10):
    # training
    for i, (images, labels) in enumerate(train_dataloader):
        if device.type == 'cuda':
            images = images.cuda()
            labels = labels.cuda()
        # feed forward 
        outputs = model(images)
        labels = labels.unsqueeze(1).float()
        loss = criterion(outputs, labels)

        # back propagation 
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accuracy 
        total = labels.size(0)
        correct = (outputs.round() == labels).sum().item()

        if i % 16 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}, Accuracy: {:.2f}%'
                  .format(epoch + 1, num_epochs, i + 1, total_step, loss.item(),
                          (correct / total) * 100))
            loss_list.append(loss.item())
            acc_list.append(correct / total)

    # validation
    correct_count = 0
    for i, (images, labels) in enumerate(validation_dataloader):
        if device.type == 'cuda':
            images = images.cuda()
            labels = labels.cuda()
        # feed forward 
        outputs = model(images)
        labels = labels.unsqueeze(1).float()

        predicted = (outputs.round()).float()
        correct_count += (predicted == labels).sum().item()

    accuracy = correct_count / len(validation_data)
    print(accuracy)

p = figure(y_axis_label='Loss', width=850, y_range=(0, 1), title='PyTorch ConvNet results')
p.extra_y_ranges = {'Accuracy': Range1d(start=0, end=100)}
p.add_layout(LinearAxis(y_range_name='Accuracy', axis_label='Accuracy (%)'), 'right')
p.line(np.arange(len(loss_list)), loss_list)
p.line(np.arange(len(loss_list)), np.array(acc_list) * 100, y_range_name='Accuracy', color='red')
show(p)


Epoch [1/30], Step [1/29], Loss: 0.7680, Accuracy: 40.62%
Epoch [1/30], Step [17/29], Loss: 42.1875, Accuracy: 57.81%
0.5
Epoch [2/30], Step [1/29], Loss: 51.5625, Accuracy: 48.44%
Epoch [2/30], Step [17/29], Loss: 51.5625, Accuracy: 48.44%
0.5
Epoch [3/30], Step [1/29], Loss: 60.9375, Accuracy: 39.06%
Epoch [3/30], Step [17/29], Loss: 54.6875, Accuracy: 45.31%
0.5
Epoch [4/30], Step [1/29], Loss: 62.5000, Accuracy: 37.50%
Epoch [4/30], Step [17/29], Loss: 48.4375, Accuracy: 51.56%
0.5
Epoch [5/30], Step [1/29], Loss: 46.8750, Accuracy: 53.12%
Epoch [5/30], Step [17/29], Loss: 45.3125, Accuracy: 54.69%
0.5
Epoch [6/30], Step [1/29], Loss: 51.5625, Accuracy: 48.44%
Epoch [6/30], Step [17/29], Loss: 43.7500, Accuracy: 56.25%
0.5
Epoch [7/30], Step [1/29], Loss: 48.4375, Accuracy: 51.56%
Epoch [7/30], Step [17/29], Loss: 59.3750, Accuracy: 40.62%
0.5
Epoch [8/30], Step [1/29], Loss: 59.3750, Accuracy: 40.62%
Epoch [8/30], Step [17/29], Loss: 67.1875, Accuracy: 32.81%
0.5
Epoch [9/30], Ste

In [10]:
study = optuna.create_study(direction="maximize")
study.optimize(objective, n_trials=10, timeout=600)

pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

print("Best trial:")
trial = study.best_trial

print("  Value: ", trial.value)

print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

df = study.trials_dataframe()
df.to_csv('../models/optuna_resnet_results.csv', index=False)
fig = optuna.visualization.plot_intermediate_values(study)
fig.show()


[I 2023-12-17 22:15:41,323] A new study created in memory with name: no-name-6f074acc-16c3-4314-9975-a7164fc425ea
    There is an imbalance between your GPUs. You may want to exclude GPU 1 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
[W 2023-12-17 22:15:43,000] Trial 0 failed with parameters: {'optimizer': 'Adam', 'lr': 0.0014512092185093198} because of the following error: AttributeError("'bool' object has no attribute 'sum'").
Traceback (most recent call last):
  File "C:\Users\odmen\PycharmProjects\deepfakes\venv\lib\site-packages\optuna\study\_optimize.py", line 200, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\odmen\AppData\Local\Temp\ipykernel_17844\3932161542.py", line 31, in objective
    correct = (predicted == labels).sum().item()
AttributeError: 'bool' object has no attribute 'sum'
[W 2023-12-17 22:15:43,01

AttributeError: 'bool' object has no attribute 'sum'