In [None]:
#import libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, models, transforms
from torch.utils.data import Dataset,DataLoader,random_split
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import os
import cv2
from PIL import Image,ImageDraw,ImageFile
import torch.optim as optim
import csv
import pandas as pd
import copy
from torch.optim import lr_scheduler
import tensorflow as tf
import torch.utils.checkpoint
from torch.optim.lr_scheduler import CosineAnnealingLR
from google.colab import drive
drive.mount('/content/drive')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

In [None]:
! unzip "/content/drive/My Drive/Finetuning_Artwork_models/Dataset_arte_fake.zip"

In [3]:
import zipfile

with zipfile.ZipFile("/content/drive/My Drive/Finetuning_Artwork_models/Immagini_vere.zip", 'r') as zip:
    zip.extractall('')

In [5]:
#Inserire i due folder(real e fake) in una nuova cartella

import shutil

real_path = 'Immagini_vere'
fake_path = 'Dataset_arte_fake'

os.mkdir("Artworks")

shutil.move(real_path,'Artworks')
shutil.move(fake_path,'Artworks')


'Artworks/Dataset_arte_fake'

ARTWORK DATASET

In [6]:
#Classe per gestire gli artwork rispetto alla loro autenticità

class ArtworkDataset(Dataset):
  def __init__(self,links,transform):
      self.data = links
      self.transform = transform

  def __len__(self):
    return self.data.index.shape[0]
    
  def __getitem__(self,idx):
        img = Image.open(self.data.iloc[idx,0])
        label_index = self.data.iloc[idx, 1]
        if (img.mode != 'RGB'):
            img = img.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, label_index


In [7]:
# Creazione del csv contenente il path degli artwork e l'autenticità

path = 'Artworks' 
real_dir = 'Immagini_vere'
fake_dir = 'Dataset_arte_fake'

data = [] # Crea una lista vuota per immagazinare i percorsi degli artwork e le etichette (real o fake)

# Cicla la cartella degli artwork falsi e aggiunge il percorso e l'etichetta alla lista "data"

for dirpath, dirnames, filenames in os.walk(os.path.join(path,fake_dir)):
    for filename in filenames:
        if filename.endswith(".jpg"): # only consider jpg files
            filepath = os.path.join(dirpath, filename)
            data.append((filepath, "0"))


# Cicla la cartella degli artwork vero e aggiunge il percorso e l'etichetta alla lista "data"

for filename in os.listdir(os.path.join(path,real_dir)):
    if filename.endswith(".jpg"): 
       filepath = os.path.join(path,real_dir,filename)
       data.append((filepath,"1"))

# Converte la lista "data" in un dataframe pandas
df = pd.DataFrame(data, columns=["path", "label"])

# Salva il dataframe in un file csv
df.to_csv("image_labels.csv", index=False)
 

LOAD PRETRAINED MODEL

In [None]:
!pip install timm
import timm

model = timm.create_model('resnet50',pretrained=True, num_classes=2)

model = model.to(device)
print(model)

In [37]:
#Pesi di tutti i modelli resi non allenabili
for param in model.parameters(): 
    param.requires_grad = False



In [38]:
#Reso allenabile l'ultimo layer del modello
for p in model.fc.parameters(): 
    p.requires_grad=True


SPLIT IN TRAINING AND VALIDATION SET

In [39]:
dataset = df
dataset['label'] = dataset['label'].astype(int)
dataset

Unnamed: 0,path,label
0,Artworks/Dataset_arte_fake/stylegan3-r-metface...,0
1,Artworks/Dataset_arte_fake/stylegan3-r-metface...,0
2,Artworks/Dataset_arte_fake/stylegan3-r-metface...,0
3,Artworks/Dataset_arte_fake/stylegan3-r-metface...,0
4,Artworks/Dataset_arte_fake/stylegan3-r-metface...,0
...,...,...
88146,Artworks/Immagini_vere/howard-mehring_untitled...,1
88147,Artworks/Immagini_vere/nicolae-darascu_balcic-...,1
88148,Artworks/Immagini_vere/marc-chagall_the-creati...,1
88149,Artworks/Immagini_vere/julian-alden-weir_the-i...,1


In [40]:
train, validation = train_test_split(dataset.values, stratify=dataset.values[:, 1], test_size=.3, random_state = 1) 

In [41]:
train_links = pd.DataFrame(train, columns = dataset.columns)
validation_links = pd.DataFrame(validation, columns = dataset.columns)

BUILDING DATA LOADERS

In [42]:
data_transforms = transforms.Compose([
                                transforms.Resize(224),
                                transforms.CenterCrop(224),
                                transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
     
])



batch_size = 32

train_set = ArtworkDataset( train_links, data_transforms)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, 
                               drop_last=False,num_workers=2)

validation_set = ArtworkDataset( validation_links, data_transforms)

validation_loader = DataLoader(validation_set, batch_size=batch_size, shuffle=True, 
                               drop_last=False,num_workers=2)

TRAINING

In [43]:
class EarlyStopping():
    """
    Early stopping to stop the training when the loss does not improve after
    certain epochs.
    """
    def __init__(self, patience=5, min_delta=0.001):
        """
        :param patience: how many epochs to wait before stopping when loss is
               not improving
        :param min_delta: minimum difference between new loss and old loss for
               new loss to be considered as an improvement
        """
        self.patience = patience
        self.min_delta = min_delta
        self.wait = 0
        self.best_loss = None
        self.early_stop = False
    def __call__(self, current_loss):
        if self.best_loss == None:
            self.best_loss = current_loss
        elif (current_loss - self.best_loss) < -self.min_delta:
            self.best_loss = current_loss
            self.wait = 0
            torch.save(model.state_dict(),'RealArt_vs_FakeArt_resnet50.pt')
        else:
            self.wait = self.wait + 1
            print(f"INFO: Early stopping counter {self.wait} of {self.patience}")
            if self.wait >= self.patience:
                self.early_stop = True

In [46]:
def fine_tune(model, train_loader, validation_loader, criterion, optimizer, scheduler, early_stop ,num_epochs = 100):
    best_model = copy.deepcopy(model)
    best_acc = 0.0
    best_epoch=0
    stop = False
    
    for epoch in range(1, num_epochs + 1):
        if stop:
            break
        print(f'Epoch {epoch}/{num_epochs}')
        print('-'*120)

        data_loader = None
        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
                data_loader = train_loader
            else:
                model.eval()   # Set model to evaluate mode
                data_loader = validation_loader

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in tqdm(data_loader):
                
                inputs = inputs.to(device)
                labels = labels.to(device)
                
                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    outputs = nn.Softmax(dim = 1)(outputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / (len(data_loader) * data_loader.batch_size)
            epoch_acc = running_corrects.double() / (len(data_loader) * data_loader.batch_size)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
            
            
            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_epoch = epoch
                best_model = copy.deepcopy(model)
               
                
            if phase == 'val':
                early_stop(epoch_loss)
                print('-'*120, end = '\n\n')
                stop=early_stop.early_stop
                
                
    print(f'Best val Acc: {best_acc:4f}')
    print(f'Best epoch: {best_epoch:03d}')

    # load best model 
    return best_model         

In [None]:
if not 'RealArt_vs_FakeArt_resnet50.pt' in os.listdir():
   criterion = nn.CrossEntropyLoss()
   optimizer = optim.Adam(model.parameters(), lr=1e-3)
   scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
   early_stop= EarlyStopping(patience = 3, min_delta = 0.001)
   ImageFile.LOAD_TRUNCATED_IMAGES = True
   best_model_head=fine_tune(model, train_loader, validation_loader, criterion, optimizer, scheduler, early_stop, num_epochs = 30)
   torch.save(best_model_head, 'RealArt_vs_FakeArt_resnet50.pt')