In [None]:
from load_dataset.artgraph import ArtGraph
from utils.MultiTaskLinkSplitter import MultiTaskLinkSplitter
from utils.dataset import TrainingDataSet, TestDataSet, SingleTaskTestDataSet, SingleTaskTrainingDataSet
from utils.EarlyStopping import EarlyStopping
from multi_task_model import MultiTaskClassificationModel, NewMultiTaskClassificationModel
from torch import nn
from utils.training_utils import fine_tune, test as test_func, compute_topk, plot_confusion_matrix, fine_tune_single_task, test_single_task
from torch.utils.data import DataLoader
from torch import optim
import torch
import pandas as pd
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
%matplotlib inline

In [None]:
# Fix the global random seed once, before the data split and any model
# construction, so that a Restart-and-Run-All of the notebook is repeatable.
from torch_geometric.seed import seed_everything
seed_everything(1)

In [None]:
# Dataset root: passed to ArtGraph and used later to locate the
# '{root}/mapping/{task}_entidx2name.csv' label-name files.
root = 'artgraph2bestemotions'
# CSV file read with pandas to obtain the per-artwork labels.
data_path = 'data_full_labels.csv'

In [None]:
# Build the ArtGraph dataset under `root` and keep its first element.
# NOTE(review): the meaning of preprocess / features / fine_tuning is defined
# in load_dataset.artgraph and is not visible from this notebook.
data = ArtGraph(root = root, preprocess='constant', features = 'vit', fine_tuning = True)[0]
# Bare expression: shows the loaded object's summary as the cell output.
data

In [None]:
# Split the loaded graph into train / validation / test parts using a splitter
# created with a fixed seed.
mls = MultiTaskLinkSplitter(seed = 1)
train, val, test = mls.transform(data)
# Bare expression: shows the training split as the cell output.
train

In [None]:
# Read the label table and index its rows by the `artwork` column, keeping
# `artwork` available as a regular column as well.
data_labels = pd.read_csv(data_path).set_index('artwork', drop=False)
# Bare expression: shows the label table as the cell output.
data_labels

In [None]:
from torch_geometric.transforms import ToUndirected

# Mini-batch size and maximum number of epochs used by the training cell.
batch_size = 128
num_epochs = 50
# Prefer the first GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing on the first `.to(device)`.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Apply ToUndirected to the training graph and move the result to `device`.
# NOTE: this rebinds `train`, so re-running this cell alone applies the
# transform to an already-transformed graph; re-run the split cell first.
train = ToUndirected()(train).to(device)

In [None]:
from model import ModelClassification

# Train one independent classifier per task. The best checkpoint of each run is
# expected at 'best_model_{task}_single_task.pt' (path handed to EarlyStopping
# and read back by the evaluation cell).
for task in ('style', 'genre', 'emotion'):
    print(f"{'*'*50}{task.upper()}{'*'*50}")

    # Training batches are reshuffled every epoch.
    train_dataset = SingleTaskTrainingDataSet(train, data_labels, task)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=False)

    # Validation is evaluation only: keep a fixed order so every epoch sees the
    # same batches (including the same short last batch). The signal driving
    # early stopping and the LR scheduler is then not perturbed by batch
    # composition, and no global RNG state is consumed.
    val_dataset = SingleTaskTestDataSet(val, data_labels, train, task)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

    # out_channels is the number of rows of the task node type's feature matrix,
    # i.e. the number of `task` nodes (presumably one node per class - confirm).
    model = ModelClassification(data = train, hidden_channels = 128, out_channels = train[task].x.shape[0],
                               obj = task, head_num_layers = 5, gnn_activation = torch.nn.Tanh(),
                               head_activation = torch.nn.LeakyReLU, drop_rate = 0.25,
                               shared=True, gnn_mean =False, bnorm=False).to(device)

    # One gradient-free forward pass through the encoder before the optimizer
    # is built (presumably to materialise lazily-initialised layers so that
    # model.parameters() is complete - confirm against ModelClassification).
    with torch.no_grad():
        model.encoder(train.x_dict, train.edge_index_dict)

    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    early_stop = EarlyStopping(patience = 5, min_delta = 1e-3, checkpoint_path = f'best_model_{task}_single_task.pt')
    criterion = torch.nn.CrossEntropyLoss()
    # Halve the learning rate after one epoch without improvement (mode 'min'),
    # never going below 1e-7.
    scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, 'min', min_lr = 1e-7, verbose = True, factor = .5, patience = 1,
                                               threshold = 1e-4)
    fine_tune_single_task(model, train, train_loader, val_loader, criterion, optimizer, early_stop, scheduler,
                         num_epochs)

In [None]:
# Evaluate each task's best checkpoint on the test split and collect
# Top-1 / Top-2 accuracy and macro-F1 into `results_df`.
batch_size = 128
# Prefer the first GPU, fall back to CPU when CUDA is not available.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Per-task value forwarded to plot_confusion_matrix as `hop`
# (NOTE(review): its meaning is defined in utils.training_utils - confirm).
map_hop={'style':5,
        'genre':3,
        'emotion':2}
results_df = pd.DataFrame(columns = ['Top1', 'Top2', 'Macro-F1'], index = ('style', 'genre', 'emotion'))
for task in ('style', 'genre', 'emotion'):
    test_dataset = SingleTaskTestDataSet(test, data_labels, train, task)
    # Evaluation does not need shuffling; a fixed order keeps runs comparable.
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)
    # The checkpoint is a whole pickled model (it is used as a module right away).
    # map_location lets a checkpoint saved on GPU be restored on `device`.
    # SECURITY: torch.load unpickles the file - only load checkpoints you created.
    # NOTE: newer PyTorch releases default to weights_only=True; if loading is
    # rejected there, pass weights_only=False for this trusted checkpoint.
    model = torch.load(f'best_model_{task}_single_task.pt', map_location=device).to(device)
    # Disable dropout explicitly, in case test_single_task does not switch the
    # model to evaluation mode itself.
    model.eval()
    with torch.no_grad():
        pred, lab = test_single_task(model, test_loader, train)
    # Single .loc[row, column] assignment: the chained form
    # `results_df[col].loc[task] = ...` writes to a temporary object under pandas
    # Copy-on-Write and would leave `results_df` unchanged.
    results_df.loc[task, 'Top1'] = compute_topk(lab, pred, k=1)
    results_df.loc[task, 'Top2'] = compute_topk(lab, pred, k=2)
    # Index of the highest score along dimension 1 is the predicted class.
    pred_lab = torch.max(torch.Tensor(pred), 1)[1]
    results_df.loc[task, 'Macro-F1'] = f1_score(lab, pred_lab, average = 'macro')
    # Names for the confusion-matrix axes, read from the task's mapping file.
    labels = pd.read_csv(f'{root}/mapping/{task}_entidx2name.csv', names = ['idx','name'])['name'].tolist()
    plot_confusion_matrix(lab, pred_lab, hop = map_hop[task], labels = labels)

In [None]:
# Show the per-task Top1 / Top2 / Macro-F1 table filled in by the evaluation cell.
results_df