In [None]:
from tensorboardX import SummaryWriter
from validate import validate
from networks.trainer import Trainer
from torch.utils.data import DataLoader
import numpy as np
import os
import time
import random
import torch

#from base_miner.util import Logger
from util.data import load_datasets, create_real_fake_datasets
from bitmind.image_transforms import base_transforms, random_aug_transforms
from options import TrainOptions

In [None]:
# Parse the training options and build the Trainer wrapper around the network.
opt = TrainOptions().parse()
model = Trainer(opt)

# Restore the pretrained detector weights. The checkpoint is deserialized onto the CPU;
# load_state_dict then copies the values into the network's existing parameters.
# NOTE(review): torch.load unpickles the file, so only point this at trusted checkpoints.
weight_path = '../mining_models/base_aug.pth'
state_dict = torch.load(weight_path, map_location='cpu')
model.model.load_state_dict(state_dict)

# Switch the network to eval mode; the trailing semicolon hides the module repr in the cell output.
model.model.eval();

In [None]:
# DATASET_META is the dataset description handed to load_datasets below.
from bitmind.constants import DATASET_META

# Load the real and fake image dataset collections.
real_datasets, fake_datasets = load_datasets(DATASET_META)

# Build the train/val/test splits. The test split is deliberately given random_aug_transforms
# (the same transforms as the train split) so that evaluation is more representative of
# validator data, which includes augmentation. To evaluate without random augmentation, pass
# test_transforms=base_transforms instead (presumably the non-augmenting pipeline -- confirm
# in bitmind.image_transforms).
train_dataset, val_dataset, test_dataset = create_real_fake_datasets(
    real_datasets, 
    fake_datasets, 
    train_transforms=random_aug_transforms,
    val_transforms=base_transforms,
    test_transforms=random_aug_transforms)

In [None]:
def _collate_as_tuple(samples):
    """Return the batch as a tuple of the raw samples, without stacking them into tensors."""
    return tuple(samples)


# Settings shared by all three loaders; only the train loader shuffles.
_loader_kwargs = dict(batch_size=32, num_workers=0, collate_fn=_collate_as_tuple)

train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)



In [None]:
# Trainer.eval() is defined outside this notebook; it is expected to put the wrapped
# network into eval mode -- TODO confirm against networks.trainer.
model.eval()
# validate() scores the network on the test loader; its six return values are unpacked as
# overall accuracy, average precision, two per-class accuracies (r_acc / f_acc, presumably
# real / fake), and the label and prediction arrays.
acc, ap, r_acc, f_acc, y_true, y_pred = validate(model.model, test_loader)
print("(Test) acc: {}; ap: {}".format(acc, ap))

In [None]:
from sklearn.metrics import average_precision_score, precision_recall_curve, accuracy_score
import matplotlib.pyplot as plt

# Run inference on whichever device the network's weights live on instead of assuming CUDA.
device = next(model.model.parameters()).device

# Offset of the current batch's first sample inside the dataset's `_history` lists.
# Assumes `_history` gains one entry per sample fetched (including samples filtered out
# below) and that reset() clears it -- confirm against the dataset class.
pred_idx = 0
test_loader.dataset.reset()
y_true, y_pred = [], []
with torch.no_grad():
    for batch in test_loader:
        # Each batch is a tuple of (image, label) samples. Keep only the samples whose
        # leading image dimension matches the first sample's so they can be stacked.
        ref_dim = batch[0][0].shape[0]
        kept = [(pos, sample) for pos, sample in enumerate(batch) if sample[0].shape[0] == ref_dim]

        inputs = torch.stack([sample[0] for _, sample in kept]).to(device).float()
        labels = torch.stack([torch.tensor(sample[1]) for _, sample in kept]).to(device).float()

        batch_true = labels.flatten().tolist()
        batch_pred = model.model(inputs).sigmoid().flatten().tolist()
        y_true.extend(batch_true)
        y_pred.extend(batch_pred)

        # Show every misclassified image together with where it came from.
        for (batch_pos, _), image, y, y_hat in zip(kept, inputs, batch_true, batch_pred):
            if y != np.round(y_hat):
                plt.imshow(image.cpu().permute(1, 2, 0).numpy())
                plt.show()
                # batch_pos is the position in the *unfiltered* batch, which is what lines
                # up with the per-fetch history once pred_idx counts every fetched sample.
                img_id = test_loader.dataset._history['index'][pred_idx + batch_pos]
                img_source = test_loader.dataset._history['source'][pred_idx + batch_pos]
                print(f"source: {img_source}; id: {img_id}; Label: {y}; Prediction: {y_hat}")

        # Advance by the full batch size, not the kept count: dropped samples were still
        # fetched, so using len(kept) would shift every later history lookup.
        pred_idx += len(batch)

# Metrics over the kept samples at a 0.5 decision threshold.
y_true, y_pred = np.array(y_true), np.array(y_pred)
r_acc = accuracy_score(y_true[y_true==0], y_pred[y_true==0] > 0.5)  # accuracy on label-0 samples
f_acc = accuracy_score(y_true[y_true==1], y_pred[y_true==1] > 0.5)  # accuracy on label-1 samples
acc = accuracy_score(y_true, y_pred > 0.5)
ap = average_precision_score(y_true, y_pred)


In [None]:
def compute_metrics(TP, FP, TN, FN):
    """Derive precision, recall and F1 from confusion-matrix counts.

    Args:
        TP: number of true positives.
        FP: number of false positives.
        TN: number of true negatives (accepted but not used by any of the metrics).
        FN: number of false negatives.

    Returns:
        Tuple ``(precision, recall, f1_score)``. Any metric whose denominator is
        zero is reported as 0.
    """
    predicted_positive = TP + FP
    actual_positive = TP + FN

    if predicted_positive != 0:
        precision = TP / predicted_positive
    else:
        precision = 0

    if actual_positive != 0:
        recall = TP / actual_positive
    else:
        recall = 0

    # F1 is the harmonic mean of precision and recall; it is 0 when both are 0.
    pr_sum = precision + recall
    f1_score = 2 * (precision * recall) / pr_sum if pr_sum != 0 else 0

    return precision, recall, f1_score

In [None]:
# Confusion counts at a 0.5 decision threshold, treating label 1 as the positive class.
is_positive = y_true == 1
is_negative = y_true == 0
above_threshold = y_pred > 0.5
at_or_below_threshold = y_pred <= 0.5

tp = sum(above_threshold[is_positive])
fp = sum(above_threshold[is_negative])
tn = sum(at_or_below_threshold[is_negative])
fn = sum(at_or_below_threshold[is_positive])
(tp, fp, tn, fn)

In [None]:
# Display (precision, recall, f1_score) for the confusion counts; tn is passed for
# completeness but compute_metrics does not use it.
compute_metrics(tp, fp, tn, fn)

In [None]:
# Sanity check: number of batches the test loader yields vs. the length of the test dataset.
len(test_loader), len(test_dataset)