# Testing a `torch` model
---

This notebook implements the prediction (test) step for a trained PyTorch model: it loads a checkpoint, runs the model on a test subset, and computes the accuracy.

In [None]:
import os
import sys
import zipfile

# Detect Google Colab by checking whether the `google.colab` module is already
# loaded in this kernel. This avoids the deprecated `pkg_resources` API and does
# not misfire on local machines that merely have a "google" distribution installed.
run_colab = "google.colab" in sys.modules

if run_colab:
    from google.colab import drive
    drive.mount("/content/drive", force_remount=True)

    # Project folder on the mounted drive; appended to sys.path so that modules
    # stored there can be imported by later cells.
    root = "/content/drive/MyDrive/DatSciEO"
    sys.path.append(root)

    data_dir_in = "1123_top10"
    data_name = "1123_delete_nan_samples"
    # Build the archive path from `root` so both use the same drive spelling
    # (the previous literal mixed "MyDrive" with a backslash-escaped "My\ Drive",
    # which is also an invalid Python escape sequence).
    data_zip = os.path.join(root, "data", data_dir_in, f"{data_name}.zip")
    data_root = "/content/data/"
    data_out_dir = os.path.join(data_root, data_dir_in)
    os.makedirs(data_out_dir, exist_ok=True)
    print(f"unzipping data to {data_out_dir}")
    # Pure-Python extraction: existing files are overwritten (like `unzip -o`)
    # and paths containing spaces need no shell quoting.
    with zipfile.ZipFile(data_zip) as archive:
        archive.extractall(data_out_dir)
    print("unzipping done")
else:
    # Local run: project modules and data are resolved relative to the
    # current working directory.
    root = "."
    data_root = "."

In [None]:
import datetime
import json
import os
import time

import numpy as np

from models import TreeClassifConvNet, TreeClassifResNet50
from utils import TreeClassifPreprocessedDataset

import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter

In [None]:
# general test settings
batch_size_test = 20
verbose = True

# Path of the checkpoint file to evaluate. It is read with torch.load and must
# contain a "model_state_dict" entry. Set this before running the notebook.
checkpoint_path = ""

if not checkpoint_path:
    # Fail with an actionable message instead of the opaque error that
    # torch.load("") produces.
    raise FileNotFoundError(
        "checkpoint_path is empty: set it to the checkpoint file that should be evaluated."
    )

# NOTE: torch.load unpickles the file - only load checkpoints from trusted sources.
# map_location="cpu" lets a checkpoint that was saved on a GPU machine be loaded
# on a CPU-only host as well.
checkpoint = torch.load(checkpoint_path, map_location="cpu")

In [None]:
# create the test dataset and its dataloader
# NOTE(review): on Colab the archive is extracted to <data_root>/1123_top10, while
# this path contains an additional "data/" component - confirm that the archive
# layout matches this directory.
dataset_dir = os.path.join(data_root, "data/1123_top10/1123_delete_nan_samples")

# Subset of the dataset used for testing; presumably samples 100..199 -
# TODO confirm how `indices` is interpreted by TreeClassifPreprocessedDataset.
test_indices = range(100, 200)
ds_test = TreeClassifPreprocessedDataset(dataset_dir, indices=test_indices)

# Evaluation does not benefit from shuffling; a fixed order keeps the collected
# predictions reproducible from run to run.
dl_test = DataLoader(ds_test, batch_size=batch_size_test, shuffle=False)

if verbose:
    print(
        f"\nUsing dataset with properties:\n"
        f"\tsamples test:    {len(ds_test)}\n"
        f"\tshape: {ds_test[0][0].shape}\n"
    )


Using dataset with properties:
	samples:    54930
	   train:   38451
	   val:     16479
	shape: (30, 5, 5)



In [None]:
# Build the network from the test dataset's properties, restore the weights
# stored in the checkpoint and move the model to the available device.
device = "cuda" if torch.cuda.is_available() else "cpu"

model_kwargs = dict(
    n_classes=ds_test.n_classes,
    width=ds_test.width,
    height=ds_test.height,
    depth=ds_test.depth,
)
model = TreeClassifResNet50(**model_kwargs)
model.load_state_dict(checkpoint["model_state_dict"])
model.to(device)

if verbose:
    # Total number of elements over all parameter tensors of the model.
    n_parameters = sum(param.numel() for param in model.parameters())
    print(model)
    print(f"with {n_parameters} parameters")

TreeClassifConvNet(
  (model): Sequential(
    (0): Conv2d(30, 15, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(15, 7, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): Conv2d(7, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (5): ReLU()
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=125, out_features=10, bias=True)
  )
)
with 6597 parameters


## The prediction loop

In [None]:
# Number of batches the loader yields; an int, unlike np.ceil(...) which returns a float.
n_batch_test = len(dl_test)
# Report progress about ten times per pass. The lower bound of 1 keeps the modulo
# below from dividing by zero when there are fewer than ten batches.
info_every = max(1, n_batch_test // 10)

# prediction loop
last_info = time.time()
t0 = time.time()
model.eval()
all_gts = []    # ground-truth labels, in loader order
all_preds = []  # predicted class indices, in loader order

# Inference only: disable autograd so no computation graph is built or kept in memory.
with torch.no_grad():
    for i_batch, (x, y) in enumerate(dl_test):
        assert not torch.isnan(x).any(), "NaN in test data, please fix."

        x = x.to(device)
        pred = model(x)

        # Labels are only collected for the metric, so they are not moved to the device.
        all_gts.extend(y.detach().cpu().numpy())
        # Predicted class = index of the highest score along dim 1
        # (assumes `pred` is (batch, n_classes), as the original argmax(axis=1) did).
        all_preds.extend(pred.argmax(dim=1).cpu().numpy())

        # Print at least every 20 seconds and on every `info_every`-th batch.
        if verbose and (((time.time() - last_info) > 20) or (i_batch % info_every == 0)):
            last_info = time.time()
            current = len(all_gts)  # samples processed so far; exact even for a short last batch
            running_acc = accuracy_score(all_gts, all_preds)
            t_per_it = (time.time() - t0) / (i_batch + 1)
            ETA = (n_batch_test - i_batch - 1) * t_per_it
            print(f"test:   acc {running_acc:>7f}     [{current:>5d}/{len(ds_test):>5d}]\t\tt/it {t_per_it:.2f}\tETA {datetime.timedelta(seconds=ETA)}\t{(datetime.datetime.now() + datetime.timedelta(seconds=ETA)).strftime('%Y-%m-%d %Hh%Mm%Ss')}")

accuracy_test = accuracy_score(all_gts, all_preds)
print(f"test accuracy: {accuracy_test:.4f}")
# TODO: confusion matrix