# Evaluate the performance of a trained model

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import os
import torch
from torch.utils.data import DataLoader
from torch.nn import DataParallel

from vos.models.STM import STM
from vos.algo.stm_train import STMAlgo
from vos.models.EMN import EMN
from vos.algo.emn_train import EMNAlgo
from vos.datasets.DAVIS import DAVIS_2017_TrainVal
from vos.utils.helpers import stack_images, stack_masks

# Expose only GPU index 1 to CUDA for this notebook process.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

In [None]:
# Machine-specific root folder of the DAVIS-2017 train/val 480p dataset.
davisroot = "/p300/videoObjSeg_dataset/DAVIS-2017-trainval-480p"
# Validation split, wrapped in a DataLoader that uses the default settings.
videod = DAVIS_2017_TrainVal(davisroot, mode= "val")
dataloader = DataLoader(videod)
# Number of batches the loader will yield (what `run_test` iterates over).
print(len(dataloader))

In [None]:
# Build the network and the algorithm object that drives evaluation.
# Swap in the commented EMN pair below to evaluate that model instead.
# NOTE(review): `Mem_every= 5` is forwarded to the algo's eval step; presumably
# it is the frame interval at which memory is updated -- confirm in STMAlgo.
model = DataParallel(STM())
algo = STMAlgo(eval_step_kwargs= dict(Mem_every= 5),)

# model = DataParallel(EMN())
# algo = EMNAlgo(eval_step_kwargs= dict(Mem_every= 5),)

# Move the model to the GPU, hand it to the algo, then switch to eval mode.
model.cuda()
algo.initialize(model)
model.eval()

## Start evaluating and calculating the mean loss and performance

In [None]:
def run_test():
    """Evaluate on every item of the global ``dataloader`` via ``algo.eval``.

    Relies on the notebook globals ``dataloader`` and ``algo``. A one-line
    progress report (terminated by a carriage return so it overwrites itself)
    is printed after each item.

    The per-item loss reported by ``algo.eval`` is not part of the result, so
    it is no longer accumulated.

    Returns:
        A 4-tuple ``(IoU, frame_IoU, contour_acc, frame_contour_acc)``:
        the mean of the per-item IoU, the NaN-ignoring mean over all
        per-frame IoU values, and the same two statistics for contour
        accuracy.

    Raises:
        ValueError: if ``dataloader`` yields no items (previously this
            surfaced as an ``UnboundLocalError`` on the loop index).
    """
    IoU, contour_acc = 0., 0.
    IoU_each_frame, contour_acc_frame = [], []
    n_videos = 0
    # Running per-frame averages; the last values are what gets returned.
    IoU_frame_ave, contour_acc_frame_ave = float("nan"), float("nan")
    for i, data in enumerate(dataloader):
        eval_info, extra_info = algo.eval(i, data)
        n_videos = i + 1
        IoU += eval_info.IoU
        contour_acc += eval_info.contour_acc
        IoU_each_frame.append(extra_info["IoU_each_frame"])
        contour_acc_frame.append(extra_info["contour_acc_frame"])
        # nanmean: frames whose metric is NaN are skipped, not counted as 0.
        IoU_frame_ave = np.nanmean(np.hstack(IoU_each_frame))
        contour_acc_frame_ave = np.nanmean(np.hstack(contour_acc_frame))
        print(
            f"Evaluating at iteration: {n_videos},",
            "IoU_this: {:.4f},".format(eval_info.IoU),
            "IoU: {:.4f},".format(IoU / n_videos),
            "IoU_frame_ave: {:.4f},".format(IoU_frame_ave),
            "ContourAcc_this: {:.4f},".format(eval_info.contour_acc),
            "ContourAcc: {:.4f},".format(contour_acc / n_videos),
            "ContourAcc_frame_ave: {:.4f},".format(contour_acc_frame_ave),
            "n_frames: {},".format(extra_info["IoU_each_frame"].shape[0]),
            end="\r",
        )
    if n_videos == 0:
        raise ValueError("dataloader yielded no data; nothing to evaluate")
    return IoU / n_videos, \
        IoU_frame_ave, \
        contour_acc / n_videos, \
        contour_acc_frame_ave

In [None]:
# Repeatedly load the checkpoint "params.pkl" from `statedict_root`, evaluate
# it with `run_test`, and stop once the IoU pair or the contour-accuracy pair
# is no longer entirely below its threshold.
# NOTE(review): presumably params.pkl is being overwritten by a concurrent
# training run, which is why a failed load is simply retried -- confirm.
# `statedict_root` must already be defined (it is set in a later cell).
IoU, f_IoU, acc, f_acc = 0,0,0,0
itr_is = []
IoUs, f_IoUs, accs, f_accs = [], [], [], []

threshold, acc_threshold = 0.69, 0.74
# threshold, acc_threshold = 0.57, 0.62
# threshold, acc_threshold = 0.38, 0.47
while (IoU < threshold and f_IoU < threshold) or (acc < acc_threshold and f_acc < acc_threshold):
    torch.cuda.empty_cache()
    try:
        state_dict = torch.load(os.path.join(statedict_root, "params.pkl"))
#         state_dict = torch.load("/root/VideoObjSeg/data/weightfiles/STM_5ImgData_fulltrain_71.1-74.2_DAVIS2017val.pkl")
#         state_dict = torch.load("/root/VideoObjSeg/data/weightfiles/EMN_5ImgData_pretrain_71.11-41.20_DAVIS2017val.pkl")
    except Exception as e:
        # Catch Exception, not everything: a bare `except:` also swallowed
        # KeyboardInterrupt, so this retry loop could not be interrupted.
        # The error is printed so a permanent failure is visible.
        print("Error, continuing:", e, end= "\r")
        continue
    print("Train iteration: ", state_dict['itr_i'])
    model_state_dict = state_dict['model_state_dict']
    model.load_state_dict(model_state_dict)
    model.eval()
    IoU, f_IoU, acc, f_acc = run_test()

    # Optional: uncomment to plot the metrics against the training iteration.
#     itr_is.append(state_dict['itr_i'])
#     IoUs.append(IoU); f_IoUs.append(f_IoU); accs.append(acc); f_accs.append(f_acc)
#     plt.plot(itr_is, IoUs, "b+")
#     plt.plot(itr_is, f_IoUs, "bo")
#     plt.plot(itr_is, accs, "r+")
#     plt.plot(itr_is, f_accs, "ro")

    print("Testing result IoU: {:.5f}".format(IoU), 
          "frames_IoU: {:.5f}".format(f_IoU), 
          "ContourAcc: {:.5f}".format(acc), 
          "frames_ContourAcc: {:.5f}".format(f_acc), 
          "######################################################################################################"
    )


In [None]:
# Keep a copy of the checkpoint that was just evaluated, tagging the file name
# with its per-frame IoU and contour accuracy expressed in percent.
# `statedict_root` must point at a run directory for the join below to work.
ckpt_name = "params-{:.2f}-{:.2f}.pkl".format(f_IoU*100, f_acc*100)
dst = os.path.join(statedict_root, ckpt_name)
print("Save to: ", dst)
torch.save(state_dict, dst)

# Use the evaluation benchmark to evaluate the model
NOTE: you can run the following cells without running any of the previous cells.

Please install https://github.com/suhwan-cho/davis-evaluation first.

In [1]:
import os
import sys

import numpy as np
from davis2017.evaluation import DAVISEvaluation

############### all eval_DAVIS imports
import torch
from torch.utils import data

import torch.nn as nn
import torch.nn.functional as F

# general libs
import cv2
import matplotlib.pyplot as plt
from PIL import Image
import math
import tqdm
import threading

### My libs
from vos.datasets.DAVIS import DAVIS_MO_Test
from vos.models.STM import STM
from vos.algo.stm_train import STMAlgo
from vos.models.EMN import EMN
from vos.algo.emn_train import EMNAlgo

Space-time Memory Networks: initialized.


In [2]:
# ---- Benchmark configuration -------------------------------------------
# GPU:  value written to CUDA_VISIBLE_DEVICES.
# YEAR: two-digit DAVIS year as a *string* ("16" or "17").
# SET:  image-set name used to pick the sequence list file.
GPU = "2"
YEAR = "17"
SET = "val"

davisroot = "/p300/videoObjSeg_dataset/DAVIS-2017-trainval-480p"
outputroot = "/p300/VideoObjSeg_data/STM_test/continue_test/"
# NOTE(review): gt_set/type are hard-coded here and do not follow SET/YEAR;
# change them together if you evaluate another split or year.
dataset_eval = DAVISEvaluation(davis_root=davisroot, task="semi-supervised", gt_set="val", type="2017")

os.environ['CUDA_VISIBLE_DEVICES'] = GPU

torch.cuda.empty_cache()
# Borrow the colour palette from one ground-truth annotation so saved
# predictions use the same palette. The file is closed once it has been read.
with Image.open(os.path.join(davisroot, 'Annotations/480p/blackswan/00000.png')) as palette_src:
    palette = palette_src.getpalette()

# YEAR is a string, so compare against "16"; the previous `YEAR==16` compared
# str with int and was therefore always False.
Testset = DAVIS_MO_Test(davisroot, resolution='480p', imset='20{}/{}.txt'.format(YEAR,SET), single_object=(YEAR=="16"))
Testloader = data.DataLoader(Testset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True)

model = nn.DataParallel(STM())
algo = STMAlgo() # only use its step() method, so no need for any hyper-parameters

# model = nn.DataParallel(EMN())
# algo = EMNAlgo() # only use its step() method, so no need for any hyper-parameters

model.cuda()
model.eval() # turn-off BN
algo.initialize(model)

In [11]:
# Directory of the training run whose checkpoint should be evaluated/saved.
# Uncomment exactly one of the candidate paths listed below. While this stays
# None, any `os.path.join(statedict_root, ...)` in other cells will raise.
statedict_root = None
################################# 3 datasets stm pretrain
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200319/synth0.0-10.0-0.05-0.0-0.1/img_res-384,384/NNSTM/big_objects-True1/b_size-4/run_0"
## stm fulltrain
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200326/img_res-384,384/NNSTM/b_size-4/pretrainTrue/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200331/img_res-384,384/NNSTM/b_size-4/pretrainTrue/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200402/img_res-384,384/NNSTM/b_size-4/pretrainTrue/run_0"
################################## stm main only
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200326/img_res-384,384/NNSTM/b_size-4/pretrainFalse/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200403/img_res-384,384/NNSTM/b_size-4/pretrainFalse/run_0"
################################## 5 datasets stm pretrain
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200327/synth0.0-10.0-0.05-0.0-0.1/img_res-384,384/NNSTM/big_objects-True1/b_size-4/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200327/synth0.0-10.0-0.05-0.0-0.1/img_res-384,384/NNSTM/big_objects-True1/b_size-4/w_decay-0.0005/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200412/synth0.0-10.0-0.05-0.0-0.1/NNSTM/trainParam-24-1e-05-100000000000000000000-0.9/pixel_dilate-1/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200413/synth0.0-10.0-0.05-0.0-0.1/NNSTM/trainParam-4-1e-05-10000000000-0.9/pixel_dilate-1/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200413/synth0.0-10.0-0.05-0.0-0.1/NNSTM/trainParam-4-5e-05-10000000000-0.9/pixel_dilate-1/run_0"
################################## 5 datasets emn pretrain
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200402/synth0.0-10.0-0.05-0.0-0.1/img_res-384,384/NNEMN/big_objects-True1/b_size-4/w_decay-0.0/run_0"
# statedict_root = "/p300/VideoObjSeg_data/local/video_segmentation/20200407/synth0.0-10.0-0.05-0.0-0.1/img_res-384,384/NNEMN/big_objects-True1/b_size-4/w_decay-0.0/run_0"
# statedict_root = "/root/VideoObjSeg/data/local/video_segmentation/20200409/synth0.0-10.0-0.05-0.0-0.1/NNEMN/big_objects-True1/pixel_dilate-1/run_0"
# statedict_root = "/root/VideoObjSeg/data/local/video_segmentation/20200411/synth0.0-10.0-0.05-0.0-0.1/NNEMN/trainParam-24-5e-05-10000000000-0.9/pixel_dilate-1/run_0"
################################## emn fulltrain
# statedict_root = "/root/VideoObjSeg/data/local/video_segmentation/20200406/img_res-384,384/NNEMN/b_size-4/pretrainTrue/run_0"
# statedict_root = "/root/VideoObjSeg/data/local/video_segmentation/20200409/NNEMN/b_size-4/pretrainTrue/run_0"
# statedict_root = "/root/VideoObjSeg/data/local/video_segmentation/20200412/NNEMN/train_spec-20-5e-05/pretrainTrue/run_0"

In [5]:
# Hand-over slot between the compute loop and the saving thread:
# [sequence name, number of frames, predicted label maps].
to_save = [None, None, None] # a global object that enables multi-threading saving files
def save_result_to_output():
    """Write the prediction currently held in ``to_save`` as palette PNGs.

    Reads ``(seq_name, num_frames, pred)`` from the global ``to_save`` and
    writes frame ``f`` of ``pred`` to ``<outputroot>/<seq_name>/<f:05d>.png``
    after attaching the global ``palette``. Takes no arguments so that it can
    be used directly as a ``threading.Thread`` target.
    """
    # Snapshot the slot so later changes to the list do not affect this call.
    seq_name, num_frames, pred = to_save[:]
    # save elements into outputroot sub-directories
    test_path = os.path.join(outputroot, seq_name)
    # exist_ok avoids the check-then-create race of `exists()` + `makedirs()`.
    os.makedirs(test_path, exist_ok=True)
    for f in range(num_frames):
        img_E = Image.fromarray(pred[f])
        img_E.putpalette(palette)
        img_E.save(os.path.join(test_path, '{:05d}.png'.format(f)))
    
def eval_davis(model_state_dict):
    """Segment every sequence of ``Testloader`` and dump the results to disk.

    The weights in ``model_state_dict`` are loaded into the global ``model``.
    Each sequence is run through ``algo.step`` with gradients disabled, and
    the per-pixel argmax is handed to ``save_result_to_output`` on a
    background thread, so writing one sequence overlaps with computing the
    next one. Only one saving thread is alive at a time.

    Args:
        model_state_dict: state dict accepted by ``model.load_state_dict``.
    """
    torch.cuda.empty_cache()
    model.load_state_dict(model_state_dict)

    print('Start Testing: {}, output to: {}'.format('DAVIS_{}{}'.format(YEAR,SET), outputroot))

    # No writer exists before the first sequence has been computed.
    writer = None

    for video_idx, batch in enumerate(Testloader):
        torch.cuda.empty_cache()
        frames, masks, object_counts, meta = batch
        name = meta['name'][0]
        n_frames = meta['num_frames'][0].item()
        progress = 'video: {:2d} [{:15s}]: num_frames: {:3d}, num_objects: {:2d}'.format(
            video_idx, name, n_frames, object_counts[0][0]
        )
        print(progress, end="\r")

        # Forward pass only; no autograd bookkeeping is needed.
        with torch.no_grad():
            scores, _ = algo.step(
                frames=frames,
                masks=masks,
                n_objects=object_counts,
                Mem_every=5,
                Mem_number=None,
            )
        scores_np = scores[0].detach().cpu().numpy()
        labels = np.argmax(scores_np, axis=1).astype(np.uint8)

        # Wait for the previous write to finish before reusing the shared slot.
        if writer is not None:
            writer.join()
        # In-place update: the saving function reads this same list object.
        to_save[:] = [name, n_frames, labels]
        writer = threading.Thread(target=save_result_to_output)
        writer.start()

    # Make sure the last sequence is fully written before reporting completion.
    if writer is not None:
        writer.join()
    print("Output to {} done".format(outputroot))


In [14]:
# Generate predictions with `eval_davis`, score them with the DAVIS benchmark
# (`dataset_eval`), and repeat until J_mean or F_mean reaches its threshold.
# NOTE(review): the trailing `and False` makes the condition always false, so
# the loop body never runs as written; remove it to enable the evaluation.
J_thresh, F_thresh = 0.57, 0.62
J_mean, F_mean = 0, 0
while J_mean < J_thresh and F_mean < F_thresh and False:
    # run to generate val output
    try:
        print("Loading weights", end= "\r")
        # Exactly one `torch.load` line should be active; the others are
        # alternative checkpoints kept for quick switching.
#         state_dict = torch.load(os.path.join(statedict_root, "params.pkl"))
#         state_dict = torch.load("/root/VideoObjSeg/data/weightfiles/EMN_5ImgData_pretrain_71.11-41.20_DAVIS2017val.pkl")
#         state_dict = torch.load("/root/VideoObjSeg/data/weightfiles/STM_5ImgData_fulltrain_71.1-74.2_DAVIS2017val.pkl") # 
#         state_dict = torch.load("/root/VideoObjSeg/data/weightfiles/STM_5ImgData_pretrain_62.1-65.6_DAVIS2017val.pkl")
#         state_dict = torch.load("/root/VideoObjSeg/data/weightfiles/STM_params_56.38DAVIS2017val.pkl")
        state_dict = torch.load("/root/VideoObjSeg/data/weightfiles/STM_pretrain_62.29-65.62_DAVIS2017val.pkl")
        print("Weight loaded at itr: {}".format(state_dict["itr_i"]))
        eval_davis(state_dict["model_state_dict"])
    except Exception as e:
        # Any failure while loading or predicting is printed and the whole
        # iteration is retried; a permanent error therefore loops forever.
        print(e, end= "\r")
        continue
    # run to evaluate the output
    print("Evaluating using evaluating server")
    metric_res = dataset_eval.evaluate(outputroot)
    J, F = metric_res['J'], metric_res['F']
    # NOTE(review): presumably the 'M' entries hold per-sequence mean scores,
    # averaged here into one number each -- confirm against davis2017.
    J_mean, F_mean = np.mean(J['M']), np.mean(F['M'])
    print("J_mean: {:.4f}, F_mean: {:.4f}".format(J_mean, F_mean))


In [None]:
# Store a copy of the evaluated checkpoint whose file name carries the
# benchmark J and F means in percent.
# `statedict_root` must be a directory path (not None) and `state_dict` must
# have been loaded by the evaluation loop for this cell to succeed.
ckpt_name = "params-{:.2f}-{:.2f}.pkl".format(J_mean*100, F_mean*100)
dst = os.path.join(statedict_root, ckpt_name)
print("Save to: ", dst)
torch.save(state_dict, dst)