# Run evaluation of exported models

###### Execute on remote
Loads batch sizes, datasets and export locations, then submits a Slurm job for each export, evaluating it on the validation and test sets overall.

Input:
* /home/nfs/nknyazev/thesis/data/results/exported_model_params.json - JSON file produced by ../2_evaluate_exports/1_gather_export_model_params or written manually. This file has one entry per model, containing the information needed to rerun the export, and is formatted as:
>"0": {
        "dataset": "/home/nfs/nknyazev/thesis/data/numpy/ml-10m",
        "batch_size": "1000",
        "export_path": "/path/to/export/metric/000000001"
    }
    
Uses:
* model/estimator/estimator_evaluate_export.py

Returns:
* u_evaluator_folders.json - json containing each export's paths, formatted as:
>"0": {
        "dataset": "ml-10m",
        "model_id": "0",
        "path": "/path/to/grandparent_folder" that would contain 3/test_u_evaluator
    }
    
Note that when dealing with S3/SageMaker, the 4b Python file should be run instead.

The `python3 model/estimator/estimator_evaluate_export.py ...` command below can be adapted to match your execution environment, as long as the outputs adhere to the structure above.

In [None]:
import json
import os
import re
import sys
import subprocess

In [None]:
# JSON file describing the exports to evaluate; it is loaded into `params` further down
input_path = "/home/nfs/nknyazev/thesis/data/results/exported_model_params.json"
# Root folder under which the per-export pickle folders passed to --pickle_folder are built
pickle_root = "/tudelft.net/staff-bulk/ewi/insy/MMC/nknyazev/pickled_evaluators"
# Destination JSON file for the submission history written at the end of the notebook
output_path = "/home/nfs/nknyazev/thesis/data/results/u_evaluator_folders.json"

In [None]:
# Name of the job file that is (re)written for every export
SBATCH_FILENAME = "job.sbatch"

# Header of the job file: shebang followed by the sbatch resource directives
SBATCH_STRING = (
    "#!/bin/sh\n"
    "#SBATCH --time=04:00:00\n"
    "#SBATCH --qos=short\n"
    "#SBATCH --cpus-per-task=2\n"
    "#SBATCH --mem=16384\n"
    "#SBATCH --gres=gpu:pascal:1\n"
)

# Module setup lines needed to use tensorflow
CUDA_STRING = (
    "module use /opt/insy/modulefiles\n"
    "module load cuda/10.0\n"
    "module load cudnn/10.0-7.6.0.64\n"
)

# Template echoing the job parameters; placeholders are, in order:
# export_path, data_path, batch_size, reset_devices
ECHO_STRING = (
    "echo export_path: {}\n"
    "echo data_path: {}\n"
    "echo batch_size: {}\n"
    "echo reset_devices: {}\n"
)

# Template of the evaluation command; placeholders are, in order:
# export_path, data_path, batch_size, reset_devices, pickle_folder
JOB_STRING = (
    "srun -u python3 model/estimator/estimator_evaluate_export.py "
    "--export_path {} "
    "--data_path {} "
    "--batch_size {} "
    "--reset_devices {} "
    "--pickle_folder {} "
    "--pickle_non_user_evaluators False \n "
)

In [None]:
# Load the export descriptions (dataset, batch size, export path) from the input JSON
with open(input_path) as params_file:
    params = json.loads(params_file.read())

In [None]:
# Collects one record per export, keyed by the export's position (as a string);
# each record holds the dataset name, the model id and the pickle parent folder
submission_history = {}

In [None]:
# Iterate over exports: write an sbatch job file for each one and record where
# the job will pickle its evaluators.
for i, v in enumerate(params.values()):
    export_path = v["export_path"]
    # Actual dataset name like lastfm_10_pc and ml-10m (last component of the dataset path)
    dataset = v["dataset"].split("/")[-1]
    # Params included in ECHO_STRING and JOB_STRING, in placeholder order;
    # reset_devices is always passed as "True"
    general_params = [export_path, v["dataset"], v["batch_size"], "True"]

    # As this code is adapted from ./2_evaluate_exports/2_...ipynb it classifies
    # ALL interactions under number 3 (as opposed to numbers 0-2 for low, medium and high time gaps)
    # NOTE(review): the fixed [8:18] slice assumes a specific directory depth of
    # the export path - confirm against the paths in the input file
    pickle_path = os.path.join(pickle_root, "/".join(export_path.split("/")[8:18]), "3")

    # Write the .sbatch file; the same file name is overwritten on every iteration
    with open(SBATCH_FILENAME, "w") as output_file:
        output_file.write(SBATCH_STRING)
        output_file.write(CUDA_STRING)
        # Echo lines allow retrieving the params from the job output
        output_file.write(ECHO_STRING.format(*general_params))
        output_file.write(JOB_STRING.format(*general_params, pickle_path))

    # Submit the job file (requires sbatch to be installed on the system where the code is executed)
    # subprocess.call(["sbatch", SBATCH_FILENAME])

    # The model id is the first digit of the path component that follows
    # "<dataset>/<2-4 digits>/". The dataset name is escaped so that regex
    # metacharacters in it are matched literally.
    # NOTE(review): only a single digit is captured - confirm model ids never exceed 9
    model_id_match = re.search(re.escape(dataset) + r"/\d{2,4}/(\d)", export_path)
    if model_id_match is None:
        raise ValueError(
            "Cannot extract model id from export path {!r} for dataset {!r}".format(export_path, dataset)
        )

    submission_history[str(i)] = {
        "dataset": dataset,
        "model_id": model_id_match.group(1),
        # Parent of the "3" folder, i.e. the folder that will contain 3/...
        "path": os.path.split(pickle_path)[0],
    }

In [None]:
# Persist the per-export records as indented JSON
with open(output_path, "w") as history_file:
    history_file.write(json.dumps(submission_history, indent=4))