# INIT

In [None]:
################################################################################################################
################################################################################################################
### import overall useful libraries
import os
import platform
import copy
import sys
import inspect
import time
import collections
import math
from datetime import datetime


### import specific libraries for this notebook
import pandas as pd
import numpy as np
import sklearn as sk
import torch
import matplotlib.pyplot as plt

### import specific functions from torch and sklearn
from torch.utils.data import Dataset, DataLoader
from functools import reduce
from sklearn.feature_extraction import DictVectorizer
from sklearn.model_selection import ParameterGrid
import joblib

### Imports that are also performed in util.py
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler

################################################################################################################
################################################################################################################
# automatically reload python files (util.py and conf.py) when they are changed.
%reload_ext autoreload
%autoreload 2

# Put the parent directory at the front of the module search path so that the
# project-local modules imported below (util.py, conf.py) can be resolved.
# This has to run before those imports.
sys.path.insert(0, '..') 

### from util.py (file which once contained all classes and functions).
### The star-import brings its public names into the notebook namespace;
### presumably this is where dueling_net, evaluate_model and eval_WDR,
### which later cells call unqualified, come from -- verify against util.py.
from util import * 

### Configuration file to determine root directory
### (later cells read conf.EXP_DIR from it).
import conf

### check for GPU's: True when CUDA is available. Later cells use this flag to
### decide whether to move the model to the GPU and how to load checkpoints.
use_gpu = torch.cuda.is_available()

### Check everything: report the Python environment via the helper in conf.py.
conf.print_python_environment()

# Select experiment and experiment grid number

In [None]:
# ---------------------------------------------------------------------------
# Experiment selection
# ---------------------------------------------------------------------------
from pprint import pprint

# Name of the experiment and index of the grid run to inspect.
exp_name = 'FINAL'
exp_grid_run = 1

# OPTIONAL: inspect a continued training run instead of the original one.
continued = False
continued_grid = 0

# Paths are always rooted at the original experiment name, even when the run
# itself carries the "_<grid>_continued" suffix.
orig_exp_name = exp_name

if not continued:
    print('Experiment name: ', exp_name)
else:
    exp_name = '{}_{}_continued'.format(exp_name, continued_grid)
    print('CONTINUED experiment name: ', exp_name)

# ---------------------------------------------------------------------------
# Load the configuration the model was trained with
# ---------------------------------------------------------------------------
# CSV path relative to conf.EXP_DIR:
#   <orig_exp_name>/models/<exp_name>_<exp_grid_run>_config.csv
model_config_file = '{}/models/{}_{}_config.csv'.format(orig_exp_name, exp_name, exp_grid_run)
model_config_df = pd.read_csv(os.path.join(conf.EXP_DIR, model_config_file), sep=',')

# DataFrame.to_dict() yields {column: {row_index: value}}; later cells read
# row 0 of each column.
model_config = model_config_df.to_dict()
pprint(model_config)

# Load data and configuration files

In [None]:
###################
# Location of the final model checkpoint, relative to conf.EXP_DIR
exp_model = orig_exp_name + '/models/' + exp_name + '_' + str(exp_grid_run) + '_model.chk'  

# Directories for this run's results (interim checkpoints are read from it and
# the Q-value / action-probability CSV files are written to it) and for figures
exp_resultsdir = os.path.join(conf.EXP_DIR, str(orig_exp_name) + '/models/' + str(exp_name) + '_' + str(exp_grid_run) +'/')
exp_figuresdir = os.path.join(conf.EXP_DIR, str(orig_exp_name) + '/figures/')

###################
# ASSIGN the CONFIG settings from the trained model.
# model_config is DataFrame.to_dict(), i.e. {column: {row_index: value}},
# so [0] selects the first (only expected) row of the config CSV.
config = { 
          'state_dim' :   model_config['state_dim'][0],
          'action_dim' :  model_config['action_dim'][0],
          'gamma' :       model_config['gamma'][0],
          'hidden_dim' :  model_config['hidden_dim'][0],
          'num_hidden' :  model_config['num_hidden'][0],
          'drop_prob' :   model_config['drop_prob'][0],
          'option' :      model_config['option'][0],
          'num_epochs':   model_config['num_epochs'][0],
          'interim_step': model_config['tracking_step_interim_model'][0]
         }

###################
# Define model structure to match the configuration from "best model" from 06_train_model.ipynb.
# The weights are loaded into this instance by later cells.
model = dueling_net(D_in = config['state_dim'], 
                    H = config['hidden_dim'], 
                    D_out = config['action_dim'],
                    drop_prob = config['drop_prob'],
                    num_hidden = config['num_hidden'],
                    option = config['option'])

################################################################################################################
################################################################################################################
# Fail early with a specific exception type when the experiment directory is missing.
exp_dir = os.path.join(conf.EXP_DIR, orig_exp_name)
if not os.path.exists(exp_dir):
    raise FileNotFoundError('Cannot find experiment directory, run create_exp_dataset prior to running this file')

############################################
# load data files
# Each group of files is loaded in its own try block so the hint in the error
# message points at the notebook/script that produces the missing files.
# The original error is chained (`from err`) so the real cause stays visible,
# and only Exception is caught so KeyboardInterrupt is not swallowed.
# NOTE(review): the data file name hard-codes 'FINAL' rather than using
# orig_exp_name -- confirm this is intended for other experiment names.
try:
    data_dict = joblib.load(os.path.join(exp_dir, 'data/FINAL_data_dict.pkl'))
except Exception as err:
    raise RuntimeError('Cannot load dataset, run the create_exp_dataset Notebook to create new data pickle files ') from err

try:
    # Action probabilities of physician's action used for intermediate evaluation
    train_pi_behavior = pd.read_pickle(os.path.join(exp_dir, 'KNN/KNN_pi_behavior_' + 'train' + 'data.pkl')) # pi_evaluation
    val_pi_behavior = pd.read_pickle(os.path.join(exp_dir, 'KNN/KNN_pi_behavior_' + 'val' + 'data.pkl')) # pi_evaluation
    test_pi_behavior = pd.read_pickle(os.path.join(exp_dir, 'KNN/KNN_pi_behavior_' + 'test' + 'data.pkl')) # pi_evaluation
except Exception as err:
    raise RuntimeError('Cannot load KNN files, run Physician_KNN.py to create new KNN pickle files') from err

try:
    # dataset MDP Q function (FQI-SARSA)
    train_MDP_Q = pd.read_pickle(os.path.join(exp_dir, 'FQI/FQI_QValues_continuous_' + 'train' + 'data.pkl'))
    val_MDP_Q = pd.read_pickle(os.path.join(exp_dir, 'FQI/FQI_QValues_continuous_' + 'val' + 'data.pkl'))
    test_MDP_Q = pd.read_pickle(os.path.join(exp_dir, 'FQI/FQI_QValues_continuous_' + 'test' + 'data.pkl'))
except Exception as err:
    raise RuntimeError('Cannot load FQI files, run Physician_FQI.py to create new FQI pickle files') from err

# print!
print("Experiment \""+ str(exp_name) + "\" loaded with grid: " + str(exp_grid_run))
print("model: ", exp_model)
print("exp_resultsdir: ", exp_resultsdir)
print("exp_figuresdir: ", exp_figuresdir)

# Show interim models action distribution

In [None]:
print("Experiment \""+ str(exp_name) + "\" loaded with grid: " + str(exp_grid_run))

# Iteration numbers at which an interim checkpoint is expected: multiples of
# interim_step, starting at interim_step and stopping before
# num_epochs + interim_step.
interim_model_list = np.arange(config['interim_step'], config['num_epochs']+config['interim_step'], config['interim_step'])

# Without a GPU the checkpoint tensors are mapped onto the CPU while loading;
# with a GPU torch.load's default placement is used.
map_location = None if use_gpu else 'cpu'
if use_gpu:
    # Moving the model is loop-invariant, so do it once up front.
    model = model.cuda()

count = 0
for interim_model in interim_model_list:
    count += 1
    exp_model = exp_name + '_' + str(exp_grid_run) + '_interim_'+ str(interim_model) +'_iteration_model.chk'
    selected_model = os.path.join(exp_resultsdir, exp_model)

    ############################################
    # Load model
    # Only a missing checkpoint file (training has not reached this iteration,
    # or stopped before it) ends the loop. Any other failure -- in loading,
    # evaluation or plotting -- now propagates instead of being reported as
    # "End of interim model list".
    try:
        state_dict = torch.load(selected_model, map_location=map_location)
    except FileNotFoundError:
        print("End of interim model list")
        break
    model.load_state_dict(state_dict)

    ############################################
    ### visual inspection of action and action probability distribution in the dataset
    print("interim model:" +str(interim_model))

    # One row of three panels: train / val / test.
    plt.figure(figsize=(15, 3))
    for panel, split in enumerate(('train', 'val', 'test'), start=1):
        # best action distribution of the model on this split
        outputs, best_actions, best_action_probabilities, outputs_prob, state_Q_values, best_policy_values = evaluate_model(model, data_dict, split, use_gpu)
        plt.subplot(1, 3, panel)
        # Overlay the actions recorded in the dataset with the model's best actions.
        pd.Series(data_dict[split]['action']).hist(bins=21, alpha=0.5)
        pd.Series(best_actions).hist(bins=21, alpha=0.5)
        plt.ylim(0, 20000)
        plt.title(split.upper() + " -  DQN best action distribution")

    # visual inspection
    plt.show()

print("done")

# Load final model

In [None]:
############################################
# Switch between the final model (default) and one specific interim checkpoint.
interim_model = False
interim = 220000

###################
# Checkpoint path relative to conf.EXP_DIR.
# OPTIONAL: the interim checkpoint lives inside the per-run results directory;
# the final model sits directly under <orig_exp_name>/models/.
if not interim_model:
    exp_model = '{}/models/{}_{}_model.chk'.format(orig_exp_name, exp_name, exp_grid_run)
else:
    exp_model = '{0}/models/{1}_{2}/{1}_{2}_interim_{3}_iteration_model.chk'.format(
        orig_exp_name, exp_name, exp_grid_run, interim)

###################
### Absolute location of the selected MODEL file
selected_model = os.path.join(conf.EXP_DIR, exp_model)

###################
# Load the weights into the existing model instance.
if not use_gpu:
    # No GPU: keep every storage where deserialisation puts it (CPU).
    model.load_state_dict(torch.load(selected_model, map_location=lambda storage, loc: storage))
else:
    model = model.cuda()
    model.load_state_dict(torch.load(selected_model))

print("loaded model: {}".format(exp_model))
print("Finished at: {}\n".format(datetime.now()))

# Final model analysis
    1) PLOT action distributions 
    2) Create files for deep Model Inspection
    3) Perform final model WIS and WDR analysis

In [None]:
print("loaded model: " + exp_model)

# gamma from the trained model's config (presumably the discount factor --
# confirm against eval_WDR). It does not change per split, so read it once.
gamma = config['gamma']

eval_types = ['train', 'val', 'test']
for eval_type in eval_types:
    # action probabilities of physician's action used for intermediate evaluation
    pi_behavior = pd.read_pickle(os.path.join(exp_dir, 'KNN/KNN_pi_behavior_' + eval_type + 'data.pkl'))

    # eval dataset MDP Q function
    Q = pd.read_pickle(os.path.join(exp_dir, 'FQI/FQI_QValues_continuous_' + eval_type + 'data.pkl'))

    ############################################
    # Model evaluation
    outputs, best_actions, best_action_probabilities, outputs_prob, state_Q_values, best_policy_values = evaluate_model(model, data_dict, eval_type, use_gpu)
    print(outputs.shape)

    # Column labels are derived from the action values present in this split.
    # NOTE(review): this assumes every split contains every action the network
    # outputs; otherwise assigning the column names below raises -- confirm.
    action_labels = np.unique(data_dict[eval_type]['action'])

    ############################################
    # keep copy of Q VALUES (rounded to 3 decimals)
    results_df = pd.DataFrame.from_records(outputs)
    results_df = np.around(results_df, 3)
    results_df.columns = ['Q' + str(i) for i in action_labels]

    # Add best action
    results_df['best_action'] = best_actions

    # Add action Q values
    results_df['phy_action_Qvalue'] = state_Q_values
    results_df['best_action_Qvalue'] = best_policy_values

    # Add state id
    results_df['state_id'] = data_dict[eval_type]['state_id']

    # Save
    results_df.to_csv(os.path.join(exp_resultsdir, 'DQN_Qvalues_' + eval_type + 'data.csv'), index=False)

    ############################################
    # keep copy of Q VALUES PROBABILITIES (rounded to 3 decimals)
    action_prob_df = pd.DataFrame.from_records(outputs_prob)
    action_prob_df = np.around(action_prob_df, 3)
    action_prob_df.columns = ['A' + str(i) for i in action_labels]

    # Add best action
    action_prob_df['best_action'] = best_actions

    # Add best action probabilities
    action_prob_df['best_action_probability'] = best_action_probabilities

    # Add state id
    action_prob_df['state_id'] = data_dict[eval_type]['state_id']

    # Save
    action_prob_df.to_csv(os.path.join(exp_resultsdir, 'DQN_action_prob_df_' + eval_type + 'data.csv'), index=False)

    ############################################
    ### visual inspection of action and action probability distribution in the dataset
    # Create multiplot (2 x 2): probabilities on the top row, actions on the bottom row
    plt.figure(figsize=(21, 6))

    plt.subplot(221)
    # Row-wise maximum of the behaviour policy, plotted as a percentage
    phy_action_probabilities = pi_behavior.max(axis=1)
    pd.Series(phy_action_probabilities*100).hist(bins=100)
    plt.title(str(eval_type) + " - PHYSICIAN action probability distribution")

    plt.subplot(222)
    # Row-wise maximum of the model's action probabilities, plotted as a percentage
    best_action_probabilities = outputs_prob.max(axis=1)
    pd.Series(best_action_probabilities*100).hist(bins=100)
    plt.title(str(eval_type) + " - DQN best action probability distribution")

    plt.subplot(223)
    pd.Series(data_dict[eval_type]['action']).hist(bins=21)
    plt.title(str(eval_type) + " -  PHYSICIAN action distribution")

    # best action distribution
    plt.subplot(224)
    pd.Series(best_actions).hist(bins=21)
    plt.title(str(eval_type) + " -  DQN best action distribution")

    # visual inspection
    # Uncomment to also save the figure (must happen before plt.show()):
    #plt.savefig(os.path.join(exp_figuresdir, 'Eval_DQN_histrogram_multiplot_'+ str(eval_type) +'.tiff'),dpi=200,transparent=True)
    # Show the figure for this split now, as the interim-model cell does,
    # instead of leaving all three figures open until the cell finishes.
    plt.show()

    #############################################
    # create an output dataframe with the action probabilities of the evaluated policy.
    # Kept separate from action_prob_df so it retains the default integer
    # column labels and carries no extra columns.
    pi_evaluation = np.around(pd.DataFrame.from_records(outputs_prob), 3)

    # Perform WOPE: once with the behaviour policy evaluated against itself
    # (physician baseline) and once with the model's policy.
    Phys_WDR, Phys_wis = eval_WDR(data_dict, eval_type, gamma, pi_behavior, pi_behavior, Q)
    model_WDR, model_wis = eval_WDR(data_dict, eval_type, gamma, pi_evaluation, pi_behavior, Q)

    # Results
    print("\n".join([
        str(eval_type),
        "Phy WDR: " + str(round(Phys_WDR, 4)),
        "DQN WDR: " + str(round(model_WDR, 4)),
        "Phy WIS: " + str(round(Phys_wis, 4)),
        "DQN WIS: " + str(round(model_wis, 4)),
    ]))
    print('\n\n')