In [1]:
# System and processing
import sys
import dill as pickle
import copy
from tqdm import tqdm
import os

from multiprocessing import Process, Queue, Pool
import multiprocessing
from pathos.multiprocessing import ProcessingPool, ThreadingPool
from multiprocessing import Manager

import itertools
from itertools import permutations, combinations


# Common math packages
import numpy as np
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.formula.api import ols
import random
import pandas as pd
from numpy.linalg import norm

# External packages
import sage.all
import sage.geometry.polyhedron.base as Polyhedron

# Codes
import params_team
from simple_rl.agents import FixedPolicyAgent
from simple_rl.planning import ValueIteration
from simple_rl.utils import make_mdp
from policy_summarization import bayesian_IRL
from policy_summarization import policy_summarization_helpers as ps_helpers
from policy_summarization import BEC
import policy_summarization.multiprocessing_helpers as mp_helpers
from simple_rl.utils import mdp_helpers
import policy_summarization.BEC_helpers as BEC_helpers
import policy_summarization.BEC_visualization as BEC_viz
from teams import particle_filter_team as pf_team
import teams.teams_helpers as team_helpers
import simulation.sim_helpers as sim_helpers
import teams.utils_teams as utils_teams
from analyze_sim_data import run_analysis_script


# Plotting
import matplotlib
import matplotlib.pyplot as plt
import matplotlib as mpl
from termcolor import colored
# Select the TkAgg backend for matplotlib (issued after pyplot has already been imported above).
matplotlib.use('TkAgg')

# Notebook-wide figure styling: figure background, label sizes and default figure size.
mpl.rcParams['figure.facecolor'] = '1.0'
mpl.rcParams['axes.labelsize'] = 'x-large'
mpl.rcParams['xtick.labelsize'] = 'large'
plt.rcParams['figure.figsize'] = [15, 10]

from statsmodels.graphics.factorplots import interaction_plot

# Append the relative "simple_rl" directory to the module search path.
# NOTE(review): this runs after the `simple_rl.*` imports earlier in this cell, so it cannot
# influence them; the relative entry presumably assumes the kernel's working directory is the
# repository root — confirm.
sys.path.append("simple_rl")



  np.bool8: (False, True),




In [16]:
def get_optimal_policies(params, pool, lock):
    """
    Get the minimum BEC constraints for the domain, reusing cached results when possible.

    The environment policies are obtained first. The base constraints and the BEC
    constraints are then each read from a pickle under ``models/<data_loc['BEC']>/``;
    when a cache cannot be read it is recomputed and written back to the same file.

    Args:
        params: Object exposing ``mdp_class``, ``data_loc``, ``weights``,
            ``mdp_parameters``, ``BEC`` and ``step_cost_flag``.
        pool: Worker pool forwarded to the policy / constraint extraction helpers.
        lock: Context manager held around every cache read and write.

    Returns:
        Tuple ``(min_subset_constraints_record, env_record, traj_record,
        traj_features_record, mdp_features_record, consistent_state_count,
        min_BEC_constraints)``.
    """

    ps_helpers.obtain_env_policies(params.mdp_class, params.data_loc['BEC'], np.expand_dims(params.weights['val'], axis=0), params.mdp_parameters, pool, lock)

    base_constraints_path = 'models/' + params.data_loc['BEC'] + '/team_base_constraints.pickle'
    bec_constraints_path = 'models/' + params.data_loc['BEC'] + '/team_BEC_constraints.pickle'

    # get base constraints for all the environments and demonstrations
    # NOTE: the caches are unpickled as-is; only load files produced by this pipeline.
    try:
        with lock:
            with open(base_constraints_path, 'rb') as f:
                policy_constraints, min_subset_constraints_record, env_record, traj_record, traj_features_record, reward_record, mdp_features_record, consistent_state_count = pickle.load(f)
    except Exception:
        # Any ordinary failure (missing, truncated or incompatible cache) falls back to
        # recomputation. KeyboardInterrupt / SystemExit are deliberately NOT caught, so
        # interrupting the kernel does not kick off the expensive extraction below.
        # use policy BEC to extract constraints
        policy_constraints, min_subset_constraints_record, env_record, traj_record, traj_features_record, reward_record, mdp_features_record, consistent_state_count = BEC.extract_constraints(params.data_loc['BEC'], params.BEC['BEC_depth'], params.step_cost_flag, pool, lock, print_flag=True)
        with lock:
            with open(base_constraints_path, 'wb') as f:
                pickle.dump((policy_constraints, min_subset_constraints_record, env_record, traj_record, traj_features_record, reward_record, mdp_features_record, consistent_state_count), f)

    # get BEC constraints
    try:
        with lock:
            with open(bec_constraints_path, 'rb') as f:
                min_BEC_constraints, BEC_lengths_record = pickle.load(f)
    except Exception:
        # Same best-effort cache policy as for the base constraints.
        min_BEC_constraints, BEC_lengths_record = BEC.extract_BEC_constraints(policy_constraints, min_subset_constraints_record, env_record, params.weights['val'], params.step_cost_flag, pool)
        with lock:
            with open(bec_constraints_path, 'wb') as f:
                pickle.dump((min_BEC_constraints, BEC_lengths_record), f)

    return min_subset_constraints_record, env_record, traj_record, traj_features_record, mdp_features_record, consistent_state_count, min_BEC_constraints


In [17]:
def init_pool_processes(the_lock):
    '''Initialize each process with a global variable lock.

    Intended as a pool ``initializer``: it publishes ``the_lock`` as a module-level
    global inside the worker process.

    Args:
        the_lock: Lock object to expose to code running in this process.
    '''
    # The original declared ``global file_lock`` but assigned ``lock``, so the value
    # ended up in a throw-away local and no global was ever set. Both names are bound
    # here: ``lock`` matches NoDaemonProcessPool.init_pool_processes, and ``file_lock``
    # is the name the original declaration referred to.
    global lock, file_lock
    lock = the_lock
    file_lock = the_lock


class NoDaemonProcess(multiprocessing.Process):
    """Process that always reports ``daemon == False`` and carries a lock.

    Attempts to change ``daemon`` are silently ignored. When a truthy lock is
    attached, ``run`` hands it to ``init_pool_processes`` before executing the
    regular process body.
    """

    def __init__(self, lock, *args, **kwargs):
        """Remember ``lock`` and forward everything else to ``multiprocessing.Process``."""
        self._lock = lock
        super().__init__(*args, **kwargs)

    # Reads always yield False; writes are accepted and discarded.
    daemon = property(lambda self: False, lambda self, val: None)

    @property
    def lock(self):
        """The lock currently attached to this process (may be None)."""
        return self._lock

    def set_lock(self, lock):
        """Replace the attached lock."""
        self._lock = lock

    def run(self):
        """Install the attached lock (if any), then run the normal process target."""
        attached_lock = self._lock
        if attached_lock:
            init_pool_processes(attached_lock)
        super().run()


class NoDaemonProcessPool(multiprocessing.pool.Pool):
    """Pool whose worker processes are re-typed as ``NoDaemonProcess`` and share a lock.

    The lock given at construction is stored on the pool, copied onto every worker
    process object, and published as the module-level ``lock`` by the pool initializer.
    """

    def __init__(self, processes=None, lock=None, *args, **kwargs):
        """Store ``lock`` and build the pool with ``init_pool_processes`` as initializer."""
        # The lock must be stored before the base constructor runs, because the base
        # constructor is given a bound method that reads it.
        self._lock = lock
        super().__init__(*args, processes=processes, initializer=self.init_pool_processes, **kwargs)

    def init_pool_processes(self):
        """Publish the pool's lock as the module-level global ``lock``."""
        globals()['lock'] = self._lock

    def Process(self, *args, **kwds):
        """Create a worker via the base class, then convert it to ``NoDaemonProcess``."""
        worker = super().Process(*args, **kwds)
        worker.__class__ = NoDaemonProcess
        worker._lock = self._lock  # Pass the lock to the process

        return worker

In [26]:
def initialize_teaching(params, teacher_learning_factor, pool, lock):
    """Build the initial teaching state.

    Obtains the domain constraints via ``get_optimal_policies``, samples the team
    particle filters and initializes the variable filter.

    Args:
        params: Parameter object (reads ``team_size``, ``BEC``, ``weights``,
            ``step_cost_flag``, ``team_prior`` and ``teacher_update_model_type``).
        teacher_learning_factor: Forwarded to ``team_helpers.sample_team_pf``.
        pool: Worker pool forwarded to ``get_optimal_policies``.
        lock: Lock forwarded to ``get_optimal_policies``.

    Returns:
        Tuple ``(particles_team_teacher, variable_filter, nonzero_counter,
        min_BEC_constraints_running, visited_env_traj_idxs)``; the last two are
        fresh empty lists.
    """
    # Only the minimal-subset constraints are needed here; the other records are discarded.
    min_subset_constraints_record, _env, _traj, _traj_features, _mdp_features, _state_count, _min_bec = get_optimal_policies(params, pool, lock)

    _team_prior, particles_team_teacher = team_helpers.sample_team_pf(
        params.team_size,
        params.BEC['n_particles'],
        params.weights['val'],
        params.step_cost_flag,
        teacher_learning_factor=teacher_learning_factor,
        team_prior=params.team_prior,
        model_type=params.teacher_update_model_type,
    )

    variable_filter, nonzero_counter, _teaching_complete = team_helpers.check_and_update_variable_filter(
        min_subset_constraints_record,
        initialize_filter_flag=True,
    )

    return particles_team_teacher, variable_filter, nonzero_counter, [], []

In [30]:
def run_teaching_loop(params, args, loop_count=0, demo_viz_flag=False):
    """Run one teaching iteration: generate demonstrations, then diagnostic tests.

    Args:
        params: Parameter object (reads ``data_loc``, ``weights``, ``step_cost_flag``,
            ``BEC``, ``teacher_update_model_type`` and ``mdp_class``).
        args: 8-tuple ``(particles_demo, summary_pool, lock, variable_filter,
            min_BEC_constraints_running, visited_env_traj_idxs, pool, lock)``.
        loop_count: Iteration index forwarded to ``team_helpers.show_demonstrations``.
            Previously read from an undefined name; the default of 0 is an assumption.
        demo_viz_flag: Forwarded as ``viz_flag`` to ``show_demonstrations``.
            Previously read from an undefined name; the default of False is an assumption.

    Returns:
        Tuple ``(demo_mdps, test_mdps, variable_filter, min_BEC_constraints_running,
        visited_env_traj_idxs)`` where ``demo_mdps`` holds the first element of every
        demonstration and ``test_mdps`` holds the first two elements of every test.
    """
    # `lock` occurs twice in the caller's tuple; both slots are unpacked so the
    # 8-element contract stays unchanged (the last one wins).
    particles_demo, summary_pool, lock, variable_filter, min_BEC_constraints_running, visited_env_traj_idxs, pool, lock = args

    # fixed variables (maybe better to read from file)
    min_subset_constraints_record, env_record, traj_record, traj_features_record, mdp_features_record, consistent_state_count, min_BEC_constraints = get_optimal_policies(params, pool, lock)
    teacher_uf_demo = 0.8

    # generate demos
    # NOTE(review): the recorded run of this notebook ended in
    # "TypeError: '<' not supported between instances of 'list' and 'float'" after this
    # function had started; the positional argument order below should be checked against
    # the signature of obtain_team_summary — not verifiable from this file.
    demonstration, _, _, min_BEC_constraints_running, visited_env_traj_idxs, particles_demo = team_helpers.obtain_team_summary(
        params.data_loc['BEC'], min_subset_constraints_record, min_BEC_constraints, env_record, traj_record, mdp_features_record, params.weights['val'], params.step_cost_flag,
        summary_pool, lock, params.BEC['n_human_models'], params.BEC['n_human_models_precomputed'], consistent_state_count, params.BEC['n_train_demos'], particles_demo, teacher_uf_demo, [], params.teacher_update_model_type,
        variable_filter, [], [], 0, min_BEC_constraints_running, visited_env_traj_idxs)

    if len(demonstration) == 0:
        print('No new demonstrations possible!')

    # obtain diagnostic tests
    unit_constraints, demo_ids, running_variable_filter_unit = team_helpers.show_demonstrations(demonstration, particles_demo, params.mdp_class, params.weights['val'], loop_count, viz_flag = demo_viz_flag)
    min_KC_constraints = BEC_helpers.remove_redundant_constraints(unit_constraints, params.weights['val'], params.step_cost_flag)
    preliminary_tests, visited_env_traj_idxs = team_helpers.obtain_diagnostic_tests(lock, params.data_loc['BEC'], demonstration, visited_env_traj_idxs, min_KC_constraints, min_subset_constraints_record, traj_record, traj_features_record, running_variable_filter_unit, mdp_features_record)

    demo_mdps = [demo[0] for demo in demonstration]  # Demo MDP
    test_mdps = [[test[0], test[1]] for test in preliminary_tests]  # Test MDP and Optimal Trajectory

    # The original returned the misspelled (undefined) name `demo_mpds`, raising NameError.
    return demo_mdps, test_mdps, variable_filter, min_BEC_constraints_running, visited_env_traj_idxs
    

In [31]:
# Work on a private copy of the team parameters so the imported module is left untouched.
params = copy.deepcopy(params_team)

teacher_learning_factor = [0.8, 0.8, 0.8]

with Manager() as manager:
    lock = manager.Lock()

    # Managing the pool with `with` shuts the workers down when the block exits (also on
    # error). Previously the pool was never closed, so its workers outlived the Manager
    # that owns the shared lock.
    with Pool(min(params.n_cpu, 60), initializer=init_pool_processes, initargs=(lock,)) as summary_pool:

        particles_team_teacher, variable_filter, nonzero_counter, min_BEC_constraints_running, visited_env_traj_idxs = initialize_teaching(params, teacher_learning_factor, summary_pool, lock)

        particles_demo = copy.deepcopy(particles_team_teacher['p1'])

        # The pool and the lock are passed twice because run_teaching_loop unpacks an 8-tuple.
        args = particles_demo, summary_pool, lock, variable_filter, min_BEC_constraints_running, visited_env_traj_idxs, summary_pool, lock

        # Fixed the misspelled targets `demo_mpds` / `ariable_filter` so the results land in
        # the names the rest of the notebook uses.
        demo_mdps, test_mdps, variable_filter, min_BEC_constraints_running, visited_env_traj_idxs = run_teaching_loop(params, args)

Solving for the optimal policy in each environment:


0it [00:00, ?it/s]


Initializing particle filter..
num_points:  500
Updating particles for  p1 with prior knowledge in  noise  condition...
Initializing particle filter..
num_points:  500
Updating particles for  p2 with prior knowledge in  noise  condition...
Initializing particle filter..
num_points:  500
Updating particles for  p3 with prior knowledge in  noise  condition...
num_points:  500
Team knowledge:  {'p1': [[array([[ 0,  0, -1]])]], 'p2': [[array([[ 0,  0, -1]])]], 'p3': [[array([[ 0,  0, -1]])]], 'common_knowledge': [[array([[ 0,  0, -1]])]], 'joint_knowledge': [[[array([[ 0,  0, -1]])], [array([[ 0,  0, -1]])], [array([[ 0,  0, -1]])]]]}
num_points:  500
team_prior[joint_knowledge]:  [[[array([[ 0,  0, -1]])], [array([[ 0,  0, -1]])], [array([[ 0,  0, -1]])]]]
Update JK with constraints: [[array([[ 0,  0, -1]])], [array([[ 0,  0, -1]])], [array([[ 0,  0, -1]])]]
Solving for the optimal policy in each environment:


0it [00:00, ?it/s]


num_points:  2500


TypeError: '<' not supported between instances of 'list' and 'float'