In [1]:
!pip install gym stable-baselines3[extra] boto3 scipy python-dotenv --upgrade --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.25.91 requires botocore==1.27.90, but you have botocore 1.29.157 which is incompatible.[0m[31m
[0m

In [2]:
# Enable the IPython extension shipped with python-dotenv, then load the
# variables defined in the file named `env` into this kernel's environment.
# NOTE(review): presumably this supplies the AWS credentials used by boto3 later - confirm.
%load_ext dotenv
%dotenv env

In [3]:
# %load rl_constant
# Model input features, in the order they are fed to the environment.
FEATURE_COLUMNS = [
    "user_count",
    "project_count",
    "country_count",
    "date_hour_sin",
    "date_hour_cos",
    "date_minute_sin",
    "date_minute_cos",

    "session_30_count",
    "session_5_count",
    "cum_session_event",
    "cum_session_time",
    "expanding_click_average",

    "cum_platform_time",
    "cum_platform_event",
    "cum_projects",
    "average_event_time",
    "delta_last_event",

    "rolling_session_time",
    "rolling_session_events",
    "rolling_session_gap",
    "previous_session_time",
    "previous_session_events",
]

# Identifier, timestamp and "_raw" columns loaded alongside the features.
METADATA = [
    "user_id",
    "session_30_count_raw",
    "cum_platform_event_raw",
    "cum_platform_time_raw",
    "cum_session_time_raw",
    "cum_session_event_raw",
    "date_time",
]

# Per-session statistics copied into the environment's episode metadata.
RL_STAT_COLS = [
    "session_size",
    "session_minutes",
    "size_cutoff",
    "time_cutoff",
    "reward",
]

# Columns that may be appended to the features depending on the algorithm name.
PREDICTION_COLS = [
    "label",
    "pred",
]

# Every column that has to be read from disk, de-duplicated.
# dict.fromkeys keeps first-seen order, so the column order is identical on every
# run (list(set(...)) changes order between interpreter sessions because string
# hashing is randomised).
LOAD_COLS = list(dict.fromkeys(FEATURE_COLUMNS + METADATA + RL_STAT_COLS + PREDICTION_COLS))

In [21]:
# %load environment
# %load environment
# %load environment
import gym
import numpy as np
from scipy.stats import norm
# Event-count threshold used by CitizenScienceEnv._continuing_in_session: once a
# session has reached this many cumulative events it is always treated as continuing.
MAX_EVAL_SIZE = 75

class CitizenScienceEnv(gym.Env):
    """Gym environment that replays recorded sessions from a DataFrame.

    Each episode picks one (user_id, session_30_count_raw) pair at random and
    walks through its events in `date_time` order. The agent has four discrete
    actions: 0 does nothing, 1/2/3 place a "small"/"medium"/"large" incentive
    (each can be placed at most once per episode). Beyond the session's
    `size_cutoff`, whether the session continues is decided stochastically from
    the incentives placed so far.

    Finished episodes are summarised in `episode_bins` (one dict per episode);
    use `flush_episode_bins` to collect and clear them.
    """

    # NOTE(review): `reset` rebinds `self.metadata` to a pandas Series holding the
    # per-episode statistics, which shadows this class-level gym attribute on the
    # instance. Kept as-is because callers may read `env.metadata` after reset.
    metadata = {'render.modes': ['human']}

    def __init__(self, dataset, out_features, n_sequences, params=None):
        """
        dataset: DataFrame of events; must contain 'user_id', 'session_30_count_raw',
            'date_time', 'cum_session_event_raw', 'cum_session_time_raw', the
            RL_STAT_COLS columns and every column named in `out_features`.
        out_features: column names that make up an observation.
        n_sequences: number of past events included in an observation (the
            observation has n_sequences + 1 time steps).
        params: optional dict with keys 'mid', 'large' and 'window' overriding the
            defaults used in `_continuing_in_session`.
        """
        super().__init__()
        self.dataset = dataset
        self.unique_sessions = self.dataset[['user_id', 'session_30_count_raw']].drop_duplicates()
        self.n_sequences = n_sequences
        self.out_features = out_features
        self.current_session = None
        self.current_session_index = 0
        self.reward = 0

        self.action_space = gym.spaces.Discrete(4)
        # +3 rows for the inc_small / inc_medium / inc_large channels added in `_state`.
        self.observation_space = gym.spaces.Box(low=-1, high=91, shape=(len(out_features) + 3, n_sequences + 1), dtype=np.float32)
        self.episode_bins = []
        self.exp_runs = 0
        self.params = params

    def reset(self):
        """Start a new episode on a uniformly sampled session and return its first observation."""
        random_session = np.random.randint(0, self.unique_sessions.shape[0])

        user_to_run, session_to_run = self.unique_sessions.iloc[random_session][['user_id', 'session_30_count_raw']]
        self.current_session = self._get_events(user_to_run, session_to_run)
        self.metadata = self._metadata()
        self.current_session_index = 1
        self.reward = 0
        return self._state()

    def _row_to_dict(self, metadata):
        """
        Convert a row of metadata to a dictionary.
        """
        return metadata.to_dict()

    def _clamped_index(self):
        """Return the current event index, clamped to the last row of the session."""
        last_row = self.current_session.shape[0] - 1
        if self.current_session_index == self.current_session.shape[0]:
            return last_row
        return self.current_session_index

    def _reward_exp(self, cum_session_event_raw):
        """
        Reward shaping as
            cum_session_event_raw / size_cutoff                 if cum_session_event_raw <= size_cutoff
            (cum_session_event_raw - size_cutoff)
                * (cum_session_event_raw / size_cutoff)         otherwise
        """
        size_cutoff = self.metadata['size_cutoff']
        ratio = cum_session_event_raw / size_cutoff
        if cum_session_event_raw <= size_cutoff:
            return ratio

        return (cum_session_event_raw - size_cutoff) * ratio

    def step(self, action):
        """Apply `action`, advance one event and return (observation, reward, done, info).

        When the episode ends the observation is None and a summary of the episode
        is appended to `episode_bins` exactly once.
        """
        self._take_action(action)

        next_state, done, meta = self._calculate_next_state()

        if done:
            final_row = self.current_session.iloc[self._clamped_index()]

            self.exp_runs += 1
            self.metadata['ended_event'] = final_row['cum_session_event_raw']
            self.metadata['ended_time'] = final_row['cum_session_time_raw']
            self.metadata['exp_runs'] = self.exp_runs
            # Logged once per episode; the summary used to be appended twice,
            # which duplicated every row of the exported statistics.
            self.episode_bins.append(self._row_to_dict(self.metadata))

            return next_state, self._reward_exp(final_row['cum_session_event_raw']), done, {}

        current_row = self.current_session.iloc[self.current_session_index]
        self.reward = current_row['cum_session_event_raw']
        reward_exp = self._reward_exp(current_row['cum_session_event_raw'])

        self.current_session_index += 1

        return next_state, reward_exp, done, meta

    def _metadata(self):
        """Build the per-episode statistics Series from the session's first row."""
        session_metadata = self.current_session.iloc[0][RL_STAT_COLS].copy()
        session_metadata['ended'] = 0
        for meta_col in ['small', 'medium', 'large']:
            # Event count / session time at which each incentive was placed (0 = not placed).
            session_metadata[f'inc_{meta_col}'] = 0
            session_metadata[f'time_{meta_col}'] = 0

        return session_metadata

    def flush_episode_bins(self):
        """Return the episode summaries collected so far and clear the buffer."""
        episode_bins = self.episode_bins.copy()
        self.episode_bins = []
        return episode_bins

    def _calculate_next_state(self):
        """Return (observation, done, info); the episode is done when the recorded
        events are exhausted or the session is simulated to stop."""
        if self.current_session_index == self.current_session.shape[0]:
            return None, True, {}

        if self._continuing_in_session():
            return self._state(), False, {}

        return None, True, {}

    def _continuing_in_session(self):
        """Decide whether the session goes on past the current event.

        Sessions always continue up to `size_cutoff` and from MAX_EVAL_SIZE events
        onwards. In between, a noisy score is computed for each incentive and the
        session continues if any score lies in (0.4, window].
        """
        current_row = self.current_session.iloc[self.current_session_index]
        event_cutoff = current_row['size_cutoff']
        current_session_event = current_row['cum_session_event_raw']
        if current_session_event <= event_cutoff or current_session_event >= MAX_EVAL_SIZE:
            return True

        param_mid = 0.1 if not self.params else self.params['mid']
        param_large = 0.2 if not self.params else self.params['large']
        param_window = 0.75 if not self.params else self.params['window']

        # The three noise draws are made in a fixed order (small, medium, large) so
        # that seeded runs stay reproducible.
        extending_low = self._probability_extending(current_session_event, self.metadata['inc_small']) - \
            (0.05 + np.random.normal(-0.02, 0.1, 100).mean())

        extending_medium = self._probability_extending(current_session_event, self.metadata['inc_medium']) - \
            (param_mid + np.random.normal(-0.02, 0.1, 100).mean())

        # NOTE(review): the large incentive ADDS its offset while the other two
        # subtract theirs - presumably intentional, confirm with the experiment design.
        extending_large = self._probability_extending(current_session_event, self.metadata['inc_large']) + \
            (param_large + np.random.normal(-0.02, 0.1, 100).mean())

        return any([
            extending_low > 0.4 and extending_low <= param_window,
            extending_medium > 0.4 and extending_medium <= param_window,
            extending_large > 0.4 and extending_large <= param_window
        ])

    def _probability_extending(self, current_session_event, incentive_event):
        """Normal CDF at the current event count, centred on the event count at which
        the incentive was placed (scale 0.75 * that count, at least 1). Returns 0
        when the incentive has not been placed."""
        if incentive_event == 0:
            return 0

        continue_session = norm(
            loc=max(incentive_event, 1),
            scale=max(incentive_event * .75, 1)
        ).cdf(max(current_session_event, 1))

        return continue_session

    def _get_events(self, user_id, session):
        """Return the events of one user session, sorted by `date_time`.

        Boolean indexing followed by `sort_values` yields a new frame, so the
        result never aliases `self.dataset`.
        """
        in_session = (
            (self.dataset['user_id'] == user_id) &
            (self.dataset['session_30_count_raw'] == session)
        )
        return self.dataset[in_session].sort_values(by=['date_time'])

    def _take_action(self, action):
        """Record the incentive selected by `action` (0 = none, 1 = small, 2 = medium,
        anything else = large). An incentive that was already placed is left untouched.
        Always returns 1."""
        if action == 0:
            return 1

        if action == 1:
            size = 'small'
        elif action == 2:
            size = 'medium'
        else:
            size = 'large'

        if self.metadata[f'inc_{size}'] > 0:
            return 1

        current_row = self.current_session.iloc[self._clamped_index()]
        self.metadata[f'inc_{size}'] = current_row['cum_session_event_raw']
        self.metadata[f'time_{size}'] = current_row['cum_session_time_raw']
        return 1

    def _state(self):
        """Build the observation: the last n_sequences + 1 events (zero-padded at the
        front when fewer are available) with the three incentive channels appended,
        transposed to (features + 3, n_sequences + 1) float32."""
        index = self.current_session_index
        incentives = {
            f'inc_{size}': self.metadata[f'inc_{size}'] for size in ('small', 'medium', 'large')
        }

        if index > self.n_sequences:
            window = self.current_session.iloc[index - (self.n_sequences + 1):index]
            # assign() returns a new frame, so no column is ever written onto a slice.
            events = window[self.out_features].assign(**incentives).values
        else:
            delta = min((self.n_sequences + 1) - index, self.n_sequences)
            zero_cat = np.zeros((delta, len(self.out_features) + 3))
            window = self.current_session.iloc[:max(index, 1)]
            events = window[self.out_features].assign(**incentives).values
            events = np.concatenate((zero_cat, events), axis=0)

        return events.astype(np.float32).T

In [22]:
# %load rl_sensitivity_analysis.py
import os
import logging
import re
import boto3
# Timestamped INFO-level logging for the whole run.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p')
# Module-level logger and S3 client shared by the functions below.
# (The `global` statement has no effect at module scope; the names are module globals anyway.)
global logger, client
logger = logging.getLogger(__name__)
client = boto3.client('s3')
import numpy as np
from itertools import combinations, product
from stable_baselines3 import DQN
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.vec_env import DummyVecEnv, VecMonitor
import pandas as pd
import glob
# Inclusive (min, max) bounds on `session_size` applied by simplify_experiment.
MIN_MAX_RANGE = (10, 90)
from tqdm import tqdm
# Value passed to CitizenScienceEnv as `n_sequences`.
N_SEQUENCES = 15
import argparse
# S3 bucket the policy checkpoints are listed in and downloaded from.
S3_BASELINE_PATH = 'dissertation-data-dmiller'

# (start, stop) bounds of each swept parameter; `main` unpacks them positionally
# into np.arange with a negative step, so start must be the larger value.
# These are tuples on purpose: a set literal ({.15, .04}) has no guaranteed
# iteration order, so unpacking it could silently swap start and stop.
SENSITIVITY_PARAMS = {
    "window": (.8, .6),
    "mid": (.15, .04),
    "large": (.3, .09),
}
import torch
import random

# Seed every random number generator used in this notebook (NumPy, PyTorch and
# the standard library) with the same fixed value.
np.random.seed(42)
torch.manual_seed(42)
random.seed(42)
def parse_args():
    """Parse the command-line options of the sensitivity-analysis script.

    Returns:
        argparse.Namespace with `algo`, `run_date`, `write_path` (strings) and
        `n_files` (int), each falling back to its default when not supplied.
    """
    # (flag, type, default) for every supported option.
    option_specs = (
        ('--algo', str, 'dqn_pred_cnn'),
        ('--run_date', str, '2023-06-13_16-11-42'),
        ('--write_path', str, 'rl_evaluation'),
        ('--n_files', int, 2),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, fallback in option_specs:
        parser.add_argument(flag, type=value_type, default=fallback)

    return parser.parse_args()


def find_s3_candidate(algo, run_date):
    """Return the S3 key of the checkpoint with the highest index for a run.

    Lists every object under ``experiments/<algo>/<run_date>/checkpoints`` in the
    S3_BASELINE_PATH bucket and picks the one whose file name contains the largest
    number (all digits of the file name concatenated).

    Args:
        algo: algorithm name, used as a path component.
        run_date: run identifier, used as a path component.

    Returns:
        The full S3 key of the selected checkpoint.

    Raises:
        KeyError: if no object with a numeric checkpoint index exists under the
            prefix (the same exception type an empty listing raised before, now
            with an explanatory message).
    """
    # S3 keys always use '/', independent of the local OS path separator.
    folder_prefix = '/'.join(('experiments', algo, run_date, 'checkpoints'))

    logger.info(f'Looking for files in {folder_prefix}')

    files = []
    # Paginate: a single list_objects_v2 call returns at most 1000 keys.
    paginator = client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=S3_BASELINE_PATH, Prefix=folder_prefix):
        # 'Contents' is absent from the response when nothing matches the prefix.
        for file in page.get('Contents', []):
            digits = re.sub('[^0-9]', '', file['Key'].split('/')[-1])
            if not digits:
                # e.g. a folder placeholder key or a file without an index.
                continue
            files.append({
                'key': file['Key'],
                'last_modified': file['LastModified'],
                'check_index': int(digits),
            })

    if not files:
        raise KeyError(
            f'No checkpoint objects found under s3://{S3_BASELINE_PATH}/{folder_prefix}'
        )

    s3_candidate = sorted(files, key=lambda x: x['check_index'])[-1]['key']

    logger.info(f'Found candiate: {s3_candidate}')

    return s3_candidate

def get_policy(algo, run_date):
    """Download the latest checkpoint of a run from S3 and return its local path.

    The checkpoint is stored locally under the same relative path as its S3 key;
    the containing directory is created when it does not exist yet.
    """
    checkpoint_key = find_s3_candidate(algo, run_date)
    local_dir = os.path.dirname(checkpoint_key)

    if not os.path.exists(local_dir):
        logger.info(f'Creating policy path {local_dir}')
        os.makedirs(local_dir, exist_ok=True)

    # The S3 key doubles as the local destination path.
    client.download_file(S3_BASELINE_PATH, checkpoint_key, checkpoint_key)
    return checkpoint_key
        

def simplify_experiment(vectorized_df, size_range=None):
    """Keep only rows whose `session_size` lies in an inclusive range.

    Args:
        vectorized_df: iterable of DataFrames, each with a `session_size` column.
        size_range: optional (min, max) pair, both bounds inclusive. Defaults to
            the module-level MIN_MAX_RANGE.

    Returns:
        A list with one filtered DataFrame per input, in the original order;
        inputs left with no rows after filtering are dropped.
    """
    lower, upper = MIN_MAX_RANGE if size_range is None else size_range

    # Series.between is inclusive on both ends, i.e. (x >= lower) & (x <= upper).
    filtered = (df[df['session_size'].between(lower, upper)] for df in vectorized_df)

    return [df for df in filtered if len(df) > 0]


def _label_or_pred(algo):
    if 'label' in algo:
        return 'label'
    elif 'pred' in algo:
        return 'pred'
    else:
        return None
   

def run_sensitivity_analysis(env_datasets, policy_path, feature_cols, param_combos):
    """Evaluate a saved DQN policy under every parameter combination.

    For each (window, mid, large) combination a fresh vectorised environment is
    built with one CitizenScienceEnv per dataset, the policy is evaluated for
    2,000 episodes, and the per-episode summaries collected by the environments
    are gathered.

    Args:
        env_datasets: list of DataFrames, one per sub-environment.
        policy_path: path of the saved DQN model.
        feature_cols: observation columns passed to each environment.
        param_combos: iterable of (window, mid, large) triples.

    Returns:
        DataFrame with one row per logged episode plus the `window`, `mid` and
        `large` values the episode was run with.
    """
    p_bar = tqdm(param_combos, unit='item')
    out_df_container = []
    for combo in p_bar:
        # np.round + float also accepts plain Python numbers, not only numpy scalars.
        params = {
            "window": float(np.round(combo[0], 2)),
            "mid": float(np.round(combo[1], 2)),
            "large": float(np.round(combo[2], 2))
        }
        p_bar.set_description(f'Running combo {params}')

        # Bind `dataset` (and `params`) through default arguments: a plain
        # `lambda: ...dataset...` looks the name up when it is called, i.e. after
        # the comprehension has finished, so every env would receive the LAST dataset.
        vev_envs = DummyVecEnv([
            lambda dataset=dataset, params=params: CitizenScienceEnv(dataset, feature_cols, N_SEQUENCES, params)
            for dataset in env_datasets
        ])

        vec_monitor = VecMonitor(vev_envs)

        model = DQN.load(
            policy_path,
            env=vec_monitor,
            verbose=0,
        )

        evaluate_policy(
            model,
            model.get_env(),
            deterministic=False,
            n_eval_episodes=2_000
        )

        # One list of episode dicts per sub-environment; flatten into a single list.
        dists = model.get_env().get_attr('episode_bins')
        values_to_log = [item for sublist in dists for item in sublist]
        out_df = pd.DataFrame(values_to_log)
        out_df['window'] = params['window']
        out_df['mid'] = params['mid']
        out_df['large'] = params['large']
        out_df_container.append(out_df)

    return pd.concat(out_df_container)
        

def _descending_grid(bounds, step):
    """Return np.arange from the larger to the smaller of the two bounds (stop exclusive).

    The bounds are sorted first so the result does not depend on the iteration
    order of the container they are stored in.
    """
    start, stop = sorted(bounds, reverse=True)
    return np.arange(start, stop, -step).tolist()


def main(args):
    """Run the Monte Carlo sensitivity analysis for one trained policy.

    Builds the parameter grid from SENSITIVITY_PARAMS, samples up to 200
    combinations without replacement, downloads the latest checkpoint, loads the
    evaluation datasets and writes the results to
    ``<write_path>/sensitivity_analysis/<algo>.parquet``.

    Args:
        args: object with `algo`, `run_date`, `write_path` and `n_files` attributes.

    Raises:
        FileNotFoundError: if no parquet file is found in the evaluation directory.
        ValueError: if no dataset has rows left after session-size filtering.
    """
    algo, run_date, write_path, n_files = args.algo, args.run_date, args.write_path, args.n_files

    params_window = _descending_grid(SENSITIVITY_PARAMS['window'], .02)
    params_mid = _descending_grid(SENSITIVITY_PARAMS['mid'], .01)
    params_large = _descending_grid(SENSITIVITY_PARAMS['large'], .02)

    logger.info(f'Window params: {params_window}')
    logger.info(f'Mid params: {params_mid}')
    logger.info(f'Large params: {params_large}')

    param_combos = np.array(list(product(params_window, params_mid, params_large)))
    # Sampling without replacement cannot draw more rows than exist.
    n_samples = min(200, param_combos.shape[0])
    logger.info(f'Combination parameters obtained: {param_combos.shape}, running monte carlo simulation on {n_samples} random samples')
    param_combos = param_combos[np.random.choice(param_combos.shape[0], n_samples, replace=False), :]

    policy_path = get_policy(algo, run_date)
    logger.info(f'Policy path downloaded, evaluating experiment: {policy_path}')

    read_path = os.path.join('rl_ready_data_conv', f'files_used_{n_files}', 'window_1', 'batched_eval')
    files_to_read = glob.glob(os.path.join(read_path, '*.parquet'))
    logger.info(f'Found {len(files_to_read)} files to read')
    if not files_to_read:
        # Fail here with a clear message instead of deep inside the vectorised env.
        raise FileNotFoundError(f'No parquet files found in {read_path}')

    env_datasets = [
        pd.read_parquet(file, columns=LOAD_COLS) for file in files_to_read
    ]

    env_datasets = simplify_experiment(env_datasets)
    if not env_datasets:
        raise ValueError(f'No sessions left in {read_path} after session-size filtering')

    # Append the label/prediction column only when the algorithm name asks for one.
    extra_col = _label_or_pred(algo)
    feature_cols = FEATURE_COLUMNS + [extra_col] if extra_col else FEATURE_COLUMNS
    logger.info(f'Length of features: {len(feature_cols)}')
    logger.info('Running sensitivity analysis per monte carlo simulation')
    sensitivity_df = run_sensitivity_analysis(env_datasets, policy_path, feature_cols, param_combos)

    write_path = os.path.join(write_path, 'sensitivity_analysis', f'{algo}.parquet')
    if not os.path.exists(os.path.dirname(write_path)):
        logger.info(f'Creating write path {os.path.dirname(write_path)}')
        os.makedirs(os.path.dirname(write_path), exist_ok=True)

    logger.info(f'Writing sensitivity analysis to {write_path}')
    sensitivity_df.to_parquet(write_path)



In [23]:
class Args:
    """Notebook stand-in for the namespace returned by `parse_args`.

    Passed to `main` as the class itself; only the class attributes are read.
    """
    # Which trained policy to evaluate.
    algo, run_date = 'dqn_pred_cnn', '2023-06-20_10-38-22'
    # Where results are written and which `files_used_<n>` dataset folder is read.
    write_path, n_files = 'rl_evaluation', 30
    

In [24]:
# Run the sensitivity analysis with the notebook-level configuration; `main`
# only reads attributes, so the Args class itself is passed.
main(Args)

06/21/2023 07:47:59 AM Window params: [0.8, 0.78, 0.76, 0.74, 0.72, 0.7, 0.6799999999999999, 0.6599999999999999, 0.6399999999999999, 0.6199999999999999, 0.5999999999999999]
06/21/2023 07:47:59 AM Mid params: [0.15, 0.13999999999999999, 0.12999999999999998, 0.11999999999999997, 0.10999999999999996, 0.09999999999999995, 0.08999999999999994, 0.07999999999999993, 0.06999999999999992, 0.059999999999999915, 0.049999999999999906]
06/21/2023 07:47:59 AM Large params: [0.3, 0.27999999999999997, 0.25999999999999995, 0.23999999999999994, 0.21999999999999992, 0.1999999999999999, 0.17999999999999988, 0.15999999999999986, 0.13999999999999985, 0.11999999999999983, 0.09999999999999981]
06/21/2023 07:47:59 AM Combination parameters obtained: (1331, 3), running monte carlo simulation on 200 random samples
06/21/2023 07:47:59 AM Looking for files in experiments/dqn_pred_cnn/2023-06-20_10-38-22/checkpoints
06/21/2023 07:48:00 AM Found candiate: experiments/dqn_pred_cnn/2023-06-20_10-38-22/checkpoints/rl_m