In [None]:
!python -m pip install python-dotenv pqdm torch --quiet
!python -m pip install gym stable-baselines3[extra] boto3 --quiet


In [1]:
# %load rl_constant.py
# Column-name constants shared by the environment and the evaluation script.

# Target column of the supervised model.
LABEL = [
    "continue_work_session_30_minutes"
]

# Identification / bookkeeping columns that are carried along with the features.
METADATA = [
    "user_id",
    "session_30_raw",
    "cum_platform_event_raw",
    "cum_platform_time_raw",
    "cum_session_time_raw",
    "global_events_user",
    "global_session_time",
    "date_time",
]

# Columns fed to the policy as observations (20 entries).
OUT_FEATURE_COLUMNS = [
    "country_count",
    "date_hour_sin",
    "date_hour_cos",
    "date_minute_sin",
    "date_minute_cos",

    "session_30_count",
    "session_5_count",
    "cum_session_event_count",
    "delta_last_event",
    "cum_session_time",

    "expanding_click_average",
    "cum_platform_time",
    "cum_platform_events",
    "cum_projects",
    "average_event_time",

    "rolling_session_time",
    "rolling_session_events",
    "rolling_session_gap",
    "previous_session_time",
    "previous_session_events",
]

# NOTE(review): 'sq_20' looks like a typo for 'seq_20' (the name used by
# `_lstm_loader`); left untouched because the dataset schema is not visible here.
PREDICTION_COLS = [
    'seq_10',
    'sq_20'
]


GROUPBY_COLS = ['user_id']

# Per-session statistics copied into the episode metadata by the environment.
# Every name appears exactly once: a repeated entry would yield duplicate
# labels when these columns are selected from a DataFrame/Series.
RL_STAT_COLS = [
    'session_size',
    'sim_size',
    'session_minutes',
    'sim_minutes',
    'cum_session_event_raw',
    'cum_platform_time_raw',
    'reward',
    'session_30_raw',
    'global_session_time',
]


In [None]:
# Trained policies to evaluate. Each entry gives the algorithm, the feature
# extractor, the prediction input ("lstm") and the timestamp of the training
# run whose checkpoint should be loaded.
POLICY_LIST = [
    {
        "algo": "DQN",
        "feature_extractor": "CNN",
        "lstm": "label",
        "run_time": "2023-05-23-16-18"
    },
    {
        "algo": "DQN",
        "feature_extractor": "MLP",
        "lstm": "label",
        "run_time": "2023-05-23-15-55"
    },  # this comma was missing, which made the whole cell a SyntaxError
    {
        "algo": "DQN",
        "feature_extractor": "CNN",
        "lstm": "seq_40",
        "run_time": "2023-05-23-16-31"
    },
    {
        "algo": "DQN",
        "feature_extractor": "MLP",
        "lstm": "seq_40",
        "run_time": "2023-05-24-08-18"
    },
    {
        "algo": "DQN",
        "feature_extractor": "MLP",
        "lstm": "no_pred",
        "run_time": "2023-05-25-20-00"
    }
]



In [6]:
# %load policies/cnn_policy.py
# %load policies/cnn_policy

import torch
from gym import spaces
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from torch import nn


class CustomConv1dFeatures(BaseFeaturesExtractor):
    """1-D convolutional features extractor for stable-baselines3 policies.

    The input dimensions are not constructor arguments: they are read from the
    class attributes ``n_features`` and ``n_sequences``, which must be set with
    :meth:`setup_sequences_features` before an instance is created.
    """

    @classmethod
    def setup_sequences_features(cls, n_sequences, n_features):
        """Store the sequence length and channel count on the class."""
        cls.n_features = n_features
        cls.n_sequences = n_sequences

    def __init__(self, observation_space: spaces.Box, features_dim=20):
        """Build the two convolutional stages, the pooling head and the final linear layer.

        observation_space: forwarded to ``BaseFeaturesExtractor``.
        features_dim: size of the vector returned by :meth:`forward`.
        """
        super().__init__(observation_space, features_dim)

        narrow = self.n_features
        wide = narrow * 2

        # Stage 1: widen to 2 * n_features channels, then halve the sequence
        # length with average pooling. The last two convolutions are separated
        # only by a batch norm (no ReLU).
        stage_one = [
            nn.Conv1d(narrow, wide, kernel_size=3, padding=1),
            nn.BatchNorm1d(wide),
            nn.ReLU(),

            nn.Conv1d(wide, wide, kernel_size=3, padding=1),
            nn.BatchNorm1d(wide),
            nn.ReLU(),

            nn.Conv1d(wide, wide, kernel_size=3, padding=1),
            nn.BatchNorm1d(wide),
            nn.Conv1d(wide, wide, kernel_size=3, padding=1),

            nn.AvgPool1d(2),
        ]
        self.cnn_1 = nn.Sequential(*stage_one)

        # Stage 2: project back down to n_features channels.
        stage_two = [
            nn.Conv1d(wide, narrow, kernel_size=3, padding=1),
            nn.BatchNorm1d(narrow),
            nn.ReLU(),

            nn.Conv1d(narrow, narrow, kernel_size=3, padding=1),
            nn.BatchNorm1d(narrow),
            nn.ReLU(),
        ]
        self.cnn_2 = nn.Sequential(*stage_two)

        # Head: max-pool over the sequence axis and flatten to a vector.
        self.act = nn.Sequential(
            nn.MaxPool1d(2),
            nn.Flatten(),
        )

        # Push a dummy batch through the stack to discover the flattened size
        # that the linear layer has to accept.
        with torch.no_grad():
            probe = torch.zeros((1, self.n_features, self.n_sequences))
            flat_dim = self.act(self.cnn_2(self.cnn_1(probe))).shape[1]
            self.linear = nn.Linear(flat_dim, features_dim)

    def forward(self, obs):
        """Map a batch of observations to ``features_dim``-sized feature vectors."""
        return self.linear(self.act(self.cnn_2(self.cnn_1(obs))))


        

In [4]:
# %load environment_eval
# %load environment
import gym
import numpy as np

from scipy.stats import norm

import numpy as np
from scipy.stats import norm 
import gym

class CitizenScienceEnv(gym.Env):
    """Gym environment that replays one recorded user session per episode.

    Each step advances one event through the sampled session. Action 0 does
    nothing; any other action records an incentive at the current event, at
    most once per episode. Once the session passes its ``sim_size`` event
    count, the episode only continues while the incentive keeps the computed
    continuation probability within [0.3, 0.7].
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, dataset, out_features, n_sequences):
        """
        dataset: DataFrame of events. The environment reads the columns
            'user_id', 'session_30_raw', 'date_time', 'reward',
            'cum_session_event_raw', 'cum_session_time_raw', every column in
            RL_STAT_COLS and every column in ``out_features``.
        out_features: list of column names that make up an observation.
        n_sequences: observations hold ``n_sequences + 1`` rows of events.
        """
        super().__init__()
        self.dataset = dataset
        self.out_features = out_features
        self.n_sequences = n_sequences

        # Episode state, (re)initialised by reset().
        self.current_session = None
        self.current_session_index = 0
        self.reward = 0

        self.action_space = gym.spaces.Discrete(2)
        obs_shape = (len(out_features), n_sequences + 1)
        self.observation_space = gym.spaces.Box(low=-1, high=1, shape=obs_shape, dtype=np.float32)

    def reset(self):
        """Sample one row of the dataset and start an episode on that row's session."""
        sampled_row = self.dataset.sample(1)
        user_to_run, session_to_run = sampled_row[['user_id', 'session_30_raw']].values[0]
        self.current_session = self._get_events(user_to_run, session_to_run)
        # NOTE: from here on the instance attribute `metadata` (a pandas
        # Series) shadows the class-level `metadata` dict.
        self.metadata = self._metadata()
        self.current_session_index = 0
        self.reward = 0
        return self._state()

    def _row_to_dict(self, metadata):
        """Return the metadata row as a plain dictionary."""
        return metadata.to_dict()

    def step(self, action):
        """Apply ``action`` and advance by one event.

        Returns ``(next_state, reward, done, info)``. When the episode is over
        ``next_state`` is ``None`` and ``info`` is the episode metadata as a
        dictionary; otherwise ``info`` is empty.
        """
        self._take_action(action)
        next_state, done, meta = self._calculate_next_state()

        if not done:
            self.reward = self.current_session.iloc[self.current_session_index]['reward']
            self.current_session_index += 1
            return next_state, float(self.reward), done, meta

        # Episode finished: if the index ran off the end of the session, step
        # back to the last recorded event.
        n_rows = self.current_session.shape[0]
        if self.current_session_index == n_rows:
            last_index = self.current_session_index - 1
        else:
            last_index = self.current_session_index

        self.metadata['ended'] = self.current_session.iloc[last_index]['cum_session_event_raw']
        self.metadata['reward'] = self.reward
        return next_state, float(self.reward), done, self._row_to_dict(self.metadata)

    def _metadata(self):
        """Build the episode metadata from the RL_STAT_COLS of the session's first event."""
        session_metadata = self.current_session.iloc[0][RL_STAT_COLS]
        session_metadata['ended'] = 0
        session_metadata['incentive_index'] = 0
        return session_metadata

    def _calculate_next_state(self):
        """Return ``(state, done, info)`` for the current position in the session."""
        out_of_events = self.current_session_index == self.current_session.shape[0]
        if out_of_events or not self._continuing_in_session():
            return None, True, {}
        return self._state(), False, {}

    def _continuing_in_session(self):
        """Decide whether the user is still in the session at the current event."""
        sim_counts = self.metadata['sim_size']
        events_so_far = self.current_session.iloc[self.current_session_index]['cum_session_event_raw']

        # Within the simulated session length the user always continues.
        if events_so_far <= sim_counts:
            return True

        # Beyond it, continue only while the probability is within [0.3, 0.7].
        probability = self._probability_extending_session(events_so_far)
        return bool(.3 <= probability <= .7)

    def _probability_extending_session(self, current_session_count):
        """Normal CDF centred on the incentive event, evaluated at the current event count.

        Returns 0 when no incentive has been placed.
        """
        incentive_index = self.metadata['incentive_index']
        if incentive_index == 0:
            return 0

        spread = max(5, int(self.metadata['session_size'] / 4))
        return norm.cdf(current_session_count, loc=incentive_index, scale=spread)

    def _get_events(self, user_id, session):
        """Return the events of one user session ordered by 'date_time'."""
        belongs_to_session = (
            (self.dataset['user_id'] == user_id) &
            (self.dataset['session_30_raw'] == session)
        )
        ordered = self.dataset[belongs_to_session].sort_values(by=['date_time'])
        return ordered.reset_index(drop=True)

    def _take_action(self, action):
        """Record where the incentive was placed; ignored for action 0 or if one already exists."""
        if action == 0:
            return
        if self.metadata['incentive_index'] > 0:
            return

        row_index = min(self.current_session_index, self.current_session.shape[0] - 1)
        current_event = self.current_session.iloc[row_index]
        self.metadata['incentive_index'] = current_event['cum_session_event_raw']
        self.metadata['incentive_time'] = current_event['cum_session_time_raw']

    def _state(self):
        """Return the observation: the latest events as a float32 array of shape (features, rows).

        When fewer than ``n_sequences + 1`` events precede the current index
        the window is left-padded with rows of zeros.
        """
        position = self.current_session_index
        window = self.n_sequences + 1
        columns = self.out_features

        if position > self.n_sequences:
            events = self.current_session.iloc[position - window:position][columns].values
        else:
            n_padding = min(window - position, self.n_sequences)
            history = self.current_session.iloc[:max(position, 1)][columns].values
            padding = np.zeros((n_padding, len(columns)))
            events = np.concatenate((padding, history), axis=0)

        return events.astype(np.float32).T
  
    


In [2]:
# %load rl_results_eval_cpu.py
import logging
import os
import boto3
import numpy as np
import pandas as pd
import torch
from policies.cnn_policy import CustomConv1dFeatures
from rl_constant import LABEL, METADATA, OUT_FEATURE_COLUMNS, PREDICTION_COLS, RL_STAT_COLS
from stable_baselines3 import DQN, PPO, A2C, SAC, TD3
import json
from pqdm.processes import pqdm
# Concatenation of the label, metadata, observation-feature and prediction column names.
ALL_COLS = LABEL + METADATA + OUT_FEATURE_COLUMNS  + PREDICTION_COLS

# --- Evaluation constants ---------------------------------------------------
S3_BASELINE_PATH = 'dissertation-data-dmiller'  # S3 bucket holding data and checkpoints
N_SEQUENCES = 40
TRAIN_SPLIT = 0.7
EVAL_SPLIT = 0.15

# Positional column indices.
USER_INDEX = 1
SESSION_INDEX = 2
CUM_SESSION_EVENT_RAW = 3
TIMESTAMP_INDEX = 11

# --- Logging ----------------------------------------------------------------
logging.basicConfig(
    format='%(asctime)s %(message)s',
    datefmt='%m/%d/%Y %I:%M:%S %p',
    level=logging.INFO,
)
logger = logging.getLogger('rl_results_eval')
logger.setLevel(logging.INFO)

# --- Display options for numpy / torch / pandas -----------------------------
np.set_printoptions(precision=4, linewidth=200, suppress=True)
torch.set_printoptions(precision=2, linewidth=200, sci_mode=False)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)
pd.set_option('display.width', 1000)

def find_s3_candidate(client, feature_extractor, lstm, run_time):
    """Return the S3 key of the most recently modified checkpoint of a training run.

    client: S3 client exposing ``list_objects_v2``.
    feature_extractor: lower-case extractor name used in the folder name.
    lstm: prediction input name; 'seq_20' is remapped to 'seq_40' for the folder name.
    run_time: timestamp folder of the training run.

    Raises KeyError when no object exists under the checkpoint prefix.
    """
    # Local import keeps this function self-contained.
    import posixpath

    if lstm == 'seq_20':
        lstm = 'seq_40'

    # S3 keys always use '/', so build the prefix with posixpath rather than
    # the host OS's path separator.
    folder_prefix = posixpath.join(
        'reinforcement_learning_incentives',
        'n_files_30',
        f'{feature_extractor}_{lstm}',
        'results',
        run_time,
        'checkpoints',
    )

    logger.info(f'Looking for files in {folder_prefix}')

    # list_objects_v2 returns the listing in pages; follow the continuation
    # token so the newest checkpoint is found even in a large folder.
    files = []
    request = {'Bucket': S3_BASELINE_PATH, 'Prefix': folder_prefix}
    while True:
        response = client.list_objects_v2(**request)
        # 'Contents' is absent from the response when nothing matches.
        files.extend(
            {'key': entry['Key'], 'last_modified': entry['LastModified']}
            for entry in response.get('Contents', [])
        )
        if not response.get('IsTruncated'):
            break
        request['ContinuationToken'] = response['NextContinuationToken']

    if not files:
        raise KeyError(f'No checkpoints found in s3://{S3_BASELINE_PATH}/{folder_prefix}')

    s3_candidate = max(files, key=lambda item: item['last_modified'])['key']

    logger.info(f'Found candiate: {s3_candidate}')

    return s3_candidate

def get_policy(client, feature_extractor, lstm, run_time, algo):
    """Make sure the checkpoint of a training run is available locally and return its path.

    The newest checkpoint is located with ``find_s3_candidate`` and downloaded
    to ``reinforcement_learning_incentives/<feature_extractor>_<lstm>/<algo>.zip``
    unless that file already exists.

    NOTE(review): the local path does not include ``run_time``, so two runs
    with the same extractor/lstm/algo share one cached file.
    """
    s3_candidate = find_s3_candidate(client, feature_extractor, lstm, run_time)

    model_base_path = os.path.join('reinforcement_learning_incentives', f'{feature_extractor}_{lstm}')
    download_path = os.path.join(model_base_path, f'{algo}.zip')

    # Decide on the checkpoint file itself, not on its directory: a directory
    # left behind by an interrupted download must not suppress the download.
    if not os.path.exists(download_path):
        if not os.path.exists(model_base_path):
            logger.info(f'Creating directory {model_base_path}')
        os.makedirs(model_base_path, exist_ok=True)
        client.download_file(S3_BASELINE_PATH, s3_candidate, download_path)
        logger.info(f'Loading model from {s3_candidate} to {download_path}')

    logger.info(f'Checkpoint load path: {download_path}')
    return download_path
        
def _lstm_loader(lstm):
    if lstm == 'no_pred':
        return []

    return LABEL if lstm == 'label' else ['seq_20']

def run_session(args):
    """Roll the policy through one user session and collect the final step's info.

    args: tuple ``(dataset, feature_meta, model, out_features, n_sequences, info_container)``.
        ``feature_meta`` identifies the session through its 'user_id' and
        'session_30_raw' entries; it may be the row itself or an
        ``(index, row)`` pair as produced by ``DataFrame.iterrows()``.

    Returns ``info_container`` with the info dictionary of the terminal step appended.
    """
    dataset, feature_meta, model, out_features, n_sequences, info_container = args

    # Only unpack genuine (index, row) pairs. Unpacking a two-element row
    # would silently turn it into its two scalar values.
    if isinstance(feature_meta, tuple):
        _, feature_meta = feature_meta

    in_session = (
        (dataset['user_id'] == feature_meta['user_id']) &
        (dataset['session_30_raw'] == feature_meta['session_30_raw'])
    )
    subset = dataset[in_session]

    env = CitizenScienceEnv(subset, out_features, n_sequences)
    observation = env.reset()
    done = False
    info = {}
    while not done:
        action, _states = model.predict(observation, deterministic=True)
        observation, _reward, done, info = env.step(action)
    info_container.append(info)

    return info_container

def run_experiment(model, dataset, out_features, n_sequences):
    """Evaluate ``model`` on every (user_id, session_30_raw) session of ``dataset``.

    Sessions are dispatched to ``run_session`` through ``pqdm`` with 8 jobs.
    Returns a list with one info dictionary per session that completed;
    sessions whose worker raised are logged and skipped.
    """
    dataset = dataset.loc[:, ~dataset.columns.duplicated()].copy()
    unique_sessions = dataset[['user_id', 'session_30_raw']].drop_duplicates()
    logger.info(f'Running experiment with {model}: n_session={len(unique_sessions)}')

    # Every task gets its own empty container. Workers run in separate
    # processes, so a list shared through the arguments is never updated in
    # the parent - results only come back through the return values.
    args = [
        (dataset, feature_meta, model, out_features, n_sequences, [])
        for _, feature_meta in unique_sessions.iterrows()
    ]

    results = pqdm(args, run_session, n_jobs=8)

    info_container = []
    for task, result in zip(args, results):
        # By default pqdm hands back a worker's exception in place of its result.
        if isinstance(result, BaseException):
            session = task[1]
            logger.warning(
                f'Session failed: user_id={session["user_id"]}, '
                f'session_30_raw={session["session_30_raw"]}: {result!r}'
            )
            continue
        info_container.extend(result)

    return info_container

        
    
        

def get_dataset(conv_path, n_files, window, part='train'):
    """Load ``<conv_path>/files_used_<n_files>/window_<window>_<part>.parquet``.

    If the file is missing locally it is first downloaded from the
    S3_BASELINE_PATH bucket, using the same relative path as the object key.
    Relies on the module-level ``client`` that ``main`` creates.

    Returns the dataset as a DataFrame.
    """
    conv_dir = os.path.join(conv_path, f'files_used_{n_files}')

    if not os.path.exists(conv_dir):
        logger.info(f'Creating directory {conv_dir}')
    os.makedirs(conv_dir, exist_ok=True)

    conv_path = os.path.join(conv_dir, f'window_{window}_{part}.parquet')

    if not os.path.exists(conv_path):
        logger.info(f'Convolutional dataset not found at {conv_path}: creating')
        logger.info(f'Getting dataset from bucket: {S3_BASELINE_PATH}, key: {conv_path}')
        # S3 keys always use '/', whatever separator the local path uses.
        s3_key = conv_path.replace(os.sep, '/')
        client.download_file(S3_BASELINE_PATH, s3_key, conv_path)

    logger.info(f'Loading convolutional dataset from {conv_path}')
    df = pd.read_parquet(conv_path)

    logger.info(f'Dataset loaded: {df.shape}')

    return df


def run_exp_wrapper(args, df, write_path):
    """Evaluate one trained policy on ``df`` and write the per-session results to parquet.

    args: policy description with the keys 'algo', 'feature_extractor',
        'lstm' and 'run_time'.
    df: dataset containing the metadata, feature, RL statistic and (if any)
        prediction columns.
    write_path: directory for the result file; created if missing.

    Relies on the module-level ``client`` that ``main`` creates.
    Raises ValueError for a feature extractor other than 'cnn' or 'mlp'.
    """
    feature_extractor = args['feature_extractor'].lower()
    policy_weights = get_policy(client, feature_extractor, args['lstm'], args['run_time'], args['algo'])

    prediction_cols = _lstm_loader(args['lstm'])
    out_features = OUT_FEATURE_COLUMNS + prediction_cols
    # METADATA and RL_STAT_COLS share some names; drop repeats (keeping order)
    # so the selection below does not produce duplicated columns.
    all_features = list(dict.fromkeys(METADATA + OUT_FEATURE_COLUMNS + RL_STAT_COLS + prediction_cols))

    df = df[all_features]
    env = CitizenScienceEnv(df, out_features, N_SEQUENCES)

    if feature_extractor == 'cnn':
        # Channel count follows the actual observation columns rather than a
        # hard-coded 21, so a CNN policy without a prediction column also fits.
        CustomConv1dFeatures.setup_sequences_features(N_SEQUENCES + 1, len(out_features))
        logger.info(f'Using custom CNN feature extractor')
        policy_kwargs = dict(
            features_extractor_class=CustomConv1dFeatures,
            net_arch=[10]
        )

        model = DQN(policy='CnnPolicy', env=env, policy_kwargs=policy_kwargs)
        model.set_parameters(policy_weights)
    elif feature_extractor == 'mlp':
        # The training-time network architecture is not known here, so restore
        # the whole model (architecture and weights) from the checkpoint.
        logger.info(f'Using MLP feature extractor')
        model = DQN.load(policy_weights, env=env)
    else:
        raise ValueError(f'Unsupported feature extractor: {args["feature_extractor"]}')

    experiment = run_experiment(model, df, out_features, N_SEQUENCES)
    experiment_df = pd.DataFrame(experiment)

    logger.info(f'Finished experiment: {args}')
    os.makedirs(write_path, exist_ok=True)

    write_path = os.path.join(
        write_path,
        f'{args["algo"]}_{args["feature_extractor"]}_{args["lstm"]}_{args["run_time"]}.parquet'
    )

    logger.info(f'Writing experiment to {write_path}')

    experiment_df.to_parquet(write_path)
        
    
    
     
def main(args):
    """Run the offline evaluation for every policy in POLICY_LIST.

    args: object with the attributes ``read_path``, ``write_path``,
        ``n_files``, ``window`` and ``data_part``.

    Creates the module-level S3 ``client`` used by the helper functions.
    """
    global client
    client = boto3.client('s3')
    logger.info('Starting offlline evaluation of RL model')

    # One value per target: the previous tuple also carried `args.device`,
    # which made the unpacking fail and shifted `window` / `data_part`.
    conv_path, write_path, n_files, window, data_part = (
        args.read_path,
        args.write_path,
        args.n_files,
        args.window,
        args.data_part
    )

    df = get_dataset(conv_path, n_files, window, data_part)
    # NOTE(review): only the first 10000 rows are evaluated - confirm this cap is intended.
    df = df[:10000]
    for r in POLICY_LIST:
        logger.info(f'Running evaluation for {r}')
        run_exp_wrapper(r, df.copy(), write_path)
        
   

In [None]:
class Argument:
    """Plain attribute container standing in for parsed command-line arguments.

    Presumably meant to be handed to ``main`` - the attribute names match the
    ones ``main`` reads; confirm against the calling cell.
    """
    # Input / output locations
    read_path, write_path = 'rl_ready_data_conv', 'rl_results'

    # Dataset selection
    data_part = 'train'
    n_files = 2
    window = 2

    # Sequence length
    n_sequences = 40

In [None]:
import logging
import boto3