# pip install

In [None]:
pip install gymnasium


In [None]:
pip install "stable-baselines3[extra]>=2.0.0a4"

In [None]:
import numpy as np

import gymnasium as gym
from gymnasium import spaces

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env, SubprocVecEnv, DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy

## set reproducibility

In [None]:
import torch
import numpy as np
import random
import os
# Fixed: the variable was misspelled 'PYTHONASHSEED', so the hash seed was never set.
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it here only
# reaches processes started from this kernel, not the kernel's own hash seed.
os.environ['PYTHONHASHSEED'] = '0'
# cuBLAS workspace setting (see the PyTorch reproducibility notes)
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

# Seed every random number generator used in this notebook with the same value
seed = 0
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Learning Rate Func

In [None]:

def linear_schedule(initial_value):
    """Build a schedule whose value is proportional to ``progress``.

    Args:
        initial_value: Base rate; a string is converted with ``float`` first.

    Returns:
        A one-argument function returning ``progress * initial_value``.
    """
    base_rate = float(initial_value) if isinstance(initial_value, str) else initial_value

    def scaled_rate(progress):
        # The rate is the base rate scaled by whatever progress value the caller passes in
        return progress * base_rate

    return scaled_rate

# Ranking Func

In [None]:
#local modules directory for import
import sys  # required by sys.path.append below; it was never imported (NameError on a fresh kernel)

DIR = '/xxhome/' # replace with utils home directory


# Make the local `utils` package importable
sys.path.append(DIR)


# import utils fns
from utils.read_data_fns import *
from utils.eval_fns import *

In [None]:
import pandas as pd  # explicit: `pd` is used here, before the notebook's first `import pandas` cell

# Accumulator for the evaluation rows of every test collection
df_all_targets = pd.DataFrame()
DRL_DIR = '/xx/' # replace with working directory


# TAREnv class



In [None]:
SELECTED_TOPICS = [] # keep track of all randomly selected topics


In [None]:
# import RL env
from rl_utils.rlstop_tar_env import *
from rl_utils.ranking_utils import *



## Hyperparameter Settings

In [None]:
TRAINING = True
total_runs = 10  # number of evaluation passes per test collection

# Train the agent
ent_coef = 0.01

gamma = 0.99
learning_rate_initial = 0.0001

# One switch selects both the learning rate handed to PPO and the label embedded in the
# run/model names, so the two cannot disagree. Previously the rate was a linear schedule
# while the label was overwritten with '_lr_static'; the training behaviour is kept and
# the label now matches it.
USE_LINEAR_LR_SCHEDULE = True
if USE_LINEAR_LR_SCHEDULE:
  learning_rate = linear_schedule(learning_rate_initial)
  learning_rate_type = '_linear_schedule'+str(learning_rate_initial)
else:
  learning_rate = learning_rate_initial
  learning_rate_type = '_lr_static'+str(learning_rate_initial)

clip_range=0.2  # NOTE: not passed to the PPO constructor in the training cells

n_steps = 100
batch_size = 100
n_epochs =8

model_name = 'reward_1-1_'

total_timesteps = 100_000

# Directory for TensorBoard logs; saved models are written under the same prefix
tensorboard_log = '/logs/'



# Target 0.9

In [None]:
target_recall = 0.9 # replace with other target recall level

## Training

In [None]:
# Phase flag (True = training). No code visible in this notebook reads it;
# presumably consumed by the imported TAREnv module — TODO confirm.
TRAINING = True

In [None]:
# Label of the training collection; it becomes part of tb_log_name (run / model file name).
training_dataset = 'CLEF'


#### sort topics by target location

In [None]:

# Load the CLEF2017 training collection: relevance judgements first, then the ranking.
dataset_name = 'CLEF2017'


# Relevance judgements (qrels) file
qrels = "data/qrels/CLEF2017_qrels.txt"


qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# Ranking (run) file for the training topics
run = "data/rankings/clef2017_training_ranking.txt"

# NOTE(review): keep this after load_rel_data — the load_run_data defined later in this
# notebook reads the global query_rel_dic; the imported one presumably does too.
doc_rank_dic, rank_rel_dic = load_run_data(run)

# NOTE(review): meaning of the second argument (1) is defined in utils; the sibling
# cells describe this call as "sort topics by no docs".
topics_list = make_topics_list(doc_rank_dic,1)

In [None]:
# Remove topic CD008760 (the last element): it contains only 64 items, fewer than the
# vector size of 100.
# NOTE(review): not idempotent — every re-run of this cell drops one more topic, and it
# relies on CD008760 being last in topics_list.
topics_list= topics_list[:-1]


In [None]:
# One row per topic: [topic_id, n_docs, n_rel, prev, target_location]
topics_info = []

for topic in topics_list:
  stats = load_topic_target_location(topic, target_recall)
  topic_id, n_docs, n_rel, prev, target_location = stats
  print(topic_id, n_docs, n_rel, round(prev, 3), target_location)
  topics_info.append([topic_id, n_docs, n_rel, prev, target_location])

topics_info

In [None]:
# import pandas as pd
import pandas as pd


# Tabulate the per-topic statistics, ordered by ascending target location
df = (
    pd.DataFrame(
        topics_info,
        columns=['topic_id', 'n_docs', 'n_rel', 'prev', 'target_location'],
    )
    .sort_values(by=['target_location'])
)
df

In [None]:
# Topic ids in the row order of df (already sorted by target location)
sorted_target_loc_topics = [topic for topic in df['topic_id']]
sorted_target_loc_topics

#### ordered topics

In [None]:
TRAINING = True

# Topic order (sorted by target location) and its index counter, starting at 0
SELECTED_TOPICS_ORDERERD_INDEX = 0
SELECTED_TOPICS_ORDERERD = sorted_target_loc_topics

# Instantiate the vec env

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction

vec_env_train = vec_env
train_size = len(topics_list)

#### PPO

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [None]:

# Run name shared by the TensorBoard logs and the saved model file
tb_log_name = (
    f"{model_name}_{training_dataset}_ppo_gma_{gamma}_nsteps{n_steps}"
    f"_btch{batch_size}_timesteps_{total_timesteps}_ent_coef{ent_coef}"
    f"{learning_rate_type}_n_epochs{n_epochs}_target{target_recall}"
)

# PPO agent configured from the hyperparameter cell
model = PPO(
    policy='MlpPolicy',
    env=vec_env_train,
    learning_rate=learning_rate,
    n_steps=n_steps,
    batch_size=batch_size,
    n_epochs=n_epochs,
    gamma=gamma,
    gae_lambda=0.98,
    ent_coef=ent_coef,
    tensorboard_log=tensorboard_log,
    verbose=1,
    seed=0,
)

model.learn(total_timesteps=total_timesteps, tb_log_name=tb_log_name)

# Save the trained model under the log directory prefix
model.save(f"{tensorboard_log}model_{tb_log_name}")




In [None]:
# Open TensorBoard inline on the log directory ($tensorboard_log is expanded by IPython)
%tensorboard --logdir "$tensorboard_log"


## TESTING

In [None]:
# Phase flag switched off for the testing section. No code visible in this notebook
# reads it; presumably consumed by the imported TAREnv module — TODO confirm.
TRAINING = False

### clef2017

In [None]:

# Load the CLEF2017 test collection: relevance judgements first, then the ranking.
# dataset_name is written to the 'Dataset' column of the evaluation results.
dataset_name = 'CLEF2017'


# Relevance judgements (qrels) file
qrels = "data/qrels/CLEF2017_qrels.txt"


qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# Ranking (run) file for the test topics
run = "data/rankings/clef2017_ranking.txt"

# NOTE(review): keep this after load_rel_data — the load_run_data defined later in this
# notebook reads the global query_rel_dic; the imported one presumably does too.
doc_rank_dic, rank_rel_dic = load_run_data(run)

topics_list = make_topics_list(doc_rank_dic,1)  # sort topics by no docs

In [None]:
# Instantiate the vec env with as many environments as there are entries in topics_list

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction


In [None]:
# Alias the freshly built vec env for evaluation and record how many envs it holds
vec_env_test = vec_env
test_size = len(topics_list)

#### TAR Eval

In [None]:

# Evaluate the trained agent: `total_runs` passes over every test environment, producing
# one result row per finished episode.
#
# Changes from the previous version of this cell:
#   * the discarded `df.groupby('Topic').mean()` calls are gone (pandas >= 2.0 raises a
#     TypeError on the string 'Dataset' column);
#   * the per-run frame is built once per run instead of on every finished episode;
#   * a run in which no episode finishes yields an empty frame instead of re-appending
#     the previous run's rows.
test_steps = 100  # maximum agent steps per episode; unfinished episodes produce no row

result_columns = ['Dataset', 'Topic', 'Run', 'Recall', 'Reliability', 'Cost', 'e-Cost',
                  'Reward', 'Difference', 'Distance', 'Target']

df = pd.DataFrame(columns=result_columns)
run_frames = []  # one frame per evaluation run

for run in range(total_runs):

  # Test the trained agent
  vec_env_test = vec_env
  obs = vec_env_test.reset()

  records = []  # result rows collected during this run

  for eID in range(test_size):

    # Drive the individual environment held by the vec env
    env = vec_env_test.envs[eID]
    obs, info = env.reset()

    for step in range(test_steps):
      # deterministic=False: the action is sampled, so runs can differ
      action, _ = model.predict(obs, deterministic=False)
      obs, reward, done, trun, info = env.step(action)

      if done or trun:
        recall = info['recall']
        target = info['target']
        records.append([
            dataset_name,
            info['topic_id'],
            run,
            recall,
            1 if recall >= target_recall else 0,         # Reliability
            info['cost'],
            (info['agent'] - target) / (100 - target),   # e-Cost
            reward,
            target_recall - recall,                      # Difference
            info['distance'],
            target,
        ])
        break

  df = pd.DataFrame(records, columns=result_columns)
  display(df)
  run_frames.append(df)

# Stack all runs; each run keeps its own 0..n-1 index, as before
df_all_runs = pd.concat(run_frames) if run_frames else pd.DataFrame(columns=result_columns)
df_all_runs['Model'] = model_name
df_all_runs['Model_settings'] = tb_log_name
df_all_runs['Target_Recall'] = target_recall
df_all_runs

In [None]:
# Per-topic mean and standard deviation over all runs. numeric_only=True skips the string
# columns (Dataset, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_runs.groupby('Topic').mean(numeric_only=True))
display(df_all_runs.groupby('Topic').std(numeric_only=True))


#### df_all_targets

In [None]:

# Append this collection's runs to the cross-collection results.
# NOTE(review): not idempotent — re-running this cell appends the same rows again.
df_all_targets = pd.concat([df_all_targets, df_all_runs], ignore_index = True)


display(df_all_targets)

# Summary statistics, shown as the cell output
df_all_targets.describe()

In [None]:
# Mean and standard deviation per (target recall, dataset). numeric_only=True skips the
# string columns (Topic, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_targets.groupby(['Target_Recall','Dataset']).mean(numeric_only=True).round(3))
display(df_all_targets.groupby(['Target_Recall','Dataset']).std(numeric_only=True).round(3))

### clef2018

In [None]:

# Load the CLEF2018 test collection: relevance judgements first, then the ranking.
# dataset_name is written to the 'Dataset' column of the evaluation results.
dataset_name = 'CLEF2018'


qrels = "data/qrels/CLEF2018_qrels_LiKs.txt" # use the same qrel list as their rankings


qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# Ranking (run) file for the test topics
run = "data/rankings/clef2018_ranking.txt"

# NOTE(review): keep this after load_rel_data — the load_run_data defined later in this
# notebook reads the global query_rel_dic; the imported one presumably does too.
doc_rank_dic, rank_rel_dic = load_run_data(run)

topics_list = make_topics_list(doc_rank_dic,1)  # sort topics by no docs

In [None]:
# Instantiate the vec env with as many environments as there are entries in topics_list

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction


In [None]:
# Alias the freshly built vec env for evaluation and record how many envs it holds
vec_env_test = vec_env
test_size = len(topics_list)

#### TAR Eval

In [None]:

# Evaluate the trained agent: `total_runs` passes over every test environment, producing
# one result row per finished episode.
#
# Changes from the previous version of this cell:
#   * the discarded `df.groupby('Topic').mean()` calls are gone (pandas >= 2.0 raises a
#     TypeError on the string 'Dataset' column);
#   * the per-run frame is built once per run instead of on every finished episode;
#   * a run in which no episode finishes yields an empty frame instead of re-appending
#     the previous run's rows.
test_steps = 100  # maximum agent steps per episode; unfinished episodes produce no row

result_columns = ['Dataset', 'Topic', 'Run', 'Recall', 'Reliability', 'Cost', 'e-Cost',
                  'Reward', 'Difference', 'Distance', 'Target']

df = pd.DataFrame(columns=result_columns)
run_frames = []  # one frame per evaluation run

for run in range(total_runs):

  # Test the trained agent
  vec_env_test = vec_env
  obs = vec_env_test.reset()

  records = []  # result rows collected during this run

  for eID in range(test_size):

    # Drive the individual environment held by the vec env
    env = vec_env_test.envs[eID]
    obs, info = env.reset()

    for step in range(test_steps):
      # deterministic=False: the action is sampled, so runs can differ
      action, _ = model.predict(obs, deterministic=False)
      obs, reward, done, trun, info = env.step(action)

      if done or trun:
        recall = info['recall']
        target = info['target']
        records.append([
            dataset_name,
            info['topic_id'],
            run,
            recall,
            1 if recall >= target_recall else 0,         # Reliability
            info['cost'],
            (info['agent'] - target) / (100 - target),   # e-Cost
            reward,
            target_recall - recall,                      # Difference
            info['distance'],
            target,
        ])
        break

  df = pd.DataFrame(records, columns=result_columns)
  display(df)
  run_frames.append(df)

# Stack all runs; each run keeps its own 0..n-1 index, as before
df_all_runs = pd.concat(run_frames) if run_frames else pd.DataFrame(columns=result_columns)
df_all_runs['Model'] = model_name
df_all_runs['Model_settings'] = tb_log_name
df_all_runs['Target_Recall'] = target_recall
df_all_runs

In [None]:
# Per-topic mean and standard deviation over all runs. numeric_only=True skips the string
# columns (Dataset, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_runs.groupby('Topic').mean(numeric_only=True))
display(df_all_runs.groupby('Topic').std(numeric_only=True))


#### df_all_targets

In [None]:

# Append this collection's runs to the cross-collection results.
# NOTE(review): not idempotent — re-running this cell appends the same rows again.
df_all_targets = pd.concat([df_all_targets, df_all_runs], ignore_index = True)


display(df_all_targets)

# Summary statistics, shown as the cell output
df_all_targets.describe()

In [None]:
# Mean and standard deviation per (target recall, dataset). numeric_only=True skips the
# string columns (Topic, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_targets.groupby(['Target_Recall','Dataset']).mean(numeric_only=True).round(3))
display(df_all_targets.groupby(['Target_Recall','Dataset']).std(numeric_only=True).round(3))

### clef2019

In [None]:

# Load the CLEF2019 test collection: relevance judgements first, then the ranking.
# dataset_name is written to the 'Dataset' column of the evaluation results.
dataset_name = 'CLEF2019'


qrels = "data/qrels/CLEF2019_qrels_LiKs.txt" # use the same qrel list as their rankings


qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# Ranking (run) file for the test topics
run = "data/rankings/clef2019_ranking.txt"

# NOTE(review): keep this after load_rel_data — the load_run_data defined later in this
# notebook reads the global query_rel_dic; the imported one presumably does too.
doc_rank_dic, rank_rel_dic = load_run_data(run)

topics_list = make_topics_list(doc_rank_dic,1)  # sort topics by no docs

In [None]:
# Remove topic CD012164 (the last element): it contains only 61 items, fewer than the
# vector size of 100.
# NOTE(review): not idempotent — every re-run of this cell drops one more topic, and it
# relies on CD012164 being last in topics_list.
topics_list= topics_list[:-1]


In [None]:
# Instantiate the vec env with as many environments as there are entries in topics_list

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction


In [None]:
# Alias the freshly built vec env for evaluation and record how many envs it holds
vec_env_test = vec_env
test_size = len(topics_list)

#### TAR Eval

In [None]:

# Evaluate the trained agent: `total_runs` passes over every test environment, producing
# one result row per finished episode.
#
# Changes from the previous version of this cell:
#   * the discarded `df.groupby('Topic').mean()` calls are gone (pandas >= 2.0 raises a
#     TypeError on the string 'Dataset' column);
#   * the per-run frame is built once per run instead of on every finished episode;
#   * a run in which no episode finishes yields an empty frame instead of re-appending
#     the previous run's rows.
test_steps = 100  # maximum agent steps per episode; unfinished episodes produce no row

result_columns = ['Dataset', 'Topic', 'Run', 'Recall', 'Reliability', 'Cost', 'e-Cost',
                  'Reward', 'Difference', 'Distance', 'Target']

df = pd.DataFrame(columns=result_columns)
run_frames = []  # one frame per evaluation run

for run in range(total_runs):

  # Test the trained agent
  vec_env_test = vec_env
  obs = vec_env_test.reset()

  records = []  # result rows collected during this run

  for eID in range(test_size):

    # Drive the individual environment held by the vec env
    env = vec_env_test.envs[eID]
    obs, info = env.reset()

    for step in range(test_steps):
      # deterministic=False: the action is sampled, so runs can differ
      action, _ = model.predict(obs, deterministic=False)
      obs, reward, done, trun, info = env.step(action)

      if done or trun:
        recall = info['recall']
        target = info['target']
        records.append([
            dataset_name,
            info['topic_id'],
            run,
            recall,
            1 if recall >= target_recall else 0,         # Reliability
            info['cost'],
            (info['agent'] - target) / (100 - target),   # e-Cost
            reward,
            target_recall - recall,                      # Difference
            info['distance'],
            target,
        ])
        break

  df = pd.DataFrame(records, columns=result_columns)
  display(df)
  run_frames.append(df)

# Stack all runs; each run keeps its own 0..n-1 index, as before
df_all_runs = pd.concat(run_frames) if run_frames else pd.DataFrame(columns=result_columns)
df_all_runs['Model'] = model_name
df_all_runs['Model_settings'] = tb_log_name
df_all_runs['Target_Recall'] = target_recall
df_all_runs

In [None]:
# Per-topic mean and standard deviation over all runs. numeric_only=True skips the string
# columns (Dataset, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_runs.groupby('Topic').mean(numeric_only=True))
display(df_all_runs.groupby('Topic').std(numeric_only=True))


#### df_all_targets

In [None]:

# Append this collection's runs to the cross-collection results.
# NOTE(review): not idempotent — re-running this cell appends the same rows again.
df_all_targets = pd.concat([df_all_targets, df_all_runs], ignore_index = True)


display(df_all_targets)

# Summary statistics, shown as the cell output
df_all_targets.describe()

In [None]:
# Mean and standard deviation per (target recall, dataset). numeric_only=True skips the
# string columns (Topic, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_targets.groupby(['Target_Recall','Dataset']).mean(numeric_only=True).round(3))
display(df_all_targets.groupby(['Target_Recall','Dataset']).std(numeric_only=True).round(3))

## TREC-TR

### Training

In [None]:
# Phase flag (True = training). No code visible in this notebook reads it;
# presumably consumed by the imported TAREnv module — TODO confirm.
TRAINING = True


In [None]:
# Label of the training collection; it becomes part of tb_log_name (run / model file name).
training_dataset = 'TREC-TR'

#### sort topics by target location

In [None]:


# Load the TREC-TR training collection: relevance judgements first, then the ranking.
dataset_name = 'TREC-TR'

# Relevance judgements (qrels) file
qrels = "data/qrels/TREC_TR_Training_qrels.txt"


qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# Ranking (run) file for the training topics
run = "data/rankings/tr_training_ranking.txt"

# NOTE(review): keep this after load_rel_data — the load_run_data defined later in this
# notebook reads the global query_rel_dic; the imported one presumably does too.
doc_rank_dic, rank_rel_dic = load_run_data(run)



topics_list = make_topics_list(doc_rank_dic,1)  # sort topics by no docs

In [None]:
# One row per topic: [topic_id, n_docs, n_rel, prev, target_location]
topics_info = []

for topic in topics_list:
  stats = load_topic_target_location(topic, target_recall)
  topic_id, n_docs, n_rel, prev, target_location = stats
  print(topic_id, n_docs, n_rel, round(prev, 3), target_location)
  topics_info.append([topic_id, n_docs, n_rel, prev, target_location])

topics_info

In [None]:

# Tabulate the per-topic statistics, ordered by ascending target location
df = (
    pd.DataFrame(
        topics_info,
        columns=['topic_id', 'n_docs', 'n_rel', 'prev', 'target_location'],
    )
    .sort_values(by=['target_location'])
)
df

In [None]:
# Topic ids in the row order of df (already sorted by target location)
sorted_target_loc_topics = [topic for topic in df['topic_id']]
sorted_target_loc_topics

#### ordered topics

In [None]:
TRAINING = True

# Topic order (sorted by target location) and its index counter, starting at 0
SELECTED_TOPICS_ORDERERD_INDEX = 0
SELECTED_TOPICS_ORDERERD = sorted_target_loc_topics

# Instantiate the vec env

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction

vec_env_train = vec_env
train_size = len(topics_list)

####  PPO

In [None]:


# Run name shared by the TensorBoard logs and the saved model file
tb_log_name = (
    f"{model_name}_{training_dataset}_ppo_gma_{gamma}_nsteps{n_steps}"
    f"_btch{batch_size}_timesteps_{total_timesteps}_ent_coef{ent_coef}"
    f"{learning_rate_type}_n_epochs{n_epochs}_target{target_recall}"
)

# PPO agent configured from the hyperparameter cell
model = PPO(
    policy='MlpPolicy',
    env=vec_env_train,
    learning_rate=learning_rate,
    n_steps=n_steps,
    batch_size=batch_size,
    n_epochs=n_epochs,
    gamma=gamma,
    gae_lambda=0.98,
    ent_coef=ent_coef,
    tensorboard_log=tensorboard_log,
    verbose=1,
    seed=0,
)

model.learn(total_timesteps=total_timesteps, tb_log_name=tb_log_name)

# Save the trained model under the log directory prefix
model.save(f"{tensorboard_log}model_{tb_log_name}")




In [None]:
# Open TensorBoard inline on the log directory ($tensorboard_log is expanded by IPython)
%tensorboard --logdir "$tensorboard_log"


### Testing

In [None]:
# Testing phase: switch the phase flag off, as the CLEF testing section does. Without
# this the flag keeps the True left over from the TREC-TR training cells.
# NOTE(review): the flag is presumably read by the imported TAREnv module — confirm.
TRAINING = False

# Load the TREC-TR test collection: relevance judgements first, then the ranking.
# dataset_name is written to the 'Dataset' column of the evaluation results.
dataset_name = 'TREC-TR'

qrels = "data/qrels/TREC_TR_Test_qrels.txt" # use the same qrel list as their rankings

qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# Ranking (run) file for the test topics
run = "data/rankings/tr_test_ranking.txt"

doc_rank_dic, rank_rel_dic = load_run_data(run)

topics_list = make_topics_list(doc_rank_dic,1)  # sort topics by no docs


In [None]:
# Instantiate the vec env with as many environments as there are entries in topics_list

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction


In [None]:
# Alias the freshly built vec env for evaluation and record how many envs it holds
vec_env_test = vec_env
test_size = len(topics_list)

#### TAR Eval

In [None]:

# Evaluate the trained agent: `total_runs` passes over every test environment, producing
# one result row per finished episode.
#
# Changes from the previous version of this cell:
#   * the discarded `df.groupby('Topic').mean()` calls are gone (pandas >= 2.0 raises a
#     TypeError on the string 'Dataset' column);
#   * the per-run frame is built once per run instead of on every finished episode;
#   * a run in which no episode finishes yields an empty frame instead of re-appending
#     the previous run's rows.
test_steps = 100  # maximum agent steps per episode; unfinished episodes produce no row

result_columns = ['Dataset', 'Topic', 'Run', 'Recall', 'Reliability', 'Cost', 'e-Cost',
                  'Reward', 'Difference', 'Distance', 'Target']

df = pd.DataFrame(columns=result_columns)
run_frames = []  # one frame per evaluation run

for run in range(total_runs):

  # Test the trained agent
  vec_env_test = vec_env
  obs = vec_env_test.reset()

  records = []  # result rows collected during this run

  for eID in range(test_size):

    # Drive the individual environment held by the vec env
    env = vec_env_test.envs[eID]
    obs, info = env.reset()

    for step in range(test_steps):
      # deterministic=False: the action is sampled, so runs can differ
      action, _ = model.predict(obs, deterministic=False)
      obs, reward, done, trun, info = env.step(action)

      if done or trun:
        recall = info['recall']
        target = info['target']
        records.append([
            dataset_name,
            info['topic_id'],
            run,
            recall,
            1 if recall >= target_recall else 0,         # Reliability
            info['cost'],
            (info['agent'] - target) / (100 - target),   # e-Cost
            reward,
            target_recall - recall,                      # Difference
            info['distance'],
            target,
        ])
        break

  df = pd.DataFrame(records, columns=result_columns)
  display(df)
  run_frames.append(df)

# Stack all runs; each run keeps its own 0..n-1 index, as before
df_all_runs = pd.concat(run_frames) if run_frames else pd.DataFrame(columns=result_columns)
df_all_runs['Model'] = model_name
df_all_runs['Model_settings'] = tb_log_name
df_all_runs['Target_Recall'] = target_recall
df_all_runs

In [None]:
# Per-topic mean and standard deviation over all runs. numeric_only=True skips the string
# columns (Dataset, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_runs.groupby('Topic').mean(numeric_only=True))
display(df_all_runs.groupby('Topic').std(numeric_only=True))


#### df_all_targets

In [None]:

# Append this collection's runs to the cross-collection results.
# NOTE(review): not idempotent — re-running this cell appends the same rows again.
df_all_targets = pd.concat([df_all_targets, df_all_runs], ignore_index = True)


display(df_all_targets)

# Summary statistics, shown as the cell output
df_all_targets.describe()

In [None]:
# Mean and standard deviation per (target recall, dataset). numeric_only=True skips the
# string columns (Topic, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_targets.groupby(['Target_Recall','Dataset']).mean(numeric_only=True).round(3))
display(df_all_targets.groupby(['Target_Recall','Dataset']).std(numeric_only=True).round(3))

## RCV1

### Training

In [None]:
# Phase flag (True = training). No code visible in this notebook reads it;
# presumably consumed by the imported TAREnv module — TODO confirm.
TRAINING = True


In [None]:
# Label of the training collection; it becomes part of tb_log_name (run / model file name).
training_dataset = 'RCV1'

#### sort topics by target location

In [None]:
# LOAD RUN DATA
def load_run_data(run):
  """Read a ranking (run) file and build the per-query ranking dictionaries.

  The path is resolved against the module-level ``DIR``. Relevance labels are taken
  from the module-level ``query_rel_dic``, so that global must be set before calling.
  NOTE(review): this definition replaces any ``load_run_data`` that the earlier
  star imports may have provided — confirm that is intended.

  Args:
    run: Path of the ranking file, relative to ``DIR``.

  Returns:
    ``(doc_rank_dic, rank_rel_dic)`` as produced by ``make_rank_dic`` and
    ``make_rank_rel_dic``.
  """
  # 'utf-8-sig' drops a leading byte-order mark (works around the file encoding problem)
  with open(os.path.join(DIR, run), 'r', encoding='utf-8-sig') as ranking_file:
    ranking_lines = ranking_file.readlines()

  # ranked docids for each queryid
  ranked_docs = make_rank_dic(ranking_lines)
  # relevance of the ranked docs for each queryid
  ranked_rels = make_rank_rel_dic(query_rel_dic, ranked_docs)

  return ranked_docs, ranked_rels




# Load the RCV1 training collection: relevance judgements first, then the ranking.
dataset_name = 'RCV1'

qrels = "data/qrels/rcv1_qrels_selected_wo45_0.2.txt" # use the same qrel list as their rankings


qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# NOTE(review): the first path is dead — it is overwritten immediately by the next line.
run = "data/rankings/RCV1_test_20wo45_ranking.txt"
run = "data/rankings/temp/RCV1_test_20wo45_ranking_utf8_2.txt"

doc_rank_dic, rank_rel_dic = load_run_data(run)  # uses the load_run_data defined at the top of this cell



topics_list = make_topics_list(doc_rank_dic,1)  # sort topics by no docs

In [None]:
# only selected 1st level topics
topics_list = ['C151', 'C171', 'C181', 'C311', 'C331', 'C411', 'E121', 'E131', 'E141', 'E211', 'E311', 'E411', 'E511', 'G151', 'M131', 'M141']

In [None]:
# One row per topic: [topic_id, n_docs, n_rel, prev, target_location]
topics_info = []

for topic in topics_list:
  stats = load_topic_target_location(topic, target_recall)
  topic_id, n_docs, n_rel, prev, target_location = stats
  print(topic_id, n_docs, n_rel, round(prev, 3), target_location)
  topics_info.append([topic_id, n_docs, n_rel, prev, target_location])

topics_info

In [None]:
# import pandas as pd
import pandas as pd


# Tabulate the per-topic statistics, ordered by ascending target location
# (pass ascending=False to sort_values for the reverse order)
df = (
    pd.DataFrame(
        topics_info,
        columns=['topic_id', 'n_docs', 'n_rel', 'prev', 'target_location'],
    )
    .sort_values(by=['target_location'])
)
df

In [None]:
# Topic ids in the row order of df (already sorted by target location)
sorted_target_loc_topics = [topic for topic in df['topic_id']]
sorted_target_loc_topics

#### ordered topics

In [None]:
TRAINING = True

# Topic order (sorted by target location) and its index counter, starting at 0
SELECTED_TOPICS_ORDERERD_INDEX = 0
SELECTED_TOPICS_ORDERERD = sorted_target_loc_topics

# Instantiate the vec env

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction

vec_env_train = vec_env
train_size = len(topics_list)

#### PPO

In [None]:


# Run name shared by the TensorBoard logs and the saved model file
tb_log_name = (
    f"{model_name}_{training_dataset}_ppo_gma_{gamma}_nsteps{n_steps}"
    f"_btch{batch_size}_timesteps_{total_timesteps}_ent_coef{ent_coef}"
    f"{learning_rate_type}_n_epochs{n_epochs}_target{target_recall}"
)

# PPO agent configured from the hyperparameter cell
model = PPO(
    policy='MlpPolicy',
    env=vec_env_train,
    learning_rate=learning_rate,
    n_steps=n_steps,
    batch_size=batch_size,
    n_epochs=n_epochs,
    gamma=gamma,
    gae_lambda=0.98,
    ent_coef=ent_coef,
    tensorboard_log=tensorboard_log,
    verbose=1,
    seed=0,
)

model.learn(total_timesteps=total_timesteps, tb_log_name=tb_log_name)

# Save the trained model under the log directory prefix
model.save(f"{tensorboard_log}model_{tb_log_name}")




In [None]:
# Open TensorBoard inline on the log directory ($tensorboard_log is expanded by IPython)
%tensorboard --logdir "$tensorboard_log"


### Testing

In [None]:
# Testing phase: switch the phase flag off, as the CLEF testing section does. Without
# this the flag keeps the True left over from the RCV1 training cells.
# NOTE(review): the flag is presumably read by the imported TAREnv module — confirm.
TRAINING = False

# Load the RCV1 test collection: relevance judgements first, then the ranking.
# dataset_name is written to the 'Dataset' column of the evaluation results.
dataset_name = 'RCV1'

qrels = "data/qrels/rcv1_qrels_selected_45_0.2.txt" # use the same qrel list as their rankings

qrel_fname, query_rel_dic = load_rel_data(qrels)
print("Number of topics:", len(query_rel_dic))

# Ranking (run) file for the test topics
run = "data/rankings/RCV1_selected_45_0.2_ranking.txt"

doc_rank_dic, rank_rel_dic = load_run_data(run)

topics_list = make_topics_list(doc_rank_dic,1)  # sort topics by no docs


In [None]:
# Instantiate the vec env with as many environments as there are entries in topics_list

#random topic selection for each env instance
SELECTED_TOPICS = []  # cleared before construction; keeps track of all randomly selected topics

vec_env = make_vec_env(
    TAREnv,
    n_envs=len(topics_list),
    env_kwargs={
        'target_recall': target_recall,
        'topics_list': topics_list,
        'topic_id': None,
        'size': 100,
        'render_mode': 'human',
    },
)

SELECTED_TOPICS = []  # cleared again after construction


In [None]:
# Alias the freshly built vec env for evaluation and record how many envs it holds
vec_env_test = vec_env
test_size = len(topics_list)

#### TAR Eval

In [None]:

# Evaluate the trained agent: `total_runs` passes over every test environment, producing
# one result row per finished episode.
#
# Changes from the previous version of this cell:
#   * the discarded `df.groupby('Topic').mean()` calls are gone (pandas >= 2.0 raises a
#     TypeError on the string 'Dataset' column);
#   * the per-run frame is built once per run instead of on every finished episode;
#   * a run in which no episode finishes yields an empty frame instead of re-appending
#     the previous run's rows.
test_steps = 100  # maximum agent steps per episode; unfinished episodes produce no row

result_columns = ['Dataset', 'Topic', 'Run', 'Recall', 'Reliability', 'Cost', 'e-Cost',
                  'Reward', 'Difference', 'Distance', 'Target']

df = pd.DataFrame(columns=result_columns)
run_frames = []  # one frame per evaluation run

for run in range(total_runs):

  # Test the trained agent
  vec_env_test = vec_env
  obs = vec_env_test.reset()

  records = []  # result rows collected during this run

  for eID in range(test_size):

    # Drive the individual environment held by the vec env
    env = vec_env_test.envs[eID]
    obs, info = env.reset()

    for step in range(test_steps):
      # deterministic=False: the action is sampled, so runs can differ
      action, _ = model.predict(obs, deterministic=False)
      obs, reward, done, trun, info = env.step(action)

      if done or trun:
        recall = info['recall']
        target = info['target']
        records.append([
            dataset_name,
            info['topic_id'],
            run,
            recall,
            1 if recall >= target_recall else 0,         # Reliability
            info['cost'],
            (info['agent'] - target) / (100 - target),   # e-Cost
            reward,
            target_recall - recall,                      # Difference
            info['distance'],
            target,
        ])
        break

  df = pd.DataFrame(records, columns=result_columns)
  display(df)
  run_frames.append(df)

# Stack all runs; each run keeps its own 0..n-1 index, as before
df_all_runs = pd.concat(run_frames) if run_frames else pd.DataFrame(columns=result_columns)
df_all_runs['Model'] = model_name
df_all_runs['Model_settings'] = tb_log_name
df_all_runs['Target_Recall'] = target_recall
df_all_runs

In [None]:
# Per-topic mean and standard deviation over all runs. numeric_only=True skips the string
# columns (Dataset, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_runs.groupby('Topic').mean(numeric_only=True))
display(df_all_runs.groupby('Topic').std(numeric_only=True))


#### df_all_targets

In [None]:

# Append this collection's runs to the cross-collection results.
# NOTE(review): not idempotent — re-running this cell appends the same rows again.
df_all_targets = pd.concat([df_all_targets, df_all_runs], ignore_index = True)


display(df_all_targets)

# Summary statistics, shown as the cell output
df_all_targets.describe()

In [None]:
# Mean and standard deviation per (target recall, dataset). numeric_only=True skips the
# string columns (Topic, Model, Model_settings), which pandas >= 2.0 refuses to aggregate.
display(df_all_targets.groupby(['Target_Recall','Dataset']).mean(numeric_only=True).round(3))
display(df_all_targets.groupby(['Target_Recall','Dataset']).std(numeric_only=True).round(3))

### vis all datasets

In [None]:
# Mean of every numeric metric per (target recall, dataset) across all collections.
# numeric_only=True skips the string columns (Topic, Model, Model_settings), which
# pandas >= 2.0 refuses to aggregate.
display(df_all_targets.groupby(['Target_Recall','Dataset']).mean(numeric_only=True).round(3))

In [None]:
# LaTeX source of the summary table: mean Recall, Reliability, Cost and e-Cost per
# (target recall, dataset), rounded to 3 decimals. Only the grouping keys and numeric
# metrics are selected; the string is shown as the cell output.
(df_all_targets[['Target_Recall', 'Dataset', 'Recall' , 'Reliability', 'Cost', 'e-Cost']].groupby(['Target_Recall','Dataset']).mean().round(3)).to_latex()


