In [1]:
####################################################################
# 1. Libraries

import numpy as np
import pandas as pd
import os
from tqdm.notebook import tqdm

####################################################################

In [2]:
####################################################################
# 2. Global Variables


####################################################################

In [3]:
####################################################################
# 3. Classes

class UtilsEnvironment:
    """Collects one snapshot of agent/environment arrays per step and dumps them to CSV.

    Each call to ``addRecord`` produces ``num_bandits`` rows (one per bandit).
    The per-step frames are buffered and merged only when ``dataset`` is read,
    so recording N steps costs one concatenation instead of N ever-growing ones.

    Parameters
    ----------
    path_output : str
        Prefix prepended verbatim to the output file name (it is concatenated
        as a string, so a directory must end with a path separator).
    name_experiment : str, optional
        Experiment label; stored in the ``experiment`` column and used as the
        CSV file name (without extension).
    num_bandits : int, optional
        Number of bandits, i.e. the expected length of every recorded array.
        Defaults to 100.
    """

    def __init__(self, path_output, name_experiment='', num_bandits=100):
        self.path_output = path_output
        self.name_experiment = name_experiment
        self.num_bandits = num_bandits
        # Already-merged rows plus the per-step frames not merged yet.
        self._dataset = pd.DataFrame()
        self._pending_frames = []
        self.iniListsFeatures()

    @property
    def dataset(self):
        """All rows recorded so far as a single DataFrame with a fresh 0..n-1 index."""
        if self._pending_frames:
            frames = self._pending_frames
            if not self._dataset.empty:
                frames = [self._dataset] + frames
            self._dataset = pd.concat(frames, axis=0).reset_index(drop=True)
            self._pending_frames = []
        return self._dataset

    @dataset.setter
    def dataset(self, value):
        # Assigning a frame replaces everything recorded so far.
        self._dataset = value
        self._pending_frames = []

    def iniListsFeatures(self):
        """Reset the per-feature lists to empty lists.

        No method of this class fills these lists; they are kept because
        code outside this class may read them.
        """
        self.list_count = []
        self.list_count_opp = []
        self.list_count_consec_opp = []
        self.list_wins = []
        self.list_loss = []
        self.list_beta = []
        self.list_beta_std = []
        self.list_binom = []
        self.list_qwins = []
        self.list_target_probs = []

    def addRecord(self, step, agent, environment):
        """Append one snapshot (``num_bandits`` rows) of the agent and environment state.

        Parameters
        ----------
        step : int
            Step number, repeated in the ``step`` column of every row.
        agent : object
            Must expose the ``arr_*`` attributes read below; they are assumed
            to be NumPy arrays of length ``num_bandits`` -- TODO confirm
            against the agent classes.
        environment : object
            Must expose ``arr_probs``, stored in the ``list_prob`` column.
        """
        # Heuristic per-bandit value stored as 'list_vegas'. Where the
        # denominator is zero NumPy produces inf/nan rather than raising.
        arr_values = (
                agent.arr_wins - agent.arr_loss
                + agent.arr_count_opp
                - (agent.arr_count_opp > 0) * 1.5
                + agent.arr_consecutive_opp
            ) / (agent.arr_wins + agent.arr_loss + agent.arr_count_opp) * agent.arr_max_thresh

        dict_df_tmp = {
            'list_count': agent.arr_count,
            'list_count_me': agent.arr_count_me,
            'list_max_thresh': agent.arr_max_thresh,
            'list_count_opp': agent.arr_count_opp,
            'list_count_consec_opp': agent.arr_consecutive_opp,
            'list_last_pull_opp': agent.arr_last_pull_opp,
            'list_vegas': arr_values,
            'list_wins': agent.arr_wins,
            'list_loss': agent.arr_loss,
            'list_pc_wins': (agent.arr_wins - 1) / agent.arr_count_me,
            'list_pc_count_me': agent.arr_count_me / agent.arr_count,
            'list_beta': agent.arr_beta_distribs,
            'list_beta_std': agent.arr_beta_distribs_std,
            'list_qwins': agent.arr_q_wins,
            'list_id_bandit': np.arange(0, self.num_bandits),
            'list_prob': environment.arr_probs,
            # step and experiment vars
            'experiment': np.repeat(self.name_experiment, self.num_bandits),
            'step': np.repeat(step, self.num_bandits),
        }

        # copy=True snapshots the arrays now: the agent and environment keep
        # mutating them in place on later steps.
        self._pending_frames.append(pd.DataFrame(dict_df_tmp, copy=True))

    def dumpDataset(self):
        """Write the recorded dataset to ``path_output + name_experiment + '.csv'``."""
        self.dataset.to_csv(self.path_output + self.name_experiment + '.csv', index=False)
        
    
    
class MultiArmedBanditsEnvironment:
    """Two-agent multi-armed bandit simulator whose payout probabilities decay when pulled.

    Agent index 0 is the opponent and index 1 is "me" in ``cum_reward`` and
    ``cum_prob``.

    Parameters
    ----------
    debug : bool
        When true, ``pull`` prints the step and the probabilities of the
        chosen bandits.
    n_bandits : int, optional
        Number of bandits. Defaults to 100.
    decay_rate : float, optional
        Factor applied to a bandit's probability each time it is pulled.
        Defaults to 0.97.
    """

    def __init__(self, debug, n_bandits=100, decay_rate=0.97):
        self.n_bandits = n_bandits
        self.decay_rate = decay_rate
        # Initial payout probabilities, drawn uniformly from [0, 1).
        self.arr_probs = np.random.uniform(size=self.n_bandits)
        self.arr_counts = np.zeros(self.n_bandits)
        self.cum_reward = [0, 0]
        self.cum_prob = [0, 0]
        self.list_cum_reward_agent_0, self.list_cum_reward_agent_1 = [], []
        self.list_cum_prob_agent_0, self.list_cum_prob_agent_1 = [], []
        self.debug = debug

    def pull(self, step, action_opp, action_me):
        """Play one round: both agents pull their chosen bandit.

        Parameters
        ----------
        step : int
            Current step; only used in the debug output.
        action_opp : int
            Index of the bandit pulled by the opponent (agent 0).
        action_me : int
            Index of the bandit pulled by "me" (agent 1).

        Returns
        -------
        tuple
            ``(cumulative reward of me, cumulative reward of the opponent,
            action_me, action_opp, opponent's reward in this round)``.
        """
        if self.debug:
            print(f"Step: {step}")
            print(f"Prob action Me: {np.round(self.arr_probs[action_me], 4)}")
            print(f"Prob action Opp: {np.round(self.arr_probs[action_opp], 4)}")
            print('=='*20)

        # Draw order (me first, then opponent) is kept fixed so that seeded
        # runs stay reproducible.
        rn_me = np.random.random()
        rn_enemy = np.random.random()

        reward_agent_me = 1 if rn_me < self.arr_probs[action_me] else 0
        reward_agent_opp = 1 if rn_enemy < self.arr_probs[action_opp] else 0

        self.cum_reward[0] += reward_agent_opp
        self.cum_reward[1] += reward_agent_me

        # Accumulate the pre-decay probabilities of the chosen bandits.
        self.cum_prob[0] += self.arr_probs[action_opp]
        self.cum_prob[1] += self.arr_probs[action_me]

        self.list_cum_reward_agent_0.append(self.cum_reward[0])
        self.list_cum_reward_agent_1.append(self.cum_reward[1])

        self.list_cum_prob_agent_0.append(self.cum_prob[0])
        self.list_cum_prob_agent_1.append(self.cum_prob[1])

        # Applied once per pull, not once per distinct bandit: if both agents
        # pick the same bandit it is counted twice and decays twice.
        for action in (action_me, action_opp):
            self.arr_counts[action] += 1
            self.arr_probs[action] = self.arr_probs[action] * self.decay_rate

        return self.cum_reward[1], self.cum_reward[0], action_me, action_opp, reward_agent_opp
    

####################################################################

In [4]:
####################################################################
# 4. Game Simulation

num_simulations = 5

## 4.1 Initialize objects

from agent_class_vegas_slot_v0_1 import AgentRules as AgentOpp
from agent_model_offline_regressor_v0_11 import AgentRules as AgentMe

# Where the per-simulation CSV files go and how they are named; the
# simulation index is appended to the prefix.
path_dataset_output = '../03_Datasets/Dataset_v0_7/'
name_experiment_prefix = 'agent_model_offline_regressor_v0_11_vs_agent_class_vegas_slot_v0_1_1'


## 4.2 Game
for sim in tqdm(range(num_simulations)):
    utils = UtilsEnvironment(path_output=path_dataset_output,
                             name_experiment=f'{name_experiment_prefix}_{sim}')
    mba = MultiArmedBanditsEnvironment(debug=False)

    agent_opp = AgentOpp(debug=False)
    agent_me = AgentMe(debug=False)

    # Rebuilt for every simulation so each pair of agents receives fresh
    # dictionaries, whatever they do with them.
    configuration = {'episodeSteps': 2000, 'actTimeout': 0.25, 'runTimeout': 1200,
                     'banditCount': 100, 'decayRate': 0.97, 'sampleResolution': 100}
    # 0-opp / 1-me
    observation_opp = {'remainingOverageTime': 60, 'agentIndex': 0, 'reward': 0, 'step': 0, 'lastActions': []}
    observation_me = {'remainingOverageTime': 60, 'agentIndex': 1, 'reward': 0, 'step': 0, 'lastActions': []}

    # simulation
    # The episode length comes from the configuration handed to the agents,
    # so the loop and the agents cannot disagree about it.
    for step in range(configuration['episodeSteps']):
        if step >= 1:
            # From the second step on, feed back the outcome of the previous
            # round. 'reward' carries the values returned by mba.pull, which
            # are the cumulative rewards of each agent.
            observation_me['reward'] = reward_agent_me
            observation_me['step'] = step
            observation_me['lastActions'] = [action_opp, action_me]

            observation_opp['reward'] = reward_agent_opp
            observation_opp['step'] = step
            observation_opp['lastActions'] = [action_opp, action_me]

        action_me = agent_me.predict(observation_me, configuration)
        action_opp = agent_opp.predict(observation_opp, configuration)

        # Recorded after both agents have chosen but before the pull, so the
        # stored environment probabilities are those in effect for this round.
        utils.addRecord(step, agent_me, mba)

        reward_agent_me, reward_agent_opp, action_me, action_opp, last_rew_agent_opp = mba.pull(step, action_opp, action_me)

    utils.dumpDataset()

    

####################################################################

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

  self.arr_beta_skewness = (2*(self.arr_wins-self.arr_loss)*np.sqrt(self.arr_wins+self.arr_loss+1))/((self.arr_wins+self.arr_loss+2)*np.sqrt(self.arr_wins*self.arr_loss))
  self.arr_beta_skewness = (2*(self.arr_wins-self.arr_loss)*np.sqrt(self.arr_wins+self.arr_loss+1))/((self.arr_wins+self.arr_loss+2)*np.sqrt(self.arr_wins*self.arr_loss))
  'list_pc_wins' : (self.arr_wins - 1) / self.arr_count_me,
  'list_pc_count_me' : self.arr_count_me / self.arr_count,





In [5]:
####################################################################
# 5. Join Datasets

path_input = '../03_Datasets/Dataset_v0_7/'
name_dataset = 'dataset.csv'

# Read every per-experiment file in path_input and merge them into one CSV.
# Entries whose name starts with 'dataset' are skipped so that a previously
# written merged file is not merged into itself.
list_frames = []
# Sorted so the row order of the merged file does not depend on the
# arbitrary order returned by os.listdir.
for file in tqdm(sorted(os.listdir(path_input))):
    path_file = os.path.join(path_input, file)
    # os.listdir returns bare names, so the directory test must be done on the
    # joined path; testing the bare name would look in the working directory.
    if os.path.isdir(path_file) or file.startswith('dataset'):
        continue
    list_frames.append(pd.read_csv(path_file))

# Concatenate once at the end instead of re-copying the growing frame for
# every file; an empty folder yields an empty dataset rather than an error.
if list_frames:
    df_dataset = pd.concat(list_frames, axis=0).reset_index(drop=True)
else:
    df_dataset = pd.DataFrame()

df_dataset.to_csv(os.path.join(path_input, name_dataset), index=False)

####################################################################

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=36.0), HTML(value='')))


