In [1]:
import sys
sys.path.append("../")

In [2]:
import os

import numpy as np
import pandas as pd
import random
import joblib

from agents.matchmanager import buildMatchManager
from core.game.state import vectorStateInfo, vectorState

from datetime import datetime
from multiprocessing import Pool

In [3]:
# Agent types that can be assigned to either side of a match.
# ('AlphaBetaAgent' is currently disabled.)
agents = ['RandomAgent', 'GreedyAgent']

# Scenarios to simulate. ('scenarioJunctionExo' is currently disabled.)
scenarios = ['scenarioJunction']

In [4]:
def game(args):
    """Play a single match to completion and store its state history on disk.

    Parameters
    ----------
    args : tuple
        ``(scenario, red, blue, seed)`` packed into one tuple so that the
        function can be handed directly to ``Pool.map``.

    Returns
    -------
    None
        The result is written to
        ``../data/{scenario}.{red}.{blue}.{seed}.pkl.gz`` as a gzip-compressed
        pickled DataFrame. If that file already exists the match is skipped.
    """
    scenario, red, blue, seed = args

    output = f'../data/{scenario}.{red}.{blue}.{seed}.pkl.gz'

    # Matches that were already simulated are not replayed, which makes it
    # cheap to re-run the whole batch after an interruption.
    if os.path.exists(output):
        return

    mm = buildMatchManager('', scenario, red, blue, seed=seed)

    while not mm.end:
        mm.nextStep()

    # One row per recorded state, with the columns named by vectorStateInfo().
    cols = vectorStateInfo()
    data = [vectorState(x) for x in mm.states_history]

    df = pd.DataFrame(columns=cols, data=data)

    # Every row carries the final outcome and the metadata of its match.
    df['winner'] = mm.winner
    df['meta_p_red'] = red
    df['meta_p_blue'] = blue
    df['meta_seed'] = seed
    df['meta_scenario'] = scenario

    # Create the target directory on demand; exist_ok avoids a race when
    # several worker processes reach this point at the same time.
    os.makedirs(os.path.dirname(output), exist_ok=True)
    df.to_pickle(output, compression='gzip')


In [5]:
# Fix the global NumPy seed so the same list of matches is drawn every time.
np.random.seed(20210215)

# Draw 128 match configurations: a scenario, an agent for each side and the
# seed of the match itself. The draws happen in the order
# scenario -> red -> blue -> seed.
args = []
for _ in range(128):
    scenario, red, blue = (
        np.random.choice(options) for options in (scenarios, agents, agents)
    )
    seed = np.random.randint(100000000, 999999999)
    args.append((scenario, red, blue, seed))

In [6]:
# Play all matches in parallel. The pool is capped at 64 workers, but never
# uses more processes than there are CPUs or matches: the simulations are
# CPU-bound, so extra processes would only add overhead on smaller machines.
n_workers = max(1, min(64, os.cpu_count() or 1, len(args)))

with Pool(n_workers) as pool:
    # game() writes its result to disk and returns None, so the mapped
    # results are intentionally discarded.
    pool.map(game, args)

Combine all per-match files of each scenario into a single compressed file.

In [7]:
def compress(scenario):
    """Merge all per-match files of one scenario into a single DataFrame.

    Reads every ``{scenario}.*.pkl.gz`` file found in ``../data``,
    concatenates them and writes the result to
    ``../data.{YYYY-MM-DD}.{scenario}.pkl.gz`` (dated with today's date).

    Parameters
    ----------
    scenario : str
        Name of the scenario, i.e. the first dot-separated component of the
        per-match file names.

    Returns
    -------
    pandas.DataFrame
        The concatenation of all matching per-match DataFrames.

    Raises
    ------
    ValueError
        If no file for ``scenario`` is found in the data directory.
    """
    today = datetime.today().strftime('%Y-%m-%d')
    data_dir = '../data'

    # Anchor on the complete first name component: a plain substring test
    # would also pick up the files of any scenario whose name merely contains
    # this one (e.g. 'scenarioJunction' vs. 'scenarioJunctionExo').
    # os.listdir() returns entries in arbitrary order, so sort them to make
    # the row order of the result reproducible.
    files = sorted(
        f for f in os.listdir(data_dir)
        if f.startswith(f'{scenario}.') and f.endswith('.pkl.gz')
    )

    if not files:
        raise ValueError(f'no match files found for scenario {scenario!r} in {data_dir!r}')

    # NOTE(review): read_pickle can execute arbitrary code; this assumes the
    # data directory only holds files written by this notebook.
    # The files are concatenated in batches of 1000 and the batches are then
    # concatenated once more.
    batches = [
        pd.concat([
            pd.read_pickle(os.path.join(data_dir, f), compression='gzip')
            for f in files[i:i + 1000]
        ])
        for i in range(0, len(files), 1000)
    ]

    dfs = pd.concat(batches)

    dfs.to_pickle(f'../data.{today}.{scenario}.pkl.gz', compression='gzip')

    return dfs

In [8]:
# One worker process per scenario; `raw` receives the combined DataFrames in
# the same order as `scenarios`.
with Pool(processes=len(scenarios)) as pool:
    raw = pool.map(compress, scenarios)

# Data

In [11]:
def dfForTeam(df: pd.DataFrame, team:str):
    """Build the per-team view of a match table.

    The result keeps every column whose name contains ``team`` together with
    the ``winner`` column, where ``winner`` is re-encoded as ``1`` for rows
    whose winner equals ``team`` and ``-1`` for every other value. The input
    frame is left untouched.
    """
    def encode(winner):
        # +1 when this team won, -1 in every other case.
        if winner == team:
            return 1
        return -1

    team_columns = [name for name in df.columns if team in name]
    team_df = df[team_columns + ['winner']].copy()
    team_df['winner'] = df['winner'].apply(encode)
    return team_df

In [9]:
# Take the first combined frame, discard every column that contains a missing
# value, then remove the bookkeeping columns that describe the match itself.
meta_columns = ['meta_scenario', 'meta_p_red', 'meta_p_blue', 'meta_seed']
df = raw[0].dropna(axis='columns').drop(columns=meta_columns)

In [10]:
# Display the (rows, columns) dimensions of df.
df.shape

(5457, 292)

In [12]:
# Derive one frame per team, red first and blue second.
df_red, df_blue = (dfForTeam(df, side) for side in ("red", "blue"))

In [13]:
# Display the (rows, columns) dimensions of both per-team frames.
df_red.shape, df_blue.shape

((5457, 145), (5457, 145))

In [15]:
# Red team: the target is the 'winner' column, the features are all the rest.
y_red = df_red['winner']
X_red = df_red.drop(columns=['winner'])

In [16]:
# Blue team: the target is the 'winner' column, the features are all the rest.
y_blue = df_blue['winner']
X_blue = df_blue.drop(columns=['winner'])

# Regressors

In [14]:
from sklearn.ensemble import RandomForestRegressor

In [17]:
# Train one random-forest regressor per team on that team's features, with
# the +1/-1 'winner' column as target, and persist both models.
# An explicit random_state makes the fitted forests reproducible; without it
# the result depends on whatever the global NumPy RNG state happens to be.
# The value matches the date tag used in the model file names.
rfr_red = RandomForestRegressor(random_state=20210215)
rfr_blue = RandomForestRegressor(random_state=20210215)

rfr_red.fit(X_red, y_red)
rfr_blue.fit(X_blue, y_blue)

# joblib.dump does not create missing directories.
os.makedirs('../models', exist_ok=True)

joblib.dump(rfr_red, '../models/Junction_RandomForestRegressor_red_20210215.joblib')
joblib.dump(rfr_blue, '../models/Junction_RandomForestRegressor_blue_20210215.joblib')

# Classifiers

In [28]:
from sklearn.ensemble import RandomForestClassifier

In [29]:
# Train one random-forest classifier per team on that team's features, with
# the +1/-1 'winner' column as class label, and persist both models.
# An explicit random_state makes the fitted forests reproducible; without it
# the result depends on whatever the global NumPy RNG state happens to be.
# The value matches the date tag used in the model file names.
rfc_red = RandomForestClassifier(random_state=20210215)
rfc_blue = RandomForestClassifier(random_state=20210215)

rfc_red.fit(X_red, y_red)
rfc_blue.fit(X_blue, y_blue)

# joblib.dump does not create missing directories.
os.makedirs('../models', exist_ok=True)

joblib.dump(rfc_red, '../models/Junction_RandomForestClassifier_red_20210215.joblib')
joblib.dump(rfc_blue, '../models/Junction_RandomForestClassifier_blue_20210215.joblib')

['../models/Junction_RandomForestClassifier_blue_20210215.joblib']