In [None]:
import sys
sys.path.append("../")

In [None]:
import os

import numpy as np
import pandas as pd
import joblib

from agents.matchmanager import buildMatchManager
from core.game.state import vectorStateInfo, vectorState, vectorActionInfo, vectorAction

from datetime import datetime
from multiprocessing import Pool

In [None]:
# Agents that can be drawn for either side.
# Currently disabled alternatives: 'RandomAgent', 'AlphaBetaAgent'.
agents = ['GreedyAgent']

# Scenarios to simulate.
# Currently disabled alternative: 'scenarioJunctionExo'.
scenarios = ['scenarioJunction']

In [None]:
def game(args):
    """Play one full match and store its state/action history as a gzip pickle.

    Parameters
    ----------
    args : tuple
        ``(scenario, red, blue, seed)``. The four values are passed on to
        ``buildMatchManager`` (``seed`` as a keyword). They are packed in a single
        tuple so that the function can be used directly with ``Pool.map``.

    Returns
    -------
    None
        The history is written to ``../data/{scenario}.{red}.{blue}.{seed}.pkl.gz``.
        Nothing is played or written when that file already exists.
    """
    scenario, red, blue, seed = args

    data_dir = '../data'
    output = f'{data_dir}/{scenario}.{red}.{blue}.{seed}.pkl.gz'

    # Results are cached on disk: a match that was already played is skipped.
    if os.path.exists(output):
        return

    # NOTE(review): the meaning of the empty first argument is not visible here — confirm.
    mm = buildMatchManager('', scenario, red, blue, seed=seed)

    while not mm.end:
        mm.nextStep()

    states_cols = vectorStateInfo()
    states_data = [vectorState(x) for x in mm.states_history]
    df_state = pd.DataFrame(columns=states_cols, data=states_data)

    actions_cols = vectorActionInfo()
    actions_data = [vectorAction(x) for x in mm.actions_history]
    df_action = pd.DataFrame(columns=actions_cols, data=actions_data)

    # Side-by-side: one row per index, state columns followed by action columns.
    df = pd.concat([df_state, df_action], axis=1)

    df['winner'] = mm.winner
    df['meta_p_red'] = red
    df['meta_p_blue'] = blue

    os.makedirs(data_dir, exist_ok=True)

    # Write to a temporary file first and rename it afterwards: an interrupted worker must
    # never leave a truncated file under the final name, because the existence check above
    # would then skip this match forever. The temporary name deliberately does not contain
    # the scenario name, so it is never mistaken for a finished per-game file.
    partial = os.path.join(data_dir, f'.partial.{os.getpid()}.{seed}.tmp')
    try:
        df.to_pickle(partial, compression='gzip')
        os.replace(partial, output)
    finally:
        # Only true when writing failed before the rename.
        if os.path.exists(partial):
            os.remove(partial)


In [None]:
# Seed NumPy's global generator so that the same list of matches is sampled on every run.
np.random.seed(20210215)

# 1024 matches; for each one the draws happen in this order:
# scenario, red agent, blue agent, match seed.
args = [
    (
        np.random.choice(scenarios),
        np.random.choice(agents),
        np.random.choice(agents),
        np.random.randint(100000000, 999999999),
    )
    for _ in range(1024)
]

In [None]:
# Play all the sampled matches in parallel on 64 worker processes; each worker process
# is replaced by a fresh one after it has completed 10 tasks.
with Pool(processes=64, maxtasksperchild=10) as pool:
    pool.map(game, args)

Merge all the per-game files into a single compressed file for each scenario.

In [None]:
def compress(scenario):
    """Merge every per-game pickle of ``scenario`` into one gzip-compressed pickle.

    Reads the files named ``{scenario}.*.pkl.gz`` in ``../data``, concatenates them and
    writes the result to ``../data.{today}.{scenario}.pkl.gz``, where ``today`` is the
    current date formatted as ``YYYY-MM-DD``.

    Parameters
    ----------
    scenario : str
        Scenario name, i.e. the first dot-separated field of the per-game file names.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all the per-game frames, in file-name order.

    Raises
    ------
    ValueError
        If no per-game file exists for ``scenario``.
    """
    today = datetime.today().strftime('%Y-%m-%d')
    data_dir = '../data'

    # Match on the exact "<scenario>." prefix: a plain substring test would also pick up
    # the files of any scenario whose name merely contains this one (for instance
    # 'scenarioJunctionExo' while compressing 'scenarioJunction'). Sorting makes the row
    # order independent of the arbitrary order returned by os.listdir.
    files = sorted(
        f for f in os.listdir(data_dir)
        if f.startswith(f'{scenario}.') and f.endswith('.pkl.gz')
    )

    if not files:
        raise ValueError(f'no data files found for scenario {scenario!r} in {data_dir}')

    # NOTE(review): unpickling can execute arbitrary code; only run this on trusted files.
    # Files are concatenated in batches of 1000, then the batches are merged.
    dfs = [
        pd.concat([
            pd.read_pickle(os.path.join(data_dir, f), compression='gzip') for f in files[i:i+1000]
        ]) for i in range(0, len(files), 1000)
    ]

    dfs = pd.concat(dfs)

    dfs.to_pickle(f'../data.{today}.{scenario}.pkl.gz', compression='gzip')

    return dfs

In [None]:
# One worker process per scenario; the frames returned by `compress` are discarded here.
with Pool(processes=len(scenarios)) as pool:
    pool.map(compress, scenarios)

# Data

In [None]:
# Load the merged dataset and discard the bookkeeping columns; columns that are not
# present in the file are silently ignored.
raw = (
    pd.read_pickle('../data.2021-02-17.scenarioJunction.pkl.gz')
    .drop(columns=['meta_scenario', 'meta_p_red', 'meta_p_blue', 'meta_seed'], errors="ignore")
)

In [None]:
# (rows, columns) of the loaded frame, after the metadata columns were dropped.
raw.shape

In [None]:
# Keep only the columns that hold at least one non-missing value.
df = raw.dropna(how='all', axis='columns')

In [None]:
# (rows, columns) once the columns containing only missing values have been removed.
df.shape

In [None]:
# Split the rows by acting team; 'action_team' is constant within each split, so it is dropped.
# The frames are independent copies because new columns are added to them afterwards.
df_red = df.loc[df['action_team'] == 'red'].drop(columns='action_team').copy()
df_blue = df.loc[df['action_team'] == 'blue'].drop(columns='action_team').copy()

In [None]:
def _win_label(team):
    """Build the mapper applied to 'winner': 1 when `team` won, -1 otherwise."""
    def to_label(winner):
        if winner == team:
            return 1
        return -1
    return to_label


# The label is expressed from the point of view of the team that is acting.
df_red['label'] = df_red['winner'].apply(_win_label('red'))
df_blue['label'] = df_blue['winner'].apply(_win_label('blue'))

In [None]:
# (rows, columns) of the red-only and of the blue-only frames.
df_red.shape, df_blue.shape

In [None]:
# Features are every column except the match outcome ('winner') and the label derived from it.
X_red, y_red = df_red.drop(columns=['winner', 'label']), df_red['label']
X_blue, y_blue = df_blue.drop(columns=['winner', 'label']), df_blue['label']

In [None]:
# Pooled dataset: red rows first, then blue rows; the original index values are kept.
X = pd.concat([X_red, X_blue], axis='index')
y = pd.concat([y_red, y_blue], axis='index')

In [None]:
# Shapes of the pooled, red-only and blue-only features/labels, in that order.
X.shape, y.shape, X_red.shape, y_red.shape, X_blue.shape, y_blue.shape

# Regressors

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# A fixed random_state makes the forests, and therefore the saved model files,
# reproducible from one run of the notebook to the next.
rfr_red = RandomForestRegressor(random_state=20210215)
rfr_blue = RandomForestRegressor(random_state=20210215)
rfr = RandomForestRegressor(random_state=20210215)

# One model per team plus one trained on the pooled data.
rfr_red.fit(X_red, y_red)
rfr_blue.fit(X_blue, y_blue)
rfr.fit(X, y)

# Make sure the target directory exists, so that the trained models are not lost
# to a missing folder once the fits are done.
os.makedirs('../models', exist_ok=True)

joblib.dump(rfr_red, '../models/Junction_RandomForestRegressor_red_20210217.joblib')
joblib.dump(rfr_blue, '../models/Junction_RandomForestRegressor_blue_20210217.joblib')
joblib.dump(rfr, '../models/Junction_RandomForestRegressor_20210217.joblib')

# Classifiers

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# A fixed random_state makes the forests, and therefore the saved model files,
# reproducible from one run of the notebook to the next.
rfc_red = RandomForestClassifier(random_state=20210215)
rfc_blue = RandomForestClassifier(random_state=20210215)
rfc = RandomForestClassifier(random_state=20210215)

# One model per team plus one trained on the pooled data.
rfc_red.fit(X_red, y_red)
rfc_blue.fit(X_blue, y_blue)
rfc.fit(X, y)

# Make sure the target directory exists, so that the trained models are not lost
# to a missing folder once the fits are done.
os.makedirs('../models', exist_ok=True)

joblib.dump(rfc_red, '../models/Junction_RandomForestClassifier_red_20210217.joblib')
joblib.dump(rfc_blue, '../models/Junction_RandomForestClassifier_blue_20210217.joblib')
joblib.dump(rfc, '../models/Junction_RandomForestClassifier_20210217.joblib')