In [1]:
import sys
sys.path.append("../")

In [2]:
import os

import numpy as np
import pandas as pd
import random
import joblib

from agents.matchmanager import buildMatchManager
from core.game.state import vectorStateInfo, vectorState

from datetime import datetime
from multiprocessing import Pool

In [3]:
agents = [
    'RandomAgent', 
#     'AlphaBetaAgent', 
    'GreedyAgent'
]
scenarios = [
#     'scenarioJunctionExo', 
    'scenarioJunction', 
]

In [4]:
def game(args):
    scenario, red, blue, seed = args
    vectors=[]

    output = f'../data/{scenario}.{red}.{blue}.{seed}.pkl.gz'
    
    if os.path.exists(output):
        return
        
    mm = buildMatchManager('', scenario, red, blue, seed=seed)
    
    while not mm.end:
        mm.nextStep()

    cols = vectorStateInfo()
    data = [vectorState(x) for x in mm.states_history]

    df = pd.DataFrame(columns=cols, data=data)
    
    df['winner'] = mm.winner
    df['meta_p_red'] = red
    df['meta_p_blue'] = blue
    
    df.to_pickle(output, compression='gzip')


In [5]:
args = []

np.random.seed(20210215)

for _ in range(1024):
    scenario = np.random.choice(scenarios)
    red = np.random.choice(agents)
    blue = np.random.choice(agents)
    seed = np.random.randint(100000000, 999999999)
    
    args.append((scenario, red, blue, seed))

In [6]:
with Pool(64,maxtasksperchild=10) as p:
    p.map(game, args)

Compress everything in a single file, for each scenario

In [7]:
def compress(scenario):
    today = datetime.today().strftime('%Y-%m-%d')
    data_dir = '../data'
    
    files = [f for f in os.listdir(data_dir) if scenario in f]
    
    dfs = [
        pd.concat([
            pd.read_pickle(os.path.join(data_dir, f), compression='gzip') for f in files[i:i+1000]
        ]) for i in range(0, len(files), 1000)
    ]
    
    dfs = pd.concat(dfs)
    
    dfs.to_pickle(f'../data.{today}.{scenario}.pkl.gz', compression='gzip')
    
    return dfs

In [8]:
with Pool(len(scenarios)) as p:
    p.map(compress, scenarios)

# Data

In [52]:
raw = pd.read_pickle('../data.2021-02-16.scenarioJunction.pkl.gz')\
        .drop(['meta_scenario', 'meta_p_red', 'meta_p_blue', 'meta_seed'], axis=1, errors="ignore")

## Vertical split

In [35]:
def dfForTeam(df: pd.DataFrame, team:str):
    df_new = df[[c for c in df.columns if team in c] + ['winner']].copy()
    df_new['label'] = df['winner'].apply(lambda x: 1 if x == team else -1)
    return df_new

In [36]:
df = raw.dropna(axis=1, how='all')

In [37]:
df.shape

(48265, 306)

In [38]:
df_red = dfForTeam(df, "red")
df_blue = dfForTeam(df, "blue")

In [39]:
df_red.shape, df_blue.shape

((48265, 146), (48265, 146))

## Horizontal Split

In [54]:
df = raw.dropna(axis=1, how='all')

In [71]:
for c in df.columns:
    if df[c].dtype == 'object':
        print(c, set(df[c]))

action_team {'red', 'blue', None}
action_type {'PassFigure', 'PassTeam', 'AttackRespond', 'Attack', 'LoadInto', None, 'Move'}
action_guard_id {nan, nan, 2.0, nan, nan, 1.0, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, na

In [55]:
df_red = df[df['action_team'] == 'red'].copy()
df_blue = df[df['action_team'] == 'blue'].copy()

In [56]:
df_red['label'] = df_red['winner'].apply(lambda x: 1 if x == 'red' else -1)
df_blue['label'] = df_blue['winner'].apply(lambda x: 1 if x == 'blue' else -1)

In [57]:
df_red.shape, df_blue.shape

((21981, 305), (25260, 305))

## Both

In [58]:
X_red = df_red.drop(['winner', 'label'], axis=1)
y_red = df_red['label']

X_blue = df_blue.drop(['winner'], axis=1)
y_blue = df_blue['label']

# Regressors

In [17]:
from sklearn.ensemble import RandomForestRegressor

In [68]:
from sklearn.preprocessing import OrdinalEncoder

In [69]:
X_red.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 21981 entries, 1 to 83
Columns: 303 entries, turn to response
dtypes: bool(90), float64(5), int64(199), object(9)
memory usage: 37.8+ MB


In [None]:
pipes = Pipeline(steps=[
    ('OrdEncoder', OrdinalEncoder(handle_unknown='use_encoded_value')),
    ('Regression', RandomForestRegressor())
])
    preprocessor = ColumnTransformer(transformers=[('cat', categorical_transformer, c)])
    classifiers = [
        ,
    ]

    for classifier in classifiers:
        pipe = Pipeline(steps=[('preprocessor', preprocessor),
                               ('scale', StandardScaler()),
                               ('classifier', classifier)])
        pipe.fit(X, y)
        file_name = f'{name}_{classifier.__class__.__name__}.joblib'
        joblib.dump(pipe, out + file_name)

In [59]:
rfr_red = RandomForestRegressor()
rfr_blue = RandomForestRegressor()

rfr_red.fit(X_red, y_red)
rfr_blue.fit(X_blue, y_blue)

joblib.dump(rfr_red, '../models/Junction_RandomForestRegressor_red_20210216_hori.joblib')
joblib.dump(rfr_blue, '../models/Junction_RandomForestRegressor_blue_20210216_hori.joblib')

ValueError: could not convert string to float: 'red'

# Classifiers

In [19]:
from sklearn.ensemble import RandomForestClassifier

In [42]:
rfc_red = RandomForestClassifier()
rfc_blue = RandomForestClassifier()

rfc_red.fit(X_red, y_red)
rfc_blue.fit(X_blue, y_blue)

joblib.dump(rfc_red, '../models/Junction_RandomForestClassifier_red_20210216_vert.joblib')
joblib.dump(rfc_blue, '../models/Junction_RandomForestClassifier_blue_20210216_vert.joblib')

['../models/Junction_RandomForestClassifier_blue_20210216_vert.joblib']