# Bayes Tree Strategy Prediction

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import MySQLdb.cursors
import MySQLdb

from bayes_tree.bayes_tree import bayes_tree  # contains bayes tree class
import config as cfg # contains unit lookups/helper functions/credentials

## Get Test Set and Prepare MySQL Cursors

In [2]:
database_list = ['sc_pvp','sc_pvt','sc_pvz','tvt','tvz','zvz']  
connection_list = []
cursor_list = []
for database in database_list:
    db = MySQLdb.connect(
    host = cfg.credentials['host'],
    user = cfg.credentials['user'],
    passwd = cfg.credentials['passwd'],
    database = database,
    cursorclass = MySQLdb.cursors.SSCursor)
    connection_list.append(db)
    cursor_list.append(db.cursor())

In [3]:
!ls pickles

clustered_replays.pickle player_races.pickle      raw_train_builds.pickle
memo_tree.pickle         raw_id_list.pickle       test_replays.pickle
obs_id_list.pickle       raw_obs.pickle           unit_counts.pickle


In [4]:
test_replays = cfg.from_pickle('pickles/test_replays.pickle')

In [5]:
test_replays.head()

Unnamed: 0,cluster,race,playerreplayID,cursor
"(sc_pvt, 2834)",1,1,2834,sc_pvt
"(sc_pvt, 3491)",1,1,3491,sc_pvt
"(sc_pvz, 2888)",13,0,2888,sc_pvz
"(sc_pvt, 3193)",1,1,3193,sc_pvt
"(sc_pvt, 4103)",1,1,4103,sc_pvt


## Generate Observation Logs

In [6]:
def get_obs(playerreplayIDs,time,cursor):
    n = 300
    results = []
    request_chunks = [playerreplayIDs[i * n:(i + 1) * n] for i in range((len(playerreplayIDs) + n - 1) // n )]
    for segment in request_chunks:
        playerreplayIDs_string = ",".join(str(x) for x in segment)
        query = """
            SELECT PlayerReplayID, visibilitychange.UnitID, UnitTypeID, ChangeVal,CAST(ChangeTime / 24 AS UNSIGNED) 
            FROM visibilitychange 
            LEFT JOIN unit ON visibilitychange.UnitID = unit.UnitID
            WHERE visibilitychange.ViewerID != unit.PlayerReplayID AND PlayerReplayID in ({}) AND ChangeTime <= {}
            ORDER BY ChangeTime, visibilitychange.UnitID;
        """.format(playerreplayIDs_string,time)
        print(query)
    
        cursor.execute(query)
        results += cursor.fetchall()
        
    print('done!')
    ids = set(map(lambda x:x[0], results))
    results_by_ids = [[y[1:] for y in results if y[0]==x] for x in ids]
    
    return results_by_ids,ids

def process_obs(results):

    obs_list_list = []
    
    for result in results:
        
        building_uniques = {}
        unit_memory = {}
        obs_list = []
        
        for obs in result:
            
            #building logic
            if obs[1] in cfg.buildings.keys():
                if obs[1] in building_uniques.keys():
                    if obs[0] in building_uniques[obs[1]]:
                        continue
                    building_uniques[obs[1]].append(obs[0])
                else:
                    building_uniques[obs[1]] = [obs[0]]
                
                obs_list.append( ( (obs[1],len(building_uniques[obs[1]])) ,obs[3]) )
                
                
            else:
                if obs[2] == 1:
                    if obs[1] in unit_memory.keys():
                        if obs[0] in unit_memory[obs[1]]:
                            continue
                        unit_memory[obs[1]].append(obs[0])
                    else:
                        unit_memory[obs[1]] = [obs[0]]
                else:
                    unit_memory[obs[1]].remove(obs[0])
                    
                current_count = len(unit_memory[obs[1]])
                    
                prev_max = max([x[0][1] for x in obs_list if x[0][0]==obs[1]]+[0])
                if current_count > prev_max:
                    obs_list.append( ( (obs[1],current_count) ,obs[3]) )
                    
        obs_list_list.append(obs_list)
                    
    return obs_list_list

In [7]:
try:
    raw_obs = cfg.from_pickle('pickles/raw_obs.pickle')
    obs_id_list = cfg.from_pickle('pickles/obs_id_list.pickle')
except:
    raw_obs = []
    obs_id_list = []
    for index,database in enumerate(database_list):
        playerreplayIDs = test_replays['playerreplayID'][test_replays['cursor'] == database]
        cursor = cursor_list[index]
        builds, ids = get_obs(playerreplayIDs,cfg.TIME-30*24,cursor)
        raw_obs.append(builds)
        obs_id_list.append(ids)
    
    cfg.to_pickle(raw_obs,'raw_obs')
    cfg.to_pickle(obs_id_list,'obs_id_list')

In [8]:
processed_obs = []
for obs in raw_obs:
    processed_obs.append(process_obs(obs))

In [9]:
all_obs = []
all_ids = []
all_unique_ids = []
all_races = []
for matchup_ind,matchup in enumerate(processed_obs):
    for match_ind,match in enumerate(matchup):
        
        live_id = list(obs_id_list[matchup_ind])[match_ind]
        all_ids.append(live_id)
        unique_id = (database_list[matchup_ind],live_id)
        all_unique_ids.append(unique_id)
        
        race = test_replays['race'][test_replays.index == unique_id].values[0]
        all_races.append(race)
           
        all_obs.append(match)

## Blind Guess

In [23]:
tree = cfg.from_pickle('pickles/memo_tree.pickle')

In [11]:
race_order = [2,1,0]
blind_prob = {}
for index,node in enumerate(tree.read_root.children):
    active_clusters = node.cluster
    total = 0
    for cluster in active_clusters.values():
        total += cluster
        
    for key in active_clusters.keys():
        active_clusters[key] = active_clusters[key] / total
    
    blind_prob[race_order[index]] = active_clusters

In [12]:
results_df = test_replays[['cluster','race']].copy()

In [13]:
results_df['blind'] = results_df.apply(lambda x: blind_prob[x[1]][x[0]],axis=1)

In [14]:
results_df.head()

Unnamed: 0,cluster,race,blind
"(sc_pvt, 2834)",1,1,0.208029
"(sc_pvt, 3491)",1,1,0.208029
"(sc_pvz, 2888)",13,0,0.124644
"(sc_pvt, 3193)",1,1,0.208029
"(sc_pvt, 4103)",1,1,0.208029


## Guess with Observations

In [15]:
results_df['bayes'] = None
results_df['num_obs'] = None

In [16]:
for index,live_id in enumerate(all_unique_ids[0:1]):
    live_obs = all_obs[index]
    results_df['num_obs'][results_df.index == live_id] = len(live_obs)
    tree.prepare
    for obs in live_obs:
        tree.update_tree(obs)
        
    prediction = tree.predict(cfg.TIME-30*24,'cluster')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
  x = np.asarray((x - loc)/scale, dtype=dtyp)


Freq Check: 0.9263766718882072
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Check: nan
Freq Chec

In [17]:
prediction

[]

In [33]:
#tree.prepare
prediction = tree.predict(1*60,'cluster')

In [34]:
sum([x[1] for x in prediction])

0.9505361744276682

In [43]:
pd.DataFrame([[cfg.tree_choices[x[0][0]],x[0][1],x[1]] for x in all_obs[150]])

Unnamed: 0,0,1,2
0,Protoss_Probe,1,160
1,Protoss_Dragoon,1,220
2,Protoss_Gateway,1,222
3,Protoss_Gateway,2,222
4,Protoss_CyberneticsCore,1,223
5,Protoss_Pylon,1,223
6,Protoss_Dragoon,2,224
7,Protoss_Nexus,1,226
8,Protoss_Probe,2,226
9,Protoss_Probe,3,227
