In [None]:
# default_exp profiler


In [None]:
#hide
from nbdev.showdoc import *


In [None]:
#exporti

import re
import pymongo
import json
import sc2reader
import errno
import jsonschema
import os

import pandas as pd
import numpy as np

from typing import *
from pathlib import Path
from pprint import pprint
from jsonschema import validate
from dataclasses import dataclass, astuple, asdict, field

from sc_training.ingest import *

# Register a CtrlGroupTracker plugin instance with sc2reader's engine when this cell runs.
# NOTE(review): CtrlGroupTracker is not defined in this cell; presumably it is provided
# by the star import from sc_training.ingest — confirm.
sc2reader.engine.register_plugin(CtrlGroupTracker())

# Section 2 - Player Profiler

## Introduction

In this section, I use the database built in Section 1.7 to define a `player_profiler` function. The function takes the data from each player's performances and processes it to compile three profiles per player, one for each of the playable races of StarCraft 2. The section compiles this function into the `profiler` module.

The following code lists the players that have at least two replays in the database.

In [None]:
# Handle to the working database; the cells below query its 'replays' and
# 'indicators' collections.
# NOTE(review): set_up_db is not defined in this notebook — presumably it is provided by
# sc_training.ingest; confirm which connection settings it uses.
working_db = set_up_db()

In [None]:
# Username prefixes excluded from the tally
# (presumably AI opponents and placeholder/anonymised names — confirm).
ignored_prefixes = ('A.I.', 'llll', 'Player 2')

# Tally, per username, how many player entries it has across all stored replays.
players_match_count = {}
for rec in working_db['replays'].find():
    for player in rec['players']:
        username = player['username']
        if username.startswith(ignored_prefixes):
            continue
        players_match_count[username] = players_match_count.get(username, 0) + 1

# Display only the usernames that were counted at least twice.
{user: total for user, total in players_match_count.items() if total >= 2}

{'HDEspino': 149,
 'DaveyC': 2,
 'Xnorms': 2,
 'Shah': 3,
 'Razer': 2,
 'gae': 2,
 'SenorCat': 2,
 'Worawit': 2,
 'aria': 2,
 'xiiaoyao': 2}

Of these players, I will focus only on `HDEspino`, given that this player has a substantial number of replays.

In any case, once I have a list of usernames in the database, I can extract all the replays related to a given player with simple queries to the database.

For example, the following queries count the replays where `HDEspino` was playing as Protoss, first as player one and then as player two.

In [None]:
# Query filters selecting HDEspino's Protoss replays, one per player slot.
as_player_one = {'players.0.username': 'HDEspino', 'players.0.race': 'Protoss'}
as_player_two = {'players.1.username': 'HDEspino', 'players.1.race': 'Protoss'}

for slot_filter in (as_player_one, as_player_two):
    # Exhaust the cursor into a list so the matching documents can be counted.
    print(len(list(working_db['replays'].find(slot_filter))))


91
39


Based on this list, I will build the Protoss profile for this player to illustrate what this process would entail.

First, I will query the system to identify the replays where the user was one of the players and was playing as Protoss. Then, I use that information to build a DataFrame containing all of the indicators for the player's performances in these replays.

In [None]:
# Names of the replays in which HDEspino is the first listed player and plays Protoss.
p1_filter = {'players.0.username': 'HDEspino', 'players.0.race': 'Protoss'}
p1_fields = {'replay_name': 1, 'players': 1}
player_1_protoss = [
    doc['replay_name']
    for doc in working_db['replays'].find(p1_filter, p1_fields)
]

# Identification fields excluded from every indicators document that is fetched.
hidden_fields = {'_id': 0, 'replay_name': 0, 'player_username': 0, 'player_id': 0}

# Map each replay name to the indicators document stored for player_id 1.
working_repls = {}
for rpl in player_1_protoss:
    matches = working_db['indicators'].find(
        {'replay_name': rpl, 'player_id': 1},
        hidden_fields,
    )
    for cur in matches:
        # When several documents match, the last one read is the one kept.
        working_repls[rpl] = cur

# Number of replays for which an indicators document was found.
len(working_repls)

91

In [None]:
# Names of the replays in which HDEspino is the second listed player and plays Protoss.
p2_filter = {'players.1.username': 'HDEspino', 'players.1.race': 'Protoss'}
p2_fields = {'replay_name': 1, 'players': 1}
player_2_protoss = [
    doc['replay_name']
    for doc in working_db['replays'].find(p2_filter, p2_fields)
]

# Identification fields excluded from every indicators document that is fetched.
hidden_fields = {'_id': 0, 'replay_name': 0, 'player_username': 0, 'player_id': 0}

# Add the player_id 2 indicators to the working_repls mapping that already exists;
# the mapping is extended here, not re-created.
for rpl in player_2_protoss:
    matches = working_db['indicators'].find(
        {'replay_name': rpl, 'player_id': 2},
        hidden_fields,
    )
    for cur in matches:
        # When several documents match, the last one read is the one kept.
        working_repls[rpl] = cur

# Total number of replays now held in the mapping.
len(working_repls)

130

In [None]:
# One row per replay (indexed by replay name), built from the collected indicator documents.
working_df = pd.DataFrame(list(working_repls.values()), index=list(working_repls))

# Names of the columns that contain the substring 'pref'.
[column for column in working_df.columns if 'pref' in column]

['first_whole_pref_sab',
 'second_whole_pref_sab',
 'first_early_pref_sab',
 'second_early_pref_sab',
 'first_mid_pref_sab',
 'second_mid_pref_sab',
 'first_late_pref_sab',
 'second_late_pref_sab']

In [None]:
# Display only the '*_pref_sab' indicator columns.
# The selection must be made on the DataFrame (working_df): indexing the plain dict
# working_repls with a list raises "TypeError: unhashable type: 'list'".
pref_columns = [
    'first_whole_pref_sab',
    'second_whole_pref_sab',
    'first_early_pref_sab',
    'second_early_pref_sab',
    'first_mid_pref_sab',
    'second_mid_pref_sab',
    'first_late_pref_sab',
    'second_late_pref_sab',
]
working_df[pref_columns]

TypeError: unhashable type: 'list'