# Getting The Blue Alliance Data Through Web API V3
___

## Introduction
This Jupyter notebook contains all of the data collection and processing for this project. The data is collected from [The Blue Alliance Web API V3](https://www.thebluealliance.com/apidocs/v3) and then processed, with some processing steps taking longer than others. Running these functions for the first time will take a **long time** (probably around 8 hours). To avoid having to collect and process the data again, the raw data from The Blue Alliance, as well as the processed data, is cached in the "cache/" directory. The contents of that directory are listed in the .gitignore file because of their size.
***

### Get All Events
get_all_events is a function that gets all of the event tags for a given week or weeks. These tags are used to retrieve specific data from The Blue Alliance through its web API. Currently get_all_events includes both regional and district competitions. There are a total of six weeks of competitions, so week must be less than or equal to 6.

In [1]:
def get_all_events(week=6, only=False):
    """Return the event keys of 2016 regional and district events by week.

    Downloads the 2016 event list from The Blue Alliance Web API v3 and keeps
    the events whose ``event_type`` is 0 or 1 and whose ``week`` satisfies the
    requested filter. The API key is read from the file ``auth.key`` in the
    current working directory.

    Args:
        week: Week number to select; must be less than or equal to 6.
        only: If True, keep only events whose week equals ``week``. If False,
            keep every event whose week is less than or equal to ``week``.

    Returns:
        A list of event keys, in the order the API returned the events.

    Raises:
        AssertionError: If ``week`` is greater than 6.

    Examples:
        get_all_events(6, False) will get all event tags for weeks 1 thru 6 (inclusive)
        get_all_events(6, True) will only get event tags for week 6

    Event type numbers for reference:
        0 == regional competitions
        1 == district competitions
    """
    # Validate before touching the disk or the network. Raised explicitly
    # (instead of via ``assert``) so the check survives ``python -O`` while
    # keeping the exception type the original code produced.
    if not week <= 6:
        raise AssertionError('Week is not <= 6')

    from io import StringIO
    import urllib3
    from pandas import read_json

    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    http = urllib3.PoolManager()
    # Close the key file deterministically and drop a trailing newline, which
    # is not valid inside an HTTP header value.
    with open('auth.key', 'r') as key_file:
        auth_key = key_file.read().strip()

    r = http.request('GET', 'https://www.thebluealliance.com/api/v3/events/2016',
                     headers={'X-TBA-Auth-Key': auth_key})
    # Wrap the payload in a file-like object: passing a literal JSON string
    # to read_json is deprecated in recent pandas releases.
    all_events = read_json(StringIO(r.data.decode('utf-8')))

    is_regional_or_district = all_events['event_type'].isin([0, 1])
    if only:
        week_matches = all_events['week'] == week
    else:
        week_matches = all_events['week'] <= week
    return all_events.loc[is_regional_or_district & week_matches, 'key'].tolist()

In [2]:
#get_all_events(2, True)

### Get Rankings
get_rankings is a function that gets the team ranking information for a given event. The event must be in the format 2016abcd, where 2016 is the year and abcd is the event key. Columns whose names end in "_a" hold that statistic averaged over the number of matches the team played. The ranking data is cached in the directory 'cache/rankings/' after the first run, but can be reloaded with new=True.

In [7]:
def get_rankings(event, new=False):
    """Return the per-team ranking table for one event.

    The event must be in the format 2016abcd, where 2016 is the year and abcd
    is the event key. The raw ``rankings`` list from The Blue Alliance is
    cached in 'cache/rankings/<event>.json'; it is downloaded only when no
    cache file exists or when ``new`` is True. The API key file ``auth.key``
    is read only when a download is actually needed.

    The first five entries of each team's ``sort_orders`` are expanded into
    the columns Ranking_Score, Auto, Scale/Challenge, Goals and Defense, each
    with a companion "_a" column holding the value divided by
    ``matches_played``. ``Ranking_Score_a`` is then replaced by the team's
    ``extra_stats`` value.

    Args:
        event: Event key, e.g. '2016mokc'.
        new: If True, ignore any cached file, download again and overwrite
            the cache.

    Returns:
        A DataFrame indexed by ``team_key``. The output shape is
        (number of teams, 12).

    Examples:
        get_rankings('2016mokc') returns the ranking data for event mokc in year
            2016 and caches it if is not already
        get_rankings('2016abca', True) forces get_ranking to update the cache for
            2016abca and returns the ranking data
    """
    from json import dump, load, loads
    from os.path import isfile
    from numpy import array as nparray
    from pandas import DataFrame

    cache_path = 'cache/rankings/' + event + '.json'

    if isfile(cache_path) and not new:
        with open(cache_path, 'r') as file:
            ranks = load(file)
    else:
        # Network dependencies and the API key are only needed on this path,
        # so a cached event can be loaded without either.
        import urllib3
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        http = urllib3.PoolManager()
        with open('auth.key', 'r') as key_file:
            auth_key = key_file.read().strip()

        r = http.request('GET', 'https://www.thebluealliance.com/api/v3/event/' + event + '/rankings',
                         headers={'X-TBA-Auth-Key': auth_key})
        ranks = loads(r.data.decode('utf-8'))['rankings']
        # Cache exactly what was parsed instead of decoding the body twice.
        with open(cache_path, 'w') as file:
            dump(ranks, file)

    ranks = DataFrame(ranks).set_index('team_key').drop(['dq', 'qual_average', 'record'], axis=1)
    stats = nparray(list(ranks['sort_orders'].values))
    played = ranks['matches_played']
    for column, name in enumerate(['Ranking_Score', 'Auto', 'Scale/Challenge', 'Goals', 'Defense']):
        ranks[name] = stats[:, column]
        ranks[name + '_a'] = stats[:, column] / played
    # The computed average ranking score is superseded by the value supplied
    # in extra_stats; the column keeps the position it was created at.
    ranks['Ranking_Score_a'] = nparray(list(ranks['extra_stats'].values)).reshape(-1,)
    ranks.drop(['sort_orders', 'extra_stats'], axis=1, inplace=True)
    return ranks

In [8]:
#get_rankings('2016abca')

### Get Matches
get_matches is a function that gets the match-by-match data for a given event tag. get_matches follows the same event tag and cache policies as get_rankings, with the cache being in 'cache/matches/'. get_matches also one-hot encodes data columns with multiple non-numerical inputs. This was done by hand to ensure proper one-hot encoding without redundancy. The output shape is (number of matches, 173).

In [9]:
def get_matches(event, new=False):
    """Return the match-by-match data for an event, flattened and one-hot encoded.

    get_matches follows the same event tag and cache policy as get_rankings:
    the raw JSON from The Blue Alliance is stored in
    'cache/matches/<event>.json' and is downloaded only when that file is
    missing or when ``new`` is True.

    Processing steps, in order:
        1. Index the frame by (comp_level, match_number) and drop the time,
           video, set number and key columns.
        2. Expand ``alliances`` into blue_/red_ team1..team3 and score columns.
        3. Expand ``score_breakdown`` into one blue_<key> and one red_<key>
           column per key; the key list is taken from the blue breakdown of
           qualification match 1. Matches without a breakdown get 0 for
           every key.
        4. Convert the breached/captured flags to 0/1 and fill NaN with 0.
        5. One-hot encode, by hand, the defense positions (2-5), each robot's
           autonomous result and each tower face's end-game state, dropping
           the original categorical columns.
        6. Check that every 'Unknown' tower face is consistent with the
           challenge/scale points, then drop the 'Unknown' columns.

    Args:
        event: Event key, e.g. '2016mokc'.
        new: If True, download again and overwrite the cache.

    Returns:
        A DataFrame indexed by (comp_level, match_number). The original
        author reports the output shape as (number of matches, 173).

    Raises:
        ValueError: If a tower face is 'Unknown' but the alliance's challenge
            or scale points are not fully explained by the other faces.

    Examples:
        get_matches('2016mokc') returns the match data and caches the data if not
            already done
        get_matches('2016abca', True) forces get_matches to update the cache and
            returns the match data
    """
    
    import urllib3
    from pandas import read_json
    from os.path import isfile
    from json import dump, loads
    from numpy import array as nparray
    from numpy import where
    
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    http = urllib3.PoolManager()
    # NOTE(review): the key file is read on every call (even on a cache hit)
    # and the handle is never explicitly closed.
    auth_key = open('auth.key', 'r').read()
    
    if isfile('cache/matches/'+ event +'.json') and not new:
        with open('cache/matches/'+ event +'.json', 'r') as file:
            matches = read_json(file)
    else:
        r = http.request('GET', 'https://www.thebluealliance.com/api/v3/event/'+ event +'/matches',
                         headers={'X-TBA-Auth-Key': auth_key})
        matches = read_json(r.data.decode('utf-8'))
        # Cache the raw response body so later calls can skip the download.
        with open('cache/matches/'+ event +'.json', 'w') as file:
            dump(loads(r.data.decode('utf-8')), file)
    
    matches = matches.set_index(['comp_level', 'match_number']).drop(['actual_time', 'post_result_time',
                                                                                'predicted_time', 'videos',
                                                                                'time', 'set_number',
                                                                                'event_key', 'key'], axis=1)
    # Flatten the nested 'alliances' dicts into team and score columns.
    alliances = matches['alliances']
    blue_teams = []
    blue_score = []
    red_teams = []
    red_score = []
    for thing in alliances:
        blue_teams.append(thing['blue']['team_keys'])
        blue_score.append(thing['blue']['score'])
        red_teams.append(thing['red']['team_keys'])
        red_score.append(thing['red']['score'])
    blue_teams = nparray(blue_teams)
    red_teams = nparray(red_teams)
    matches['blue_team1'] = blue_teams[:,0]
    matches['blue_team2'] = blue_teams[:,1]
    matches['blue_team3'] = blue_teams[:,2]
    matches['blue_score'] = blue_score
    matches['red_team1'] = red_teams[:,0]
    matches['red_team2'] = red_teams[:,1]
    matches['red_team3'] = red_teams[:,2]
    matches['red_score'] = red_score
    matches.drop(['alliances'], axis=1, inplace=True)

    # The breakdown of qualification match 1 (blue side) defines which keys
    # are expanded; this requires that match to have a score breakdown.
    keys = list(matches['score_breakdown']['qm'][1]['blue'].keys())
    blue_stats = {}
    red_stats = {}
    for key in keys:
        blue_stats[key] = []
        red_stats[key] = []

    for thing in matches['score_breakdown']:
        try:
            # Probe for a dict-like breakdown; anything without .keys()
            # (e.g. a missing breakdown) falls through to the zero fill below.
            thing.keys()
            for key in keys:
                blue_stats[key].append(thing['blue'][key])
                red_stats[key].append(thing['red'][key])
        except AttributeError:
            for key in keys:
                # -0 is simply the integer 0.
                blue_stats[key].append(-0)
                red_stats[key].append(-0)

    for key in keys:
        matches['blue_'+key] = blue_stats[key]
        matches['red_'+key] = red_stats[key]

    # Convert truthy/falsy flags to 1/0.
    for key in ['blue_teleopDefensesBreached', 'red_teleopDefensesBreached',
                'blue_teleopTowerCaptured', 'red_teleopTowerCaptured']:
        tmp = [1 if x else 0 for x in matches[key]]
        matches[key] = tmp

    matches.drop('score_breakdown', axis=1, inplace=True)
    matches = matches.fillna(0)
    
    #one hot for defense positions
    positions = []
    for i in range(2,6):
        for j in ['blue_', 'red_']:
            for k in ['A_ChevalDeFrise', 'B_Ramparts', 'C_Drawbridge', 'B_Moat', 'A_Portcullis',
                      'D_RoughTerrain', 'C_SallyPort', 'D_RockWall']:
                positions.append(j+'position'+str(i)+'_'+k)

    # The slices split '<source column>_<category>' by fixed width:
    # 'blue_positionN' is 14 characters, 'red_positionN' is 13.
    for item in positions:
        if item[0] == 'b':
            matches[item] = where(matches[item[:14]].values==item[15:], 1, 0)
        else:
            matches[item] = where(matches[item[:13]].values==item[14:], 1, 0)

    for i in range(2,6):
        for j in ['blue_', 'red_']:
            matches.drop(j+'position'+str(i), axis=1, inplace=True)
           
    #one hot for robot auto position 
    robots = []
    for i in range(1,4):
        for j in ['blue_', 'red_']:
            for k in ['None', 'Reached', 'Crossed']:
                robots.append(j+'robot'+str(i)+'Auto_'+k)

    # 'blue_robotNAuto' is 15 characters, 'red_robotNAuto' is 14.
    for item in robots:
        if item[0] == 'b':
            matches[item] = where(matches[item[:15]].values==item[16:], 1, 0)
        else:
            matches[item] = where(matches[item[:14]].values==item[15:], 1, 0)

    for i in range(1,4):
        for j in ['blue_', 'red_']:
            matches.drop(j+'robot'+str(i)+'Auto', axis=1, inplace=True)
            
    #one-hot for end-game tower action things, yeah
    towers = []
    for i in ['A', 'B', 'C']:
        for j in ['blue_', 'red_']:
            for k in ['None', 'Challenged', 'Scaled', 'Both', 'Unknown']:
                towers.append(j+'towerFace'+i+'_'+k)
    # NOTE(review): the next line is a bare expression whose result is
    # discarded; its only effect is a KeyError if the column is missing.
    matches['blue_towerFaceA']
    # 'blue_towerFaceX' is 15 characters, 'red_towerFaceX' is 14.
    for item in towers:
        if item[0] == 'b':
            matches[item] = where(matches[item[:15]].values==item[16:], 1, 0)
        else:
            matches[item] = where(matches[item[:14]].values==item[15:], 1, 0)

    for i in ['A', 'B', 'C']:
        for j in ['blue_', 'red_']:
            matches.drop(j+'towerFace'+i, axis=1, inplace=True)

    # Sanity check for 'Unknown' tower faces: the alliance's challenge and
    # scale points must be fully accounted for by the other faces
    # (5 per Challenged, 15 per Scaled, 5 + 15 for a single Both), so an
    # Unknown face is only accepted when it contributes no points.
    # NOTE(review): column[l] uses an integer on a (comp_level, match_number)
    # index; this relies on positional fallback, which depends on the pandas
    # version - confirm before upgrading.
    for i in ['A', 'B', 'C']:
        for j in ['blue_', 'red_']:
            for k in ['Unknown']:
                for l in range(len(matches)):
                    test0 = matches[j+'towerFace'+i+'_'+k][l]
                    if test0:
                        # test1/test2: challenge/scale points left unexplained;
                        # test3: number of faces marked 'Both'.
                        test1 = matches[j+'teleopChallengePoints'][l]-(matches[j+'towerFace'+'A'+'_'+'Challenged'][l]+matches[j+'towerFace'+'B'+'_'+'Challenged'][l]+matches[j+'towerFace'+'C'+'_'+'Challenged'][l])*5
                        test2 = matches[j+'teleopScalePoints'][l]-(matches[j+'towerFace'+'A'+'_'+'Scaled'][l]+matches[j+'towerFace'+'B'+'_'+'Scaled'][l]+matches[j+'towerFace'+'C'+'_'+'Scaled'][l])*15
                        test3 = (matches[j+'towerFace'+'A'+'_'+'Both'][l]+matches[j+'towerFace'+'B'+'_'+'Both'][l]+matches[j+'towerFace'+'C'+'_'+'Both'][l])
                        if test3 == 1:
                            test1 -= 5
                            test2 -= 15
                            if (test1 == 0.0) and (test2 == 0.0):
                                pass
                            else:
                                print(event, j+'towerFace'+i+'_'+k, l, test0, test1, test2, test3)
                                raise ValueError("Unknown is undetermined")
                        elif test3 == 0:
                            if (test1 == 0.0) and (test2 == 0.0):
                                pass
                            else:
                                print(event, j+'towerFace'+i+'_'+k, l, test0, test1, test2, test3)
                                raise ValueError("Unknown is undetermined")
                        else:
                            print(event, j+'towerFace'+i+'_'+k, l, test0, test1, test2, test3)
                            raise ValueError("Unknown both is undetermined")

    # The 'Unknown' indicator columns are dropped once the check has passed.
    for i in ['A', 'B', 'C']:
        for j in ['blue_', 'red_']:
            for k in ['Unknown']:
                matches.drop([j+'towerFace'+i+'_'+k], axis=1, inplace=True)
        
    #matches.to_csv('test.csv', sep=',')
    return matches

In [10]:
#get_matches('2016mokc')

### Get FIRST Predictions

In [11]:
def get_first_pred(event, new=False):
    """Compare the published qualification-match predictions with actual results.

    The 'qual' match predictions for ``event`` are loaded from
    'cache/first_pred/<event>.json', or downloaded from The Blue Alliance
    (and cached) when that file is missing or ``new`` is True. They are then
    walked in match order and compared with the ``winning_alliance`` column
    returned by ``get_matches(event)``. Note that ``new`` is not forwarded to
    get_matches.

    The i-th prediction (1-based, after sorting) is compared with
    qualification match number i, so the predictions are assumed to cover
    matches 1..N without gaps.

    Args:
        event: Event key, e.g. '2016abca'.
        new: If True, download the predictions again and overwrite the cache.

    Returns:
        A numpy array built from one (predicted winner, actual winner,
        running accuracy) tuple per prediction. Because each tuple mixes
        text and a float, numpy converts the values to a common dtype.
    """
    from json import loads, dump, load
    from os.path import isfile
    from numpy import array as nparray
    import warnings
    # NOTE: this silences warnings for the whole process, not only this call.
    warnings.filterwarnings('ignore')

    cache_path = 'cache/first_pred/' + event + '.json'

    if isfile(cache_path) and not new:
        with open(cache_path, 'r') as file:
            preds = load(file)
    else:
        # Network dependencies and the API key are only needed on this path.
        import urllib3
        urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
        http = urllib3.PoolManager()
        with open('auth.key', 'r') as key_file:
            auth_key = key_file.read().strip()

        r = http.request('GET', 'https://www.thebluealliance.com/api/v3/event/' + event + '/predictions',
                         headers={'X-TBA-Auth-Key': auth_key})
        preds = loads(r.data.decode('utf-8'))['match_predictions']['qual']
        # Cache exactly what was parsed instead of decoding the body twice.
        with open(cache_path, 'w') as file:
            dump(preds, file)

    matches = get_matches(event)

    # Keys share a common prefix and end in the match number, so ordering by
    # (length, full key) is numeric order for any number of digits. The
    # previous key compared only the last two characters.
    match_keys = sorted(preds, key=lambda match_key: (len(match_key), match_key))

    correct = 0
    data = []
    for n, match_key in enumerate(match_keys, start=1):
        pred = preds[match_key]['winning_alliance']
        # A list-of-tuples lookup always yields a Series, whether or not the
        # (comp_level, match_number) index contains duplicate entries.
        actual = matches.loc[[('qm', n)], 'winning_alliance'].values[0]
        if pred == actual:
            correct += 1
        data.append((pred, actual, correct / n))

    return nparray(data)

In [12]:
#get_first_pred('2016abca')

___
### Statistics Generator
stats_gen is a function that adds a series of statistics for teams in a given event. These statistics include OPR, DPR, CCWM, OAVE, DAVE, and CPR. More information about these statistics can be found [here](https://www.chiefdelphi.com/forums/showthread.php?threadid=137451). These statistics are not cached here because more_team_stats, gradual_predictions_qm, and gradual_predictions_qm_diff all use this function in different capacities, and those functions are cached. A good demonstration of this function's use is in the function more_team_stats.

In [13]:
def stats_gen(appearances, all_stat, teams, app, rankings, if_pinv=False):
    """Compute per-team statistics (OPR, DPR, CCWM, OAVE, DAVE, CPR) for an event.

    More information about these statistics can be found at the link below

    https://www.chiefdelphi.com/forums/showthread.php?threadid=137451

    These statistics are not cached here because more_team_stats,
    gradual_predictions_qm, and gradual_predictions_qm_diff all use this
    function in different capacities and those functions are cached. A good
    demonstration of this function's use is in the function more_team_stats.

    Args:
        appearances: 2-D array that is inverted to solve for OPR/DPR; in
            more_team_stats it is the teams-by-teams count of shared
            alliance appearances.
        all_stat: Per-match DataFrame of blue_/red_ statistics indexed by
            match number. It is modified in place (columns are dropped).
        teams: Team keys used as the index of the result.
        app: DataFrame with the team and score columns of each match,
            looked up by match number 1..len(app).
        rankings: DataFrame with 'matches_played' and the "_a" ranking
            columns, indexed by team. It is sorted in place.
        if_pinv: If True, use the pseudo-inverse directly instead of trying
            the exact inverse first.

    Returns:
        A DataFrame indexed by ``teams``. For every statistic column there
        is also a '<column>_from_mean' column holding the deviation from
        that column's mean.

    Raises:
        ValueError: If a row of ``appearances`` sums to zero, if the team
            index does not match the sorted ``rankings`` index, or if a
            header/team combination is neither blue nor red.
    """
    
    from numpy.linalg import inv, pinv, LinAlgError
    from numpy import all as npall
    from numpy import matmul, delete, std, mean
    from pandas import DataFrame
    
    #print(appearances)
    #print(type(appearances))
    
    # A row that sums to zero means a team with no recorded appearances.
    to_delete = []
    for i, line in enumerate(appearances):
        if sum(line) == 0:
            to_delete.append(i)

    if len(to_delete) > 0:
        raise ValueError("A team is where they shouldn't be in the appearances array")
        # The string below is unreachable (it follows the raise); it keeps
        # the earlier approach of deleting the empty rows/columns.
        '''
        for j, i in enumerate(to_delete):
            i -= j
            appearances = delete(appearances, i, axis=0)
            appearances = delete(appearances, i, axis=1)
        '''

    # Prefer the exact inverse; fall back to the pseudo-inverse when numpy
    # reports a linear-algebra error or when the caller asks for it.
    try:
        if if_pinv:
            #print('pinv')
            app_inv = pinv(appearances)
        else:
            #print('inv')
            app_inv = inv(appearances)
    except LinAlgError:
        #print('pinv')
        app_inv = pinv(appearances)

    #print(app)
    #print(all_stat.columns.values)    
    
    # NOTE: this mutates the caller's all_stat frame.
    all_stat.drop(['blue_totalPoints', 'red_totalPoints', 'winning_alliance', 'blue_team1', 'blue_team2', 'blue_team3',
                   'red_team1', 'red_team2', 'red_team3', 'blue_adjustPoints', 'red_adjustPoints', 'blue_score',
                   'red_score'], axis=1, inplace=True)
    team_stats = DataFrame(index=teams)

    # Running totals per team.
    for i in ['Auto_None_a', 'Auto_Reached_a', 'Auto_Crossed_a', 'Score', 'DScore']:
        team_stats[i] = 0
    
    # One list per team, later collapsed to a standard deviation.
    for i in ['Auto_None_stdev', 'Auto_Reached_stdev', 'Auto_Crossed_stdev', 'Score_stdev', 'DScore_stdev']:
        team_stats[i] = [[] for i in range(len(teams))]

    # Accumulate per-team totals over every match where neither score is -1.
    for i in range(len(app)):
        tmp = app.loc[[i+1]]
        if tmp['blue_score'].values != -1 and tmp['red_score'].values != -1:
            for j in ['blue_', 'red_']:
                for k in range(1,4):
                    # l is the opposing alliance's prefix.
                    if j == 'blue_':
                        l = 'red_'
                    else:
                        l = 'blue_'
                    # Score: own alliance's points; DScore: opponents' points.
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Score'] += tmp[j+'score'].values[0]
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'DScore'] += tmp[l+'score'].values[0]
                    # NOTE(review): the right-hand sides below are one-row
                    # selections (.loc[[i+1]]), not scalars - confirm this
                    # behaves as intended on the pandas version in use.
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Auto_None_a'] += all_stat[j+'robot'+str(k)+'Auto_None'].loc[[i+1]]
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Auto_Reached_a'] += all_stat[j+'robot'+str(k)+'Auto_Reached'].loc[[i+1]]
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Auto_Crossed_a'] += all_stat[j+'robot'+str(k)+'Auto_Crossed'].loc[[i+1]]
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Score_stdev'].append(tmp[j+'score'].values[0])
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'DScore_stdev'].append(tmp[l+'score'].values[0])
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Auto_None_stdev'].append(all_stat[j+'robot'+str(k)+'Auto_None'].loc[[i+1]].values)
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Auto_Reached_stdev'].append(all_stat[j+'robot'+str(k)+'Auto_Reached'].loc[[i+1]].values)
                    team_stats.at[tmp[j+'team'+str(k)].values[0], 'Auto_Crossed_stdev'].append(all_stat[j+'robot'+str(k)+'Auto_Crossed'].loc[[i+1]].values)
                    
    # Replace each collected list with its standard deviation.
    for i in ['Auto_None_stdev', 'Auto_Reached_stdev', 'Auto_Crossed_stdev', 'Score_stdev', 'DScore_stdev']:
        for team in teams:
            team_stats.at[team, i] = std(team_stats[i][team])
    
    #print(team_stats)
    # The per-robot auto columns have been folded into the team totals above.
    for i in ['blue_', 'red_']:
        for j in range(1,4):
            for k in ['Auto_None', 'Auto_Reached', 'Auto_Crossed']:
                all_stat.drop([i+'robot'+str(j)+k], axis=1, inplace=True)

    # NOTE: this sorts the caller's rankings frame in place. The sorted
    # ranking index must match the team index element for element.
    rankings.sort_index(axis=0, inplace=True)
    if npall(team_stats.index.values == rankings.index.values):
        played = rankings['matches_played'].values
    else:
        raise ValueError('Stats and rankings disagree :(')

    # Turn the auto totals into per-match averages.
    for i in ['Auto_None_a', 'Auto_Reached_a', 'Auto_Crossed_a']:
        team_stats[i] /= played

    for i in ['Ranking_Score_a','Auto_a','Scale/Challenge_a','Goals_a','Defense_a']:
        team_stats[i] = rankings[i]

    # OPR/DPR: inverse of the appearance matrix applied to the per-team
    # score totals. CCWM = OPR - DPR, OAVE/DAVE = per-match averages,
    # CPR = OPR + OAVE.
    team_stats['OPR'] = matmul(app_inv, team_stats['Score'])
    team_stats['DPR'] = matmul(app_inv, team_stats['DScore'])
    team_stats['CCWM'] = team_stats['OPR'].values - team_stats['DPR'].values
    team_stats['OAVE'] = team_stats['Score'] / played
    team_stats['DAVE'] = team_stats['DScore'] / played
    team_stats['CPR'] = team_stats['OPR'].values + team_stats['OAVE']

    # Prepare per-statistic accumulators: offensive columns are created from
    # the 'blue_' headers, defensive columns from every other header
    # (treated as 'red_').
    to_mm = DataFrame(index=teams)
    ref_for_later = []
    for header in all_stat.columns.values:
        if 'blue_' in header:
            to_mm[header[5:]+'_OPR'] = 0
            team_stats[header[5:]+'_OAVE'] = 0
            team_stats[header[5:]+'_CPR'] = 0
            ref_for_later.append(header[5:])
        else:
            to_mm[header[4:]+'_DPR'] = 0
            team_stats[header[4:]+'_DAVE'] = 0
    #print(team_stats)


    # For every played match, credit each statistic to the teams of the same
    # colour (offence: _OPR/_OAVE) and to the opposing teams (defence:
    # _DPR/_DAVE).
    # NOTE(review): the red/red branch writes to columns that were created
    # from the blue_ headers above, so it presumably relies on every red_
    # statistic having a blue_ counterpart - confirm.
    all_headers = all_stat.columns.values
    for i in range(len(app)):
        tmp = all_stat.loc[[i+1]].values[0]
        tmp1 = app.loc[[i+1]]
        if tmp1['blue_score'].values != -1 and tmp1['red_score'].values != -1:
            for j, header in enumerate(all_headers):
                for team in ['blue_team1', 'blue_team2', 'blue_team3', 'red_team1', 'red_team2', 'red_team3']:
                    team_num = tmp1[team].values[0]
                    if ('blue_' in header) and ('blue_' in team):
                        to_mm.at[team_num, header[5:]+'_OPR'] += tmp[j]
                        team_stats.at[team_num, header[5:]+'_OAVE'] += tmp[j]
                    elif ('red_' in header) and ('red_' in team):
                        to_mm.at[team_num, header[4:]+'_OPR'] += tmp[j]
                        team_stats.at[team_num, header[4:]+'_OAVE'] += tmp[j]
                    elif ('blue_' in header) and ('red_' in team):
                        to_mm.at[team_num, header[5:]+'_DPR'] += tmp[j]
                        team_stats.at[team_num, header[5:]+'_DAVE'] += tmp[j]
                    elif ('red_' in header) and ('blue_' in team):
                        to_mm.at[team_num, header[4:]+'_DPR'] += tmp[j]
                        team_stats.at[team_num, header[4:]+'_DAVE'] += tmp[j]
                    else:
                        raise ValueError('Something is wrong')

    # Totals -> per-match averages. The plain 'OAVE'/'DAVE' columns do not
    # end in '_OAVE'/'_DAVE' and are therefore left untouched here.
    ts_headers = team_stats.columns.values
    for header in ts_headers:
        if header[-5:] == '_OAVE' or header[-5:] == '_DAVE':
            team_stats[header] /= played

    # Solve every accumulated total with the inverse appearance matrix.
    mm_headers = to_mm.columns.values
    for header in mm_headers:
        team_stats[header] = matmul(app_inv, to_mm[header].values)

    for header in ref_for_later:
        team_stats[header+'_CPR'] = team_stats[header+'_OPR'].values + team_stats[header+'_OAVE']

    team_stats.drop(['Score', 'DScore'], axis=1, inplace=True)
        
    # Add a deviation-from-mean companion for every column. The header list
    # is captured before the loop, so the new columns are not revisited.
    headers = team_stats.columns.values
    for header in headers:
        average = mean(team_stats[header].values)
        tmp = []
        for stat in team_stats[header].values:
            tmp.append(stat-average)
        team_stats[header+'_from_mean'] = tmp
        
    #print(team_stats.columns.values)
    
    return team_stats

### More Team Statistics
more_team_stats is a function that provides additional statistics to those provided by The Blue Alliance. See the function stats_gen for more details about these statistics. These statistics are cached after they are created, in the directory 'cache/more_team_stats/'. The statistics can be updated with new=True. The output shape from this function is (number of teams, 369).

In [14]:
def more_team_stats(event, new=False):
    """Return additional per-team statistics for an event, with caching.

    The statistics are the ones produced by stats_gen. They are cached in
    'cache/more_team_stats/<event>.csv' after they are created; the cached
    file is returned unless it is missing or ``new`` is True, in which case
    the match and ranking data are (re)loaded with the same ``new`` flag and
    the statistics are recomputed and written back to the cache.

    Only qualification matches ('qm') are used. Matches in which either
    alliance score is -1 are ignored when counting shared appearances.

    Args:
        event: Event key, e.g. '2016mokc'.
        new: If True, recompute the statistics and overwrite the cache.

    Returns:
        A DataFrame indexed by team key with an unnamed index. The original
        author reports the output shape as (number of teams, 369).

    Examples:
        more_team_stats('2016mokc') returns the statistics for 2016mokc and caches the
            data if it hasn't been already
        more_team_stats('2016abca', True) forces more_team_stats to recaculate the
            statistics and returns the statistics for 2016abca
    """
    from numpy import unique, zeros
    from pandas import DataFrame, read_csv
    from os.path import isfile

    cache_path = 'cache/more_team_stats/' + event + '.csv'

    if isfile(cache_path) and not new:
        team_stats = read_csv(cache_path, index_col=0)
        # Assigning None clears the index name; `del index.name` raises on
        # current pandas releases.
        team_stats.index.name = None
        return team_stats

    matches = get_matches(event, new)
    rankings = get_rankings(event, new)
    all_stat = matches.loc[['qm']].reset_index(level=0, drop=True)

    team_columns = ['blue_team1', 'blue_team2', 'blue_team3',
                    'red_team1', 'red_team2', 'red_team3']
    app = all_stat[['blue_team1', 'blue_team2', 'blue_team3', 'blue_score',
                    'red_team1', 'red_team2', 'red_team3', 'red_score']]

    # numpy.unique already returns the distinct team keys in sorted order.
    teams = unique(app[team_columns].values)

    length = len(teams)
    appearances = DataFrame(zeros([length, length]), teams, teams)

    # Count, for every pair of teams (including a team with itself), how
    # many played matches they shared an alliance in.
    for i in range(len(app)):
        row = app.loc[[i + 1]].values.reshape(-1,)
        # Anything other than exactly one row of 8 values (e.g. a duplicated
        # match number) is skipped, as are matches without a score.
        if len(row) != 8 or row[3] == -1 or row[7] == -1:
            continue
        for alliance in (row[:3], row[4:7]):
            for team in alliance:
                for partner in alliance:
                    appearances.at[team, partner] += 1
    appearances = appearances.values

    # appearances, all_stat (raw match data), teams, app (reduced match data), rankings
    team_stats = stats_gen(appearances, all_stat, teams, app, rankings)

    team_stats.to_csv(cache_path)

    return team_stats

In [15]:
#more_team_stats('2016mokc', True)

In [16]:
def predict_matches(event, new=False):
    """Build per-match feature rows and win labels for an event's qualification matches.

    For every qualification match (sorted by match number) the feature row
    is the sum of the three blue teams' more_team_stats rows minus the sum
    of the three red teams' rows. The label is 1 when the blue score is
    greater than or equal to the red score (ties go to blue) and 0 otherwise.

    Results are cached in 'cache/predict_matches/<event>.csv' (features) and
    'cache/predict_matches/<event>.npy' (labels); both files must exist for
    the cache to be used. With ``new=True`` the match data is refreshed, but
    the team statistics are taken from more_team_stats(event) without
    forcing a recalculation.

    Args:
        event: Event key, e.g. '2016mokc'.
        new: If True, ignore the cache, rebuild and overwrite it.

    Returns:
        A tuple ``(predict_stats, blue_win)``: a DataFrame with one row per
        qualification match (index 0..n-1, same columns as the team
        statistics) and a numpy array of 0/1 labels.
    """
    from numpy import save, load
    from pandas import DataFrame, read_csv
    from os.path import isfile

    csv_path = 'cache/predict_matches/' + event + '.csv'
    npy_path = 'cache/predict_matches/' + event + '.npy'

    if isfile(csv_path) and isfile(npy_path) and not new:
        predict_stats = read_csv(csv_path, index_col=0)
        # Assigning None clears the index name; `del index.name` raises on
        # current pandas releases.
        predict_stats.index.name = None
        blue_win = load(npy_path)
        return predict_stats, blue_win

    matches = get_matches(event, new)
    stats = more_team_stats(event)#, new)

    fs = matches.loc[['qm']].reset_index(level=0, drop=True).sort_index()

    # Look up each alliance slot for all matches at once. This replaces the
    # row-by-row DataFrame.append, which was removed in pandas 2.0.
    difference = 0
    for column in ['blue_team1', 'blue_team2', 'blue_team3']:
        difference = difference + stats.loc[fs[column].values].values
    for column in ['red_team1', 'red_team2', 'red_team3']:
        difference = difference - stats.loc[fs[column].values].values
    predict_stats = DataFrame(difference, columns=stats.columns.values.tolist())

    # I'll give ties to blue for now
    blue_win = (fs['blue_score'].values >= fs['red_score'].values).astype(int)

    predict_stats.to_csv(csv_path)
    save(npy_path, blue_win)

    return predict_stats, blue_win

In [17]:
#predict_matches('2016mokc', True)