In [1]:
import requests
from pathlib import Path
import pandas as pd

In [4]:
def read_multiindex_csv_to_df(path, header_lines=(0, 1)):
    '''
    signature: read_multiindex_csv_to_df(path, header_lines=(0, 1))

    reads a csv whose columns are described by several header rows into a
    pandas dataframe with MultiIndex columns, forward-filling the blank
    top-level header cells that pandas labels "Unnamed: ..."

    parameters:
    -path: REQUIRED, string or pathlib.Path pointing to the csv file. an open
     file-like object (anything with a .read attribute) is passed through to
     pandas unchanged.

    -header_lines: OPTIONAL, the lines of the csv to be used as column names.
     by default set to (0, 1) (the first two lines of the file). any number of
     header lines is supported; only the top level is forward-filled, the
     lower levels are kept exactly as pandas parsed them.

    returns:
    the dataframe. when the header resolves to a single level (e.g.
    header_lines=0) the columns are not a MultiIndex and the frame is returned
    as parsed. otherwise every top-level label starting with 'Unnamed' is
    replaced by the closest named top-level label to its left ('' if there is
    none yet).
    '''
    # Keep file-like sources intact; everything else is stringified as before.
    source = path if hasattr(path, 'read') else str(path)

    # Hand pandas a fresh list so tuple input works and the caller's sequence
    # (or the shared default) is never passed by reference.
    if isinstance(header_lines, (list, tuple)):
        header = list(header_lines)
    else:
        header = header_lines

    df = pd.read_csv(source, header=header)

    # A single header row yields plain string labels; there is nothing to fill
    # and treating them as tuples would be wrong.
    if not isinstance(df.columns, pd.MultiIndex):
        return df

    filled_labels = []
    last_named_top = ''
    for label in df.columns:
        top = label[0]
        if isinstance(top, str) and top.startswith('Unnamed'):
            # Blank top-level cell: inherit the group name from the left while
            # preserving every lower level (not only the second one).
            filled_labels.append((last_named_top,) + tuple(label[1:]))
        else:
            last_named_top = top
            filled_labels.append(tuple(label))

    df.columns = pd.MultiIndex.from_tuples(filled_labels)
    return df

In [5]:
# The raw replay CSV sits in the 'data' directory next to the current working directory.
s11_raw_path = Path.cwd().parent.joinpath('data', 's11_raw_replay_data.csv')

# Parse it with the default two header rows and display the resulting frame.
df = read_multiindex_csv_to_df(s11_raw_path)
df

Unnamed: 0_level_0,Series,Series,Series,Series,Series,Series,Series,Series,Series,Series,...,Player Stats Positioning,Player Stats Positioning,Player Stats Positioning,Player Stats Positioning,Player Stats Positioning,Player Stats Positioning,Player Stats Positioning,Player Stats Positioning,Player Stats Demo,Player Stats Demo
Unnamed: 0_level_1,submitter_discord_id,match_id,submission_date,submission_time,replay_date,replay_time,created_date,created_time,league,season,...,percent_defensive_half,percent_offensive_half,percent_behind_ball,percent_infront_ball,percent_most_back,percent_most_forward,percent_closest_to_ball,percent_farthest_from_ball,inflicted,taken
0,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,...,68.972145,31.027857,80.793360,19.206646,50.560970,48.352425,56.744900,42.109604,1,2
1,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,...,70.261340,29.738660,73.705120,26.294886,50.060368,50.767100,42.345180,58.541183,0,0
2,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,...,61.254753,38.745240,71.108390,28.891603,51.473835,48.588005,53.181778,46.732826,2,0
3,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,...,58.708015,41.291980,68.666830,31.333164,48.411320,50.325394,45.761063,53.122883,0,1
4,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:24:03,8/30/2020,22:12:02,premier,11,...,58.649520,41.350470,83.983730,16.016270,45.823940,55.137028,59.496975,41.463993,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16209,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:30:04,11/17/2020,0:12:04,premier,11,...,66.562880,33.437120,68.756256,31.243746,50.231480,50.752316,51.707176,49.276620,1,2
16210,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:37:02,11/17/2020,0:12:05,premier,11,...,62.756813,37.243183,79.583090,20.416904,54.743470,47.116943,44.690315,57.170094,1,1
16211,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:37:02,11/17/2020,0:12:05,premier,11,...,58.828182,41.171810,70.971436,29.028559,46.856945,54.396810,56.130110,45.123642,1,1
16212,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:37:02,11/17/2020,0:12:05,premier,11,...,68.076980,31.923014,69.795900,30.204105,47.723595,54.396810,55.841230,46.279180,1,0


In [6]:
# Select the columns grouped under the top-level 'Series' header; the result is keyed by the second-level names.
df['Series']

Unnamed: 0,submitter_discord_id,match_id,submission_date,submission_time,replay_date,replay_time,created_date,created_time,league,season,match,series
0,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,1,13
1,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,1,13
2,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,1,13
3,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:15:40,8/30/2020,22:12:03,premier,11,1,13
4,140799418923220992,PL-11-1-13,8/30/2020,20:11:41,8/30/2020,15:24:03,8/30/2020,22:12:02,premier,11,1,13
...,...,...,...,...,...,...,...,...,...,...,...,...
16209,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:30:04,11/17/2020,0:12:04,premier,11,10,13
16210,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:37:02,11/17/2020,0:12:05,premier,11,10,13
16211,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:37:02,11/17/2020,0:12:05,premier,11,10,13
16212,190866782926536704,PL-11-10-13,11/16/2020,23:11:44,11/15/2020,10:37:02,11/17/2020,0:12:05,premier,11,10,13
