In [5]:
import pandas as pd
import openpyxl

In [25]:
def clean_igrf(path):
    """Return a dictionary of game information read from a stats workbook.

    The "IGRF" sheet is loaded with pandas' default header handling, so the
    first sheet row becomes the column labels and every ``iloc`` position
    below is relative to the rows that follow it.

        Parameters:
            path (str): the path of the xlsx spreadsheet

        Returns:
            data_dict (dict): a dictionary with the following key-values:
                game_date: date of game, as stored in the workbook cell
                    (a datetime.datetime in the sample workbook)
                start_time: start time of game, as stored in the workbook
                    cell (a datetime.time in the sample workbook)
                home_league (str): name of home league
                away_league (str): name of away league
                home_team (str): name of home team
                away_team (str): name of away team
                home_pts: points home team scored
                away_pts: points away team scored
                home_roster (list): list of dictionaries with the following
                    key-value pairs for home team skaters:
                    skater_no: skater's number, kept exactly as stored in
                        the sheet (strings such as '010' in the sample
                        workbook, so leading zeros are preserved)
                    skater_name (str): name of skater
                away_roster (list): list of dictionaries with the same
                    key-value pairs for away team skaters

        Raises:
            Whatever pandas raises when the file or the "IGRF" sheet cannot
            be read, and IndexError when the sheet is smaller than the cell
            positions read here.
    """

    igrf_df = pd.read_excel(path, sheet_name="IGRF", engine="openpyxl")

    # Home values are read from columns 1-2, away values from columns 8-9.
    # Roster rows 12-30 hold (number, name) pairs; each slice is copied so
    # the roster helper never works on a view of igrf_df.
    return {
        "game_date": igrf_df.iloc[5, 1],
        "start_time": igrf_df.iloc[5, 8],
        "home_league": igrf_df.iloc[8, 1],
        "away_league": igrf_df.iloc[8, 8],
        "home_team": igrf_df.iloc[9, 1],
        "away_team": igrf_df.iloc[9, 8],
        "home_pts": igrf_df.iloc[36, 2],
        "away_pts": igrf_df.iloc[36, 9],
        "home_roster": clean_roster(igrf_df.iloc[12:31, 1:3].copy()),
        "away_roster": clean_roster(igrf_df.iloc[12:31, 8:10].copy()),
    }

In [26]:
def clean_roster(df):
    """Convert a two-column roster frame into a list of skater dictionaries.

    Rows in which either cell is missing are discarded. The caller's
    dataframe is left untouched: cleaning happens on a new frame, so this
    is safe to call on a slice of a larger dataframe and safe to re-run.

        Parameters:
            df (pandas.DataFrame): frame with exactly two columns, skater
                number first and skater name second

        Returns:
            roster (list): one dictionary per remaining row with the keys
                "skater_no" and "skater_name"

        Raises:
            ValueError: if df does not have exactly two columns
    """

    # dropna without inplace returns a new frame, so renaming its columns
    # below cannot leak back into the caller's data.
    roster_df = df.dropna(axis=0, how="any")
    roster_df.columns = ["skater_no", "skater_name"]
    return roster_df.to_dict("records")

In [27]:
# Run the parser on a sample workbook; the returned dictionary is the cell output.
clean_igrf("../STATS/all_stars_2018/test.xlsx")

  warn(msg)


{'game_date': datetime.datetime(2018, 5, 12, 0, 0),
 'start_time': datetime.time(18, 30),
 'home_league': 'Gotham Girls Roller Derby',
 'away_league': 'Jacksonville Roller Derby',
 'home_team': 'All-Stars',
 'away_team': 'New Jax City Rollers',
 'home_pts': 225,
 'away_pts': 113,
 'home_roster': [{'skater_no': '010', 'skater_name': 'Giles'},
  {'skater_no': '1680', 'skater_name': 'D.A.R.Y.L.'},
  {'skater_no': '17', 'skater_name': 'Fast and Luce'},
  {'skater_no': '1706', 'skater_name': 'Miss Tea Maven'},
  {'skater_no': '1978', 'skater_name': 'Space Invader'},
  {'skater_no': '213', 'skater_name': 'Spork Chop'},
  {'skater_no': '23', 'skater_name': 'Livvie Smalls'},
  {'skater_no': '314', 'skater_name': 'Caf Fiend'},
  {'skater_no': '32', 'skater_name': 'Cork Rebel'},
  {'skater_no': '4', 'skater_name': 'Violet Knockout'},
  {'skater_no': '41', 'skater_name': 'Roxy Dallas'},
  {'skater_no': '4500', 'skater_name': 'Bonita Apple Bomb'},
  {'skater_no': '56', 'skater_name': 'Kate Sera Se

In [12]:
# Load the "IGRF" sheet of the sample workbook on its own to inspect its raw layout.
igrf_df = pd.read_excel("../STATS/all_stars_2018/test.xlsx", sheet_name="IGRF", engine="openpyxl")

# Preview the first rows; the first sheet row has been used as the column labels.
igrf_df.head()

Unnamed: 0,WFTDA Interleague Game Reporting Form (IGRF),Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,...,Unnamed: 247,Unnamed: 248,Unnamed: 249,Unnamed: 250,Unnamed: 251,Unnamed: 252,Unnamed: 253,Unnamed: 254,Unnamed: 255,Unnamed: 256
0,Section 1. VENUE & ROSTERS (Complete BEFORE th...,,,,,,,,,,...,,,,,,,,,,
1,Location:,Skate Station Mandarin,,,,,,,Jacksonville,,...,,,,,,,,,,
2,,VENUE NAME,,,,,,,CITY,,...,,,,,,,,,,
3,Tournament/\nMulti-Day Event:,,,,,,,,,,...,,,,,,,,,,
4,,TOURNAMENT/MULTI-DAY EVENT NAME,,,,,,,HOST LEAGUE NAME,,...,,,,,,,,,,


In [18]:
# Spot-check the cell position that clean_igrf reads as "away_team".
igrf_df.iloc[9,8]

'New Jax City Rollers'