In [1]:
import datetime
import os
import time

import openpyxl
import pandas as pd
from sqlalchemy import create_engine

In [2]:
def clean_igrf(path):
    """Returns dictionary of game information given stats workbook

        Every value is read from a fixed cell position on the workbook's
        "IGRF" sheet, so the sheet must follow the layout assumed below.

        Parameters:
            path (str): the path of the xlsx spreadsheet

        Returns:
            data_dict (dict): a dictionary with the following key-values:
                timestamp (float): POSIX timestamp built from the game's
                    date and start time
                home_league (str): name of home league
                away_league (str): name of away league
                home_team (str): name of home team
                away_team (str): name of away team
                home_pts (int): points home team scored
                away_pts (int): points away team scored
                home_roster (list): list of dictionaries with the following
                    key-value pairs for home team skaters:
                    skater_no: skater's number
                    skater_name (str): name of skater
                away_roster (list): list of dictionaries with the following
                    key-value pairs for away team skaters:
                    skater_no: skater's number
                    skater_name (str): name of skater
    """

    igrf_df = pd.read_excel(path, sheet_name="IGRF", engine="openpyxl")

    # All positions are 0-based offsets into the frame returned by
    # read_excel. Home-team values are read from columns 1-2 and away-team
    # values from columns 8-9.
    game_date = igrf_df.iloc[5, 1]
    start_time = igrf_df.iloc[5, 8]
    home_league = igrf_df.iloc[8, 1]
    away_league = igrf_df.iloc[8, 8]
    home_team = igrf_df.iloc[9, 1]
    away_team = igrf_df.iloc[9, 8]
    home_pts = igrf_df.iloc[36, 2]
    away_pts = igrf_df.iloc[36, 9]

    # Rows 12-30 hold the rosters. Independent copies are passed so that
    # roster cleaning works on its own data rather than on a slice of
    # igrf_df.
    home_roster = clean_roster(igrf_df.iloc[12:31, 1:3].copy())
    away_roster = clean_roster(igrf_df.iloc[12:31, 8:10].copy())

    game_start = datetime.datetime.combine(game_date, start_time)
    # NOTE: if game_start carries no timezone info, timestamp() interprets
    # it in the local timezone of the machine running the notebook.
    timestamp = game_start.timestamp()

    return {"timestamp": timestamp,
            "home_league": home_league,
            "away_league": away_league,
            "home_team": home_team,
            "away_team": away_team,
            "home_pts": home_pts,
            "away_pts": away_pts,
            "home_roster": home_roster,
            "away_roster": away_roster
            }

In [3]:
def clean_roster(df):
    """Given dataframe of player numbers and names
        Returns a roster as a list of dictionaries

        Rows with a missing value in either column are discarded. The
        input dataframe itself is left unmodified.

        Parameters:
            df (DataFrame): two columns, skater number then skater name

        Returns:
            roster (list): one dictionary per remaining row with the keys
                "skater_no" and "skater_name"
    """

    # dropna() without inplace returns a new frame, so relabelling its
    # columns below cannot alter the caller's data.
    roster_df = df.dropna(axis=0, how="any")
    roster_df.columns = ["skater_no", "skater_name"]
    return roster_df.to_dict('records')

In [6]:
# Parse the test.xlsx workbook into a dictionary of game info and rosters.
igrf_dict = clean_igrf("../STATS/all_stars_2018/test.xlsx")

  warn(msg)


In [None]:
# For every skater, only add an entry if one doesn't already exist:

# 1) If the skater EXISTS, GET their player ID from the table.

# 2) If the skater DOES NOT EXIST, add the player and establish a new ID.

In [None]:
# Home roster: the list of {"skater_no", "skater_name"} dicts built by clean_igrf.
home_team = igrf_dict['home_roster']

In [14]:
# home_team is a list of per-skater records rather than a dict, so the plain
# DataFrame constructor is used: one row per skater, one column per key.
home_team_df = pd.DataFrame(home_team)
home_team_df

Unnamed: 0,skater_no,skater_name
0,10,Giles
1,1680,D.A.R.Y.L.
2,17,Fast and Luce
3,1706,Miss Tea Maven
4,1978,Space Invader
5,213,Spork Chop
6,23,Livvie Smalls
7,314,Caf Fiend
8,32,Cork Rebel
9,4,Violet Knockout


In [16]:
# Away roster: the list of {"skater_no", "skater_name"} dicts built by clean_igrf.
away_team = igrf_dict['away_roster']

In [17]:
# away_team is a list of per-skater records rather than a dict, so the plain
# DataFrame constructor is used: one row per skater, one column per key.
away_team_df = pd.DataFrame(away_team)
away_team_df

Unnamed: 0,skater_no,skater_name
0,77,Mad Dog Murph
1,111,Jamsterella
2,21,Coco Frye
3,235,Atomic Mel-Down
4,27,H.M.Smith
5,301,Legs R Us
6,480,Erin Jackson
7,6,Snot Rocket Science
8,62,Fancy Schmancy
9,814,Leanne Groll


In [18]:
# The connection URL is taken from the GGRD_DATABASE_URL environment variable
# so real credentials never need to be written into the notebook; the fallback
# is the local development default.
database_url = os.environ.get(
    "GGRD_DATABASE_URL",
    'postgresql://postgres:postgres@localhost:5432/GGRD_Database',
)
engine = create_engine(database_url)
# NOTE: to_sql defaults to if_exists="fail", so this cell raises ValueError
# when the 'home_team_df' table already exists (e.g. on a second run).
home_team_df.to_sql('home_team_df', engine)