In [15]:
import pandas as pd
import openpyxl
import time
import datetime
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData, select
from sqlalchemy.sql.expression import update
# Module-level SQLAlchemy MetaData container. None of the functions visible in
# this part of the notebook reference it; it may be used by later cells.
meta = MetaData()
from sqlalchemy.sql import text

In [16]:
def clean_igrf(path):
    """Return a dictionary of game information read from a stats workbook.

    Every value is read from a fixed cell position of the sheet named "IGRF",
    so the workbook must follow that layout.

    Parameters:
        path (str): the path of the xlsx spreadsheet

    Returns:
        data_dict (dict): a dictionary with the following key-values:
            timestamp (datetime.datetime): the date and start time of the
                game, combined from the date cell and the start-time cell
            home_league: name of home league
            away_league: name of away league
            home_team: name of home team
            away_team: name of away team
            home_pts: points home team scored
            away_pts: points away team scored
            home_roster (list): list of dictionaries with the following
                key-value pairs for home team skaters:
                skater_no: skater's number
                skater_name: name of skater
            away_roster (list): same structure, for away team skaters

        Scalar values are returned exactly as pandas parsed them from the
        sheet; no type conversion is applied here.

    Raises:
        TypeError: if the date / start-time cells do not hold date and time
            values (raised by datetime.datetime.combine).
    """
    def clean_roster(df):
        """Given dataframe of player numbers and names
            Returns a roster as a list of dictionaries

            Rows where either the number or the name is missing are dropped.
        """
        # dropna() without inplace returns a new frame, so the slice taken
        # from the sheet is never mutated (avoids SettingWithCopyWarning).
        roster_df = df.dropna(axis=0, how="any")
        roster_df.columns = ["skater_no", "skater_name"]
        return roster_df.to_dict('records')

    igrf_df = pd.read_excel(path, sheet_name="IGRF", engine="openpyxl")

    # Fixed cell positions (row, column) on the IGRF sheet. Home-team values
    # sit in the left-hand columns, away-team values in the right-hand ones.
    game_date = igrf_df.iloc[5, 1]
    start_time = igrf_df.iloc[5, 8]
    home_league = igrf_df.iloc[8, 1]
    away_league = igrf_df.iloc[8, 8]
    home_team = igrf_df.iloc[9, 1]
    away_team = igrf_df.iloc[9, 8]
    home_pts = igrf_df.iloc[36, 2]
    away_pts = igrf_df.iloc[36, 9]

    # Roster rows 12..30: (number, name) column pairs for each team.
    home_roster = clean_roster(igrf_df.iloc[12:31, 1:3])
    away_roster = clean_roster(igrf_df.iloc[12:31, 8:10])

    timestamp = datetime.datetime.combine(game_date, start_time)

    data_dict = {"timestamp": timestamp,
                 "home_league": home_league,
                 "away_league": away_league,
                 "home_team": home_team,
                 "away_team": away_team,
                 "home_pts": home_pts,
                 "away_pts": away_pts,
                 "home_roster": home_roster,
                 "away_roster": away_roster
                }

    return data_dict

In [17]:
def game_table(data_dict, db_url=None):
    """Creates an entry in the GGRD_Database table "game" holding the game id,
        the home and away team ids, the winning team's id, and the timestamp
        of the game.

    Parameters:
        data_dict (dict): The Data Dictionary that comes from the function
            clean_igrf that uses the excel game workbook. The keys read here
            are "timestamp", "home_team", "away_team", "home_pts" and
            "away_pts".
        db_url (str, optional): SQLAlchemy database URL. When omitted, the
            GGRD_DATABASE_URL environment variable is used if set, otherwise
            the local development database URL is used.

    Returns:
        game_id - this variable is REQUIRED in order to run the "jams" table.
            It is the integer formed by concatenating the home team id, the
            away team id and the game time formatted as yymmddHH.

    Raises:
        ValueError: if the home or away team name is not present in the
            "team" table, or if a non-datetime timestamp cannot be parsed
            as '%Y-%m-%d %H:%M:%S'.

    Notes:
        This does not create duplicate game id, if game is already
        in table, this function does nothing (INSERT ... ON CONFLICT DO
        NOTHING) and still returns the game id.
    """
    import os  # local import so this cell does not depend on the import cell

    if db_url is None:
        # NOTE(review): the fallback URL embeds credentials; prefer supplying
        # db_url or GGRD_DATABASE_URL outside of local development.
        db_url = os.environ.get(
            "GGRD_DATABASE_URL",
            'postgresql://postgres:postgres@localhost:5432/GGRD_Database')

    team_id_query = text("SELECT team_id FROM team WHERE team_name = :teamname;")
    insert_query = text("INSERT INTO game (game_id, home_team_id, away_team_id, winning_team_id, timestamp) VALUES (:gameid, :hometeamid, :awayteamid, :winningteamid, :timestamp) ON CONFLICT DO NOTHING;")

    def lookup_team_id(conn, team_name):
        """Return the team_id stored for team_name, or raise ValueError."""
        row = conn.execute(team_id_query, {"teamname": team_name}).fetchone()
        if row is None:
            raise ValueError(f"Team {team_name!r} was not found in the team table")
        # Positional access works on every SQLAlchemy row type.
        return row[0]

    game_time = data_dict["timestamp"]
    if not isinstance(game_time, datetime.datetime):
        # Non-datetime values (e.g. strings) go through the original parse.
        game_time = datetime.datetime.strptime(str(game_time), '%Y-%m-%d %H:%M:%S')
    # Formatting the datetime directly avoids a str/strptime round trip that
    # fails whenever the value carries microseconds.
    timestamp = game_time.strftime('%y%m%d%H')

    engine = create_engine(db_url)
    try:
        # engine.begin() commits on success, rolls back on error and always
        # returns the connection to the pool.
        with engine.begin() as conn:
            home_team_id = lookup_team_id(conn, data_dict["home_team"])
            away_team_id = lookup_team_id(conn, data_dict["away_team"])

            # The winner is one of the two teams already looked up, so no
            # third query is needed. Equal scores fall through to the away
            # team, as before.
            if data_dict["home_pts"] > data_dict["away_pts"]:
                winning_team_id = home_team_id
            else:
                winning_team_id = away_team_id

            # NOTE(review): ids are concatenated without a separator, so
            # different (home, away) pairs can in principle produce the same
            # digits; kept as-is for compatibility with existing game ids.
            game_id = int(f"{home_team_id}{away_team_id}{timestamp}")

            conn.execute(insert_query, {"gameid": game_id,
                                        "hometeamid": home_team_id,
                                        "awayteamid": away_team_id,
                                        "winningteamid": winning_team_id,
                                        "timestamp": data_dict["timestamp"]})
    finally:
        engine.dispose()

    return game_id

In [18]:
########## TESTING ##########

# Manual smoke test: parse one sample workbook with clean_igrf and keep the
# result in the notebook-level name `data_dict`.
# The path is relative to the directory the notebook is run from.
path = "../STATS/all_stars_2018/test.xlsx"

data_dict = clean_igrf(path)

  warn(msg)
