In [1]:
import pandas as pd
import openpyxl
import time
import datetime
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData
from sqlalchemy.sql.expression import update
meta = MetaData()

In [2]:
def clean_igrf(path):
    """Returns dictionary of game information given stats workbook
    
        Parameters:
            path (str): the path of the xlsx spreadsheet
            
        Returns:
            data_dict (dict): a dictionary with the following key-values:
                timestamp (timestamp): the time and date that the game occured
                home_league (str): name of home league
                away_league (str): name of away league
                home_team (str): name of home team
                away_team (str): name of away team
                home_pts (int): points home team scored
                away_pts (int): points away team scored
                home_roster (list): list of dictionaries with the following
                    key-value pairs for home team skaters:
                    skater_no (int): skater's number
                    skater_name (str): name of skater
                away_roster (list): list of dictionaries with the following
                    key-value pairs for away team skaters:
                    skater_no (int): skater's number
                    skater_name (str): name of skater
    """
    
    igrf_df = pd.read_excel(path, sheet_name="IGRF", engine="openpyxl")
    game_date = igrf_df.iloc[5, 1]
    start_time = igrf_df.iloc[5, 8]
    home_league = igrf_df.iloc[8,1]
    away_league = igrf_df.iloc[8,8]
    home_team = igrf_df.iloc[9,1]
    away_team = igrf_df.iloc[9,8]
    home_pts = igrf_df.iloc[36, 2]
    away_pts = igrf_df.iloc[36, 9]
    home_roster = clean_roster(igrf_df.iloc[12:31,1:3])
    away_roster = clean_roster(igrf_df.iloc[12:31,8:10])
    
    datetime_object = datetime.datetime.combine(game_date, start_time)
    timestamp = datetime.datetime.timestamp(datetime_object)

    data_dict = {}

    data_dict = {"timestamp": timestamp,
                 "home_league": home_league,
                 "away_league": away_league,
                 "home_team": home_team,
                 "away_team": away_team,
                 "home_pts": home_pts,
                 "away_pts": away_pts,
                 "home_roster": home_roster,
                 "away_roster": away_roster
                }
    
    return data_dict

In [3]:
def clean_roster(df):
    
    """Given dataframe of player numbers and names
        Returns a roster as a list of dictionaries"""
    
    df.dropna(axis=0, how="any", inplace=True)
    df.columns = ["skater_no", "skater_name"]
    roster = df.to_dict('records')
    return roster

In [4]:
igrf_dict = clean_igrf("../STATS/all_stars_2018/test.xlsx")

  warn(msg)


In [5]:
#for every skater - only add entry if doesn't exist
# 1) if EXISTS - GET Player ID from the table
# 2) if DOES NOT EXIST - then add player and establish new id

In [6]:
game_roster = igrf_dict['home_roster'] + igrf_dict['away_roster']

In [7]:
game_roster_df = pd.DataFrame.from_dict(game_roster)

In [8]:
game_roster_df["skater_name"]

### 1) Get rid of spaces and Special Characters
### 2) Get first 3 characters in the string of "Skatername" = (Variable)
### 3) Make those 3 letters CAPITAL
### 4) Create a "Skater Id" Using the "Skater No" + "Variable" (example 123ABC)
game_roster_df.head()

Unnamed: 0,skater_no,skater_name
0,10,Giles
1,1680,D.A.R.Y.L.
2,17,Fast and Luce
3,1706,Miss Tea Maven
4,1978,Space Invader


In [9]:
def clean_name (skater_name):   
    skater_name = [character for character in skater_name if character.isalnum()]    
    if len(skater_name) >= 3:
        skater_name = "".join(skater_name[0:3])
    elif len(skater_name) >= 2:
        skater_name = "".join(skater_name[0:2])
    else:
        skater_name = "".join(skater_name[0:1])        
    skater_name = skater_name.upper() 
    return skater_name

In [10]:
game_roster_df["skater_id"] = game_roster_df.skater_no + game_roster_df.skater_name.apply(clean_name)

game_roster_df

Unnamed: 0,skater_no,skater_name,skater_id
0,10,Giles,010GIL
1,1680,D.A.R.Y.L.,1680DAR
2,17,Fast and Luce,17FAS
3,1706,Miss Tea Maven,1706MIS
4,1978,Space Invader,1978SPA
5,213,Spork Chop,213SPO
6,23,Livvie Smalls,23LIV
7,314,Caf Fiend,314CAF
8,32,Cork Rebel,32COR
9,4,Violet Knockout,4VIO


In [None]:
engine = create_engine('postgresql://postgres:postgres@localhost:5432/GGRD_Database')
conn = engine.connect()

In [None]:
game_roster_df.to_sql('skater', engine, if_exists='append',index=False)

In [None]:
engine.execute(skater.insert(), skater_no= row["skater_no"], skater_name=row["skater_name"], skater_id=["skater_id"])

In [None]:
game_roster_df.iterrows()

In [21]:
for row in game_roster_df.iterrows():
    engine.execute(skater.insert(),
                       skater_no= row["skater_no"],
                       skater_name=row["skater_name"],
                       skater_id=["skater_id"])

SyntaxError: unexpected EOF while parsing (<ipython-input-21-721c6586f12a>, line 3)