In [1]:
import pandas as pd
import openpyxl
import time
import datetime
from sqlalchemy import create_engine
from sqlalchemy import Table, Column, Integer, String, MetaData
from sqlalchemy.sql.expression import update
meta = MetaData()
from sqlalchemy.sql import text

In [2]:
def clean_lineups(path):
    
    stats_df = pd.read_excel(path, sheet_name="Lineups", engine="openpyxl")
    stats_df = stats_df.fillna(0)
    stats_df = stats_df.drop([0], axis = "index")
    stats_df = stats_df.reset_index()
    stats_df = stats_df.rename(columns=stats_df.iloc[0]).drop(stats_df.index[0])
    stats_df = stats_df.drop([1, "noPivot", "Box", 0.0, 0, "Team Roster"], axis = 1)
    stats_df.columns = ["jam_number",
                        "home_jammer_number",
                        "home_pivot_number",
                        "home_blocker_1_number",
                        "home_blocker_2_number",
                        "home_blocker_3_number",
                        "redundant",
                        "away_jammer_number",
                        "away_pivot_number",
                        "away_blocker_1_number",
                        "away_blocker_2_number",
                        "away_blocker_3_number"]
    stats_df = stats_df.drop(["redundant"], axis = 1)
    #Splitting Period one and two into separate dataframes
    period_one_lineups_df = stats_df[0:38]
    period_one_lineups_df["period"] = 1
    period_two_lineups_df = stats_df[42:80]
    period_two_lineups_df["period"] = 2
    frames = [period_one_lineups_df, period_two_lineups_df]
    result_df = pd.concat(frames).reset_index(drop=True)
    
    return result_df

In [3]:
lineup_df = clean_lineups("../STATS/all_stars_2018/test.xlsx")

  warn(msg)
  warn(msg)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  period_one_lineups_df["period"] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  period_two_lineups_df["period"] = 2


In [4]:
def clean_sk(path):

    stats_df = pd.read_excel(path, sheet_name="SK", engine="openpyxl")

    #Formatting the sheet
    stats_df = stats_df.dropna(axis=1, how="all")
    stats_df = stats_df.dropna(axis="index", how="all").reset_index()
    stats_df = stats_df = stats_df.rename(columns=stats_df.iloc[0]).drop(stats_df.index[0])
    stats_df = stats_df.drop([0], axis = 1)
    stats_df.columns = ["jam_number",
                    "not_needed",
                    "home_jammer_number",
                    "home_points_scored",
                    "not_needed",
                    "not_needed",
                    "home_lost_lead",
                    "home_gained_lead",
                    "not_needed",
                    "home_called_off_jam",
                    "not_needed",
                    "not_needed",
                    "not_needed",
                    "not_needed",
                    "not_needed",
                    "not_needed",
                    "not_needed",
                    "away_jammer_number",
                    "away_points_scored",
                    "not_needed",
                    "not_needed",
                    "away_lost_lead",
                    "away_gained_lead",
                    "not_needed",
                    "away_called_off_jam",
                    "not_needed",
                    "not_needed",
                    "not_needed",
                    "not_needed",
                    "not_needed"]
    stats_df  = stats_df.drop(["not_needed"], axis = 1)
    stats_df = stats_df.fillna("0")
    stats_df = stats_df[(stats_df.T != 0).any()]
    stats_df = stats_df.reset_index(drop=True)

    #Splitting Period one and two into separate dataframes
    period_one_stats_df = stats_df[:38]
    period_one_stats_df["period"] = 1
    period_two_stats_df = stats_df[42:80]
    period_two_stats_df["period"] = 2
    
    frames = [period_one_stats_df, period_two_stats_df]
    result_df = pd.concat(frames).reset_index(drop=True)
    
    return result_df

In [5]:
def clean_igrf(path):
    """ Returns dictionary of game information given stats workbook
    
        Parameters:
            path (str): the path of the xlsx spreadsheet
            
        Returns:
            data_dict (dict): a dictionary with the following key-values:
                timestamp (timestamp): the time and date that the game occured
                home_league (str): name of home league
                away_league (str): name of away league
                home_team (str): name of home team
                away_team (str): name of away team
                home_pts (int): points home team scored
                away_pts (int): points away team scored
                home_roster (list): list of dictionaries with the following
                    key-value pairs for home team skaters:
                    skater_no (int): skater's number
                    skater_name (str): name of skater
                away_roster (list): list of dictionaries with the following
                    key-value pairs for away team skaters:
                    skater_no (int): skater's number
                    skater_name (str): name of skater
    """
    def clean_roster(df):
    
        """Given dataframe of player numbers and names
            Returns a roster as a list of dictionaries
        """

        df.dropna(axis=0, how="any", inplace=True)
        df.columns = ["skater_no", "skater_name"]
        roster = df.to_dict('records')
        return roster
    
    igrf_df = pd.read_excel(path, sheet_name="IGRF", engine="openpyxl")
    game_date = igrf_df.iloc[5, 1]
    start_time = igrf_df.iloc[5, 8]
    home_league = igrf_df.iloc[8,1]
    away_league = igrf_df.iloc[8,8]
    home_team = igrf_df.iloc[9,1]
    away_team = igrf_df.iloc[9,8]
    home_pts = igrf_df.iloc[36, 2]
    away_pts = igrf_df.iloc[36, 9]
    home_roster = clean_roster(igrf_df.iloc[12:31,1:3])
    away_roster = clean_roster(igrf_df.iloc[12:31,8:10])
    
    timestamp = datetime.datetime.combine(game_date, start_time)

    data_dict = {}

    data_dict = {"timestamp": timestamp,
                 "home_league": home_league,
                 "away_league": away_league,
                 "home_team": home_team,
                 "away_team": away_team,
                 "home_pts": home_pts,
                 "away_pts": away_pts,
                 "home_roster": home_roster,
                 "away_roster": away_roster
                }
    
    return data_dict

In [6]:
def resultproxy_clean(result):
    result = result.fetchone()
    result = result.values()
    result = result[0]
    return result

In [7]:
scores_df = clean_sk("../STATS/all_stars_2018/test.xlsx")
data_dict = clean_igrf("../STATS/all_stars_2018/test.xlsx")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  period_one_stats_df["period"] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  period_two_stats_df["period"] = 2
  warn(msg)


In [8]:
home_jammer_list = []
home_pivot_list = []
home_blocker_1_list = []
home_blocker_2_list = []
home_blocker_3_list = []

away_jammer_list = []
away_pivot_list = []
away_blocker_1_list = []
away_blocker_2_list = []
away_blocker_3_list = []

for row in lineup_df.itertuples():
    #HOME TEAM
    for x in range(76):
        try:
            #HOME TEAM
            if row[2] == data_dict['home_roster'][x]["skater_no"]:
                home_jammer_list.append(data_dict['home_roster'][x]["skater_name"])
            if row[3] == data_dict['home_roster'][x]["skater_no"]:
                home_pivot_list.append(data_dict['home_roster'][x]["skater_name"])
            if row[4] == data_dict['home_roster'][x]["skater_no"]:
                home_blocker_1_list.append(data_dict['home_roster'][x]["skater_name"])
            if row[5] == data_dict['home_roster'][x]["skater_no"]:
                home_blocker_2_list.append(data_dict['home_roster'][x]["skater_name"])
            if row[6] == data_dict['home_roster'][x]["skater_no"]:
                home_blocker_3_list.append(data_dict['home_roster'][x]["skater_name"])
                
            #AWAY TEAM
            if row[7] == data_dict['away_roster'][x]["skater_no"]:
                away_jammer_list.append(data_dict['away_roster'][x]["skater_name"])
            if row[8] == data_dict['away_roster'][x]["skater_no"]:
                away_pivot_list.append(data_dict['away_roster'][x]["skater_name"])
            if row[9] == data_dict['away_roster'][x]["skater_no"]:
                away_blocker_1_list.append(data_dict['away_roster'][x]["skater_name"])
            if row[10] == data_dict['away_roster'][x]["skater_no"]:
                away_blocker_2_list.append(data_dict['away_roster'][x]["skater_name"])
            if row[11] == data_dict['away_roster'][x]["skater_no"]:
                away_blocker_3_list.append(data_dict['away_roster'][x]["skater_name"])
        except IndexError:
            break

In [9]:
lineup_df.loc[range(len(home_jammer_list)),"home_jammer_name"] = home_jammer_list
lineup_df.loc[range(len(home_pivot_list)),"home_pivot_name"] = home_pivot_list
lineup_df.loc[range(len(home_blocker_1_list)),"home_blocker_1_name"] = home_blocker_1_list
lineup_df.loc[range(len(home_blocker_2_list)),"home_blocker_2_name"] = home_blocker_2_list
lineup_df.loc[range(len(home_blocker_3_list)),"home_blocker_3_name"] = home_blocker_3_list

lineup_df.loc[range(len(away_jammer_list)),"away_jammer_name"] = away_jammer_list
lineup_df.loc[range(len(away_pivot_list)),"away_pivot_name"] = away_pivot_list
lineup_df.loc[range(len(away_blocker_1_list)),"away_blocker_1_name"] = away_blocker_1_list
lineup_df.loc[range(len(away_blocker_2_list)),"away_blocker_2_name"] = away_blocker_2_list
lineup_df.loc[range(len(away_blocker_3_list)),"away_blocker_3_name"] = away_blocker_3_list

In [10]:
engine = create_engine('postgresql://postgres:postgres@localhost:5432/GGRD_Database')
conn = engine.connect()

In [11]:
#for row in lineup_df.itertuples():
#print(row[2])

In [12]:
#for row in lineup_df.itertuples():
#print(row[21])

In [19]:
for row in lineup_df.itertuples():
    jam_number = int(row[1])
    period_number = int(row[12])

    query_one = text("SELECT skater_id FROM skater WHERE skater_number = :sno AND skater_name = :sna;")

    try:
        ######### HOME TEAM #############
        home_jammer_id = conn.execute(query_one,{"sno" : row[2], "sna" : row[13]})
        home_jammer_id = resultproxy_clean(home_jammer_id)
    except :
        pass
    try:
        home_pivot_id = conn.execute(query_one,{"sno" : row[3], "sna" : row[14]})
        home_pivot_id = resultproxy_clean(home_pivot_id)
    except :
        pass
    try:
        home_blocker1_id = conn.execute(query_one,{"sno" : row[4], "sna" : row[15]})
        home_blocker1_id = resultproxy_clean(home_blocker1_id)
    except :
        pass
    try:
        home_blocker2_id = conn.execute(query_one,{"sno" : row[5], "sna" : row[16]})
        home_blocker2_id = resultproxy_clean(home_blocker2_id)
    except :
        pass
    try:
        home_blocker3_id = conn.execute(query_one,{"sno" : row[6], "sna" : row[17]})
        home_blocker3_id = resultproxy_clean(home_blocker3_id)   
    except :
        pass     
        ######### AWAY TEAM #############
    try:
        away_jammer_id = conn.execute(query_one,{"sno" : row[7], "sna" : row[18]})
        away_jammer_id = resultproxy_clean(away_jammer_id)
    except :
        pass
    try:
        away_pivot_id = conn.execute(query_one,{"sno" : row[8], "sna" : row[19]})
        away_pivot_id = resultproxy_clean(away_pivot_id)
    except :
        pass
    try:
        away_blocker1_id = conn.execute(query_one,{"sno" : row[9], "sna" : row[20]})
        away_blocker1_id = resultproxy_clean(away_blocker1_id)
    except :
        pass
    try:
        away_blocker2_id = conn.execute(query_one,{"sno" : row[10], "sna" : row[21]})
        away_blocker2_id = resultproxy_clean(away_blocker2_id)
    except :
        pass
    try:
        away_blocker3_id = conn.execute(query_one,{"sno" : row[11], "sna" : row[22]})
        away_blocker3_id = resultproxy_clean(away_blocker3_id)
    except :
        pass
    
    query_two = text("INSERT INTO jams (jam_number, \
        period_number,\
        home_jammer,\
        home_pivot,\
        home_blocker_1,\
        home_blocker_2,\
        home_blocker_3,\
        away_jammer,\
        away_pivot,\
        away_blocker_1,\
        away_blocker_3) VALUES (:jn, :pn, :hj, :hp, :hb1, :hb2, :hb3, :aj, :ap, :ab1, :ab3) ON CONFLICT DO NOTHING;")
    try:
        conn.execute(query_two,
            {"jn" : jam_number,
            "pn" : period_number,
            "hj" : home_jammer_id,
            "hp" : home_pivot_id,
            "hb1" : home_blocker1_id,
            "hb2" : home_blocker2_id,
            "hb3" : home_blocker3_id,
            "aj" : away_jammer_id,
            "ap" : away_pivot_id,
            "ab1" : away_blocker1_id,
            "ab2" : away_blocker2_id,
            "ab3" : away_blocker3_id})
    except :
        pass
    
    for row in scores_df.itertuples():
        home_team_points_scored = int(row[3])
        away_team_points_scored = int(row[8])
        if row[5] == 1:
            home_lead = True
            away_lead = False
            if row[4] == 1:
                lost_lead = True
            else:
                lost_lead = False
            if row[6] == 1:
                called_jam_off = True
            else:
                called_jam_off = False
        else:
            home_lead = False
            away_lead = True
            if row[9] == 1:
                lost_lead = True
            else:
                lost_lead = False
            if row[11] == 1:
                called_jam_off = True
            else:
                called_jam_off = False

        query_three = text("UPDATE jams SET home_team_points_scored = :htps, \
            away_team_points_scored = :atps, \
            home_lead = :hl, \
            away_lead = :al, \
            lost_lead = :ll, \
            called_jam_off = :cjo \
            WHERE jam_number = :jn;")

        conn.execute(query_three,
            {"jn" : jam_number,
             "pn" : period_number,
            "htps": home_team_points_scored,
             "atps" : away_team_points_scored,
             "hl" : home_lead,
             "al": away_lead,
             "ll": lost_lead,
             "cjo": called_jam_off})

In [14]:
#LINE UP df

# jam_number = row[1]
# period_number = row[12]

# home_jammer = row[2] // row[13]
# home_pivot = row[3] // row[14]
# home_blocker_1 = row[4] // row[15]
# home_blocker_2 = row[5] // row[16]
# home_blocker_3 = row[6] // row[17]

# away_jammer = row[7] // row[18]
# away_pivot = row[8] // row[19]
# away_blocker_1 = row[9] // row[20]
# away_blocker_2 = row[10] // row[21]
# away_blocker_3 = row[11] // row[22]

In [15]:
# query = text("INSERT INTO jams (game_id, .\
#     jam_number,.\
#     period_number,.\
#     home_jammer,.\ #JAMMER ID
#     home_pivot,.\ #
#     home_blocker_1,.\
#     home_blocker_2,.\
#     home_blocker_3,.\
#     home_team_points_scored,.\
#     away_team_points_scored,.\
#     away_jammer,.\
#     away_pivot,.\
#     away_blocker_1,.\
#     away_blocker_2,.\
#     away_blocker_3,.\
#     away_lead,.\
#     home_lead,.\
#     lost_lead,.\
#     called_jam_off,.\
#     seconds_to_lead,.\
#     seconds_to_first_pass,).\
#     VALUES .\
#     (:sna, :sno) .\
#     ON CONFLICT DO NOTHING;")
# conn.execute(query,
#         {"game_id" :
#     "jam_number" :
#     "period_number" :
#     "home_jammer" :
#     "home_pivot" :
#     "home_blocker_1" :
#     "home_blocker_2" :
#     "home_blocker_3" :
#     "home_team_points_scored" :
#     "away_team_points_scored" :
#     "away_jammer" :
#     "away_pivot" :
#     "away_blocker_1" :
#     "away_blocker_2" :
#     "away_blocker_3" :
#     "away_lead" :
#     "home_lead" :
#     "lost_lead" :
#     "called_jam_off" :
#     "seconds_to_lead" :
#     "seconds_to_first_pass" :

In [16]:
lineup_df.head(2)

Unnamed: 0,jam_number,home_jammer_number,home_pivot_number,home_blocker_1_number,home_blocker_2_number,home_blocker_3_number,away_jammer_number,away_pivot_number,away_blocker_1_number,away_blocker_2_number,...,home_jammer_name,home_pivot_name,home_blocker_1_name,home_blocker_2_name,home_blocker_3_name,away_jammer_name,away_pivot_name,away_blocker_1_name,away_blocker_2_name,away_blocker_3_name
0,1,1706,23,17,314,68,480,301,912,950,...,Miss Tea Maven,Livvie Smalls,Fast and Luce,Caf Fiend,Sarah McKemie,Erin Jackson,Legs R Us,Eirinn Go Brawl,Tic Tac Toni,Stephanie Gentz
1,2,10,1680,41,4500,4,814,21,235,82,...,Giles,D.A.R.Y.L.,Roxy Dallas,Bonita Apple Bomb,Violet Knockout,Leanne Groll,Coco Frye,Atomic Mel-Down,Fancy Schmancy,Fancy Schmancy


In [17]:
period1 = lineup_df[0:22]
period1.head(2)

Unnamed: 0,jam_number,home_jammer_number,home_pivot_number,home_blocker_1_number,home_blocker_2_number,home_blocker_3_number,away_jammer_number,away_pivot_number,away_blocker_1_number,away_blocker_2_number,...,home_jammer_name,home_pivot_name,home_blocker_1_name,home_blocker_2_name,home_blocker_3_name,away_jammer_name,away_pivot_name,away_blocker_1_name,away_blocker_2_name,away_blocker_3_name
0,1,1706,23,17,314,68,480,301,912,950,...,Miss Tea Maven,Livvie Smalls,Fast and Luce,Caf Fiend,Sarah McKemie,Erin Jackson,Legs R Us,Eirinn Go Brawl,Tic Tac Toni,Stephanie Gentz
1,2,10,1680,41,4500,4,814,21,235,82,...,Giles,D.A.R.Y.L.,Roxy Dallas,Bonita Apple Bomb,Violet Knockout,Leanne Groll,Coco Frye,Atomic Mel-Down,Fancy Schmancy,Fancy Schmancy
