In [19]:
import requests
import pandas as pd
from pathlib import Path
import logging
import os
import sys
from scipy.stats import zscore
import dataframe_image as dfi

current_dir = os.path.dirname(os.path.abspath("__file__"))
parent_dir = os.path.abspath(os.path.join(current_dir, '..'))

if parent_dir not in sys.path:
    sys.path.insert(0, parent_dir)
from config.config import Config
from visualization.visualization import make_highlighted_table, team_styled_table

config = Config()

In [20]:
def fetch_playoff_player_stats(group_id, token=config._ballchasing_token):

    url = f"https://ballchasing.com/api/groups/{group_id}"
    headers = {"Authorization": token}
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    data = response.json()
    df = pd.json_normalize(data, record_path=["players"])
    return df

In [21]:
def fetch_playoff_team_stats(group_id, token=config._ballchasing_token):

    url = f"https://ballchasing.com/api/groups/{group_id}"
    headers = {"Authorization": token}
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    data = response.json()
    df = pd.json_normalize(data, record_path=["teams"])
    return df

In [22]:
def process():
    df = fetch_playoff_player_stats(config._playoff_group_url)
    
    features_to_keep = [
        "name",
        "team",
        "game_average.core.score",
        "game_average.core.goals",
        "game_average.core.assists",
        "game_average.core.saves",
        "game_average.core.shots",
        "game_average.core.shooting_percentage",
        "game_average.demo.inflicted",
        "game_average.demo.taken",
        "game_average.boost.amount_stolen_big",
        "game_average.boost.amount_stolen_small",
    ]

    df_final = df[features_to_keep].copy()
    df_final.columns = ["Player", "Team", "Avg Score", "Goals Per Game", "Assists Per Game", "Saves Per Game", "Shots Per Game", "Shooting %", "Demos Inf. Per Game", "Demos Taken Per Game", "Big Boost Stolen", "Small Boost Stolen"]

    df_final2 = df_final.copy()
    df_final2 = df_final2.drop("Team", axis=1)
    logging.info("Calculating Z-scores and Dominance Quotient for players...")
    df_final2["Avg Score Zscore"] = round(zscore(df_final2["Avg Score"]) * config.avg_score, 2)
    df_final2["Goals Per Game Zscore"] = round(zscore(df_final2["Goals Per Game"]) * config.goals_per_game, 2)
    df_final2["Assists Per Game Zscore"] = round(zscore(df_final2["Assists Per Game"]) * config.assists_per_game, 2)
    df_final2["Saves Per Game Zscore"] = round(zscore(df_final2["Saves Per Game"]) * config.saves_per_game, 2)
    df_final2["Shots Per Game Zscore"] = round(zscore(df_final2["Shots Per Game"]) * config.shots_per_game, 2)
    df_final2["Demos Inf. Per Game Zscore"] = round(zscore(df_final2["Demos Inf. Per Game"]) * config.demos_per_games, 2)
    df_final2["Demos Taken Per Game Zscore"] = round(zscore(df_final2["Demos Taken Per Game"]) * config.demos_taken_per_game, 2)
    df_final2["Big Boost Stolen Zscore"] = round(zscore(df_final2["Big Boost Stolen"]) * config.count_big_pads_stolen_per_game, 2)
    df_final2["Small Boost Stolen Zscore"] = round(zscore(df_final2["Small Boost Stolen"]) * config.count_small_pads_stolen_per_game, 2)
    df_final2["Shooting %"] = df_final2["Shooting %"] / 100

    # Calculate Dominance Quotient
    dq_summation = [i for i in df_final2.columns.tolist() if "Zscore" in i]
    df_final2["Dominance Quotient"] = df_final2[dq_summation].sum(axis=1) * 50

    df_final2 = df_final2[["Player", "Dominance Quotient", "Avg Score", "Goals Per Game", "Assists Per Game", "Saves Per Game", "Shots Per Game", "Shooting %", "Demos Inf. Per Game", "Demos Taken Per Game", "Big Boost Stolen", "Small Boost Stolen"]]

    df_final2 = df_final2.sort_values(by="Dominance Quotient", ascending=False).reset_index(drop=True)
    df_final2.index += 1

    return df_final2, df_final

In [23]:
def filter_player_data():
    """Filter the data."""
    # Load the data
    df_final2, df_final = process()

    logging.info("Calculating Z-scores and Dominance Quotient for players...")
    df_final2["Avg Score Zscore"] = round(zscore(df_final2["Avg Score"]) * config.avg_score, 2)
    df_final2["Goals Per Game Zscore"] = round(zscore(df_final2["Goals Per Game"]) * config.goals_per_game, 2)
    df_final2["Assists Per Game Zscore"] = round(zscore(df_final2["Assists Per Game"]) * config.assists_per_game, 2)
    df_final2["Saves Per Game Zscore"] = round(zscore(df_final2["Saves Per Game"]) * config.saves_per_game, 2)
    df_final2["Shots Per Game Zscore"] = round(zscore(df_final2["Shots Per Game"]) * config.shots_per_game, 2)
    df_final2["Demos Inf. Per Game Zscore"] = round(zscore(df_final2["Demos Inf. Per Game"]) * config.demos_per_games, 2)
    df_final2["Demos Taken Per Game Zscore"] = round(zscore(df_final2["Demos Taken Per Game"]) * config.demos_taken_per_game, 2)
    df_final2["Big Boost Stolen Zscore"] = round(zscore(df_final2["Big Boost Stolen"]) * config.count_big_pads_stolen_per_game, 2)
    df_final2["Small Boost Stolen Zscore"] = round(zscore(df_final2["Small Boost Stolen"]) * config.count_small_pads_stolen_per_game, 2)

    # for styling purposes
    df_final2["Shooting %"] = df_final2["Shooting %"] / 100

    # Calculate Dominance Quotient
    dq_summation = [i for i in df_final2.columns.tolist() if "Zscore" in i]
    df_final2["Dominance Quotient"] = df_final2[dq_summation].sum(axis=1) * 50
    df_final2 = df_final2[["Player", "Dominance Quotient", "Avg Score", "Goals Per Game", "Assists Per Game", "Saves Per Game", "Shots Per Game", "Shooting %", "Demos Inf. Per Game", "Demos Taken Per Game", "Big Boost Stolen", "Small Boost Stolen"]]
    df_final2 = df_final2.sort_values(by="Dominance Quotient", ascending=False).reset_index(drop=True)
    df_final2.index += 1

    # Save the cleaned data to a parquet file
    df_final2.to_parquet("../data/parquet/playoff_player_data_season_2.parquet")

    # highlighted table
    styled_player_df = make_highlighted_table(df_final2)
    dfi.export(styled_player_df, "../images/playoff_player_data_season_2.png")
    logging.info(f"Player DataFrame image exported")

    return styled_player_df, df_final2, df_final

In [24]:
def filter_team_data():
    """Filter the data."""
    team_df = fetch_playoff_team_stats(group_id=config._playoff_group_url)
    styled_player_data, df_final2, df_final = filter_player_data()
    # Load the data
    team_df = team_df.sort_values(by="name").reset_index(drop=True)
    team_df["Goal Diff"] = team_df["cumulative.core.goals"] - team_df["cumulative.core.goals_against"]
    team_df["Demo Diff"] = team_df["cumulative.demo.inflicted"] - team_df["cumulative.demo.taken"]
    team_df["Shots Diff"] = team_df["cumulative.core.shots"] - team_df["cumulative.core.shots_against"]
    team_df["Win % Zscore"] = zscore(team_df["cumulative.win_percentage"]) * config.win_perc_weight
    team_df["Goal Diff Zscore"] = zscore(team_df["Goal Diff"]) * config.goal_diff_weight
    team_df["Demo Diff Zscore"] = zscore(team_df["Demo Diff"]) * config.demo_diff_weight
    team_df["Shots Diff Zscore"] = zscore(team_df["Shots Diff"]) * config.shot_diff_weight

    # Calculate EPI Score
    team_df["EPI Score"] = team_df[[
        "Win % Zscore", 
        "Goal Diff Zscore", 
        "Demo Diff Zscore", 
        "Shots Diff Zscore", 
        # "Team Played Win % Zscore"
    ]].sum(axis=1)

    features_of_interest = ["name", "cumulative.win_percentage", "game_average.core.goals", "game_average.core.goals_against", "cumulative.core.goals", 
                        "cumulative.core.goals_against", "game_average.core.shots", "game_average.core.shots_against", "cumulative.core.shots", "cumulative.core.shots_against", "EPI Score"
                        ]
    
    team_df = team_df[features_of_interest]

    team_df["Goal Diff"] = team_df.loc[:, "cumulative.core.goals"] - team_df.loc[:, "cumulative.core.goals_against"]
    team_df["Shot Diff"] = team_df.loc[:, "cumulative.core.shots"] - team_df.loc[:, "cumulative.core.shots_against"]

    team_df = team_df.drop(columns=["cumulative.core.goals", "cumulative.core.goals_against", "cumulative.core.shots", "cumulative.core.shots_against"], axis=1)

    team_df.columns = ["Team", "Win %", "Goals For", "Goals Against", "Shots For", "Shots Against", "EPI Score", "Goal Diff", "Shot Diff"]

    team_df = team_df[["Team", "EPI Score", "Win %", "Goals For", "Goals Against", "Goal Diff", "Shots For", "Shots Against", "Shot Diff"]]

    df_final3 = df_final.copy()
    df_final3["Dominance Quotient"] = df_final3["Player"].map(df_final2.set_index("Player")["Dominance Quotient"])

    # Calculate Roster Rating
    player_dq_summation = df_final3.groupby("Team")["Dominance Quotient"].sum().reset_index()
    player_dq_summation_zipped = dict(zip(player_dq_summation["Team"], player_dq_summation["Dominance Quotient"]))
    df_final3["Roster Rating"] = df_final3["Team"].map(player_dq_summation_zipped)

    df_final3 = df_final3.drop("Player", axis=1)
    df_final3 = df_final3.groupby("Team")["Roster Rating"].mean().reset_index()

    df_final3 = df_final3.sort_values(by="Roster Rating", ascending=False).reset_index(drop=True)

    team_df = team_df.merge(df_final3, on="Team", how="left")

    team_df["EPI Score"] = round(team_df["EPI Score"] * 50, 2)

    team_df = team_df[["Team", "EPI Score", "Roster Rating", "Win %", "Goals For", "Goals Against", "Goal Diff", "Shots For", "Shots Against", "Shot Diff"]]

    team_df["Goal Diff"] = team_df["Goal Diff"].apply(lambda x: str(f"{x}") if x < 0 else f"+{x}")
    team_df["Shot Diff"] = team_df["Shot Diff"].apply(lambda x: str(f"{x}") if x < 0 else f"+{x}")

    # round win% to no decimal places
    team_df["Win %"] = round(team_df["Win %"]).astype(int)
    team_df["Win %"] = team_df["Win %"].apply(lambda x: f"{x}%")

    for col in team_df.select_dtypes(include="number"):
        if col != "EPI Score":
            team_df[col] = team_df[col].round(2)
        else:
            continue

    team_df = team_df.sort_values(by="EPI Score", ascending=False).reset_index(drop=True)
    team_df.index += 1

    team_df.to_parquet("../data/parquet/playoff_team_data_season_2.parquet")
    team_df.to_csv("../data/parquet/playoff_team_data_season_2.csv")

    styled_team_df = team_styled_table(team_df)
    dfi.export(styled_team_df, "../images/playoff_team_data_season_2.png")

    return styled_team_df, styled_player_data

In [25]:
styled_team_df, styled_player_data = filter_team_data()

In [26]:
styled_team_df

Unnamed: 0,Team,EPI Score,Roster Rating,Win %,Goals For,Goals Against,Goal Diff,Shots For,Shots Against,Shot Diff
1,MINORITIES,90.67,52.0,89%,2.78,1.33,13,8.11,6.22,17
2,EXECUTIVE PROJEC,11.8,-12.5,55%,2.18,1.91,3,7.27,9.0,-19
3,WONDER PETS,9.88,-52.0,50%,1.75,1.58,2,7.75,8.17,-5
4,KILLER BS,2.42,24.5,50%,2.0,2.3,-3,9.2,8.0,12
5,DIDDLERS,-3.59,-16.5,45%,1.73,1.91,-2,8.18,7.27,10
6,SCAVS,-13.2,14.5,44%,2.11,2.56,-4,8.0,9.22,-11
7,PUSHIN PULLIS,-47.42,-4.0,20%,1.6,2.0,-2,6.6,9.6,-15
8,BECKYARDIGANS,-50.56,-7.5,20%,1.8,3.2,-7,10.4,8.2,11


In [27]:
styled_player_data

Unnamed: 0,Player,Dominance Quotient,Avg Score,Goals Per Game,Assists Per Game,Saves Per Game,Shots Per Game,Shooting %,Demos Inf. Per Game,Demos Taken Per Game,Big Boost Stolen,Small Boost Stolen
1,jeridinho.,79.5,613.0,1.27,0.27,3.55,3.73,0.34%,1.0,0.64,404.82,202.91
2,nyk,53.5,490.2,1.2,0.4,1.8,4.2,0.29%,1.4,0.6,471.4,214.4
3,Bax,42.5,485.56,0.56,1.11,2.44,3.44,0.16%,1.22,1.11,248.11,244.78
4,Paperclip94,39.0,434.44,1.0,0.56,1.67,4.0,0.25%,1.44,1.33,231.33,235.44
5,desi,37.5,481.75,0.75,0.58,1.92,4.17,0.18%,1.83,0.75,299.08,318.83
6,Pullis.,16.0,427.2,0.4,0.8,2.4,2.4,0.17%,0.8,2.2,289.4,251.8
7,Xero,16.0,392.82,0.64,0.64,1.64,3.18,0.20%,1.18,1.64,400.91,239.73
8,Kyzeke,14.5,385.7,0.8,0.5,1.5,3.2,0.25%,1.3,1.3,278.6,264.6
9,buckystyle,14.0,401.8,0.6,0.5,2.0,3.5,0.17%,1.5,1.0,427.6,236.2
10,Ankles,9.5,291.89,0.78,1.22,0.44,2.0,0.39%,1.33,1.22,356.89,228.89
