In [8]:
import sys

# NOTE(review): absolute, machine-specific path -- anyone else running this
# notebook has to edit it (or install ``goalscorer_package`` instead).
package_paths = [r"C:\Users\benja\Documents\projects\goalscorers"]

# Make the local project importable. The membership check keeps the cell
# idempotent: re-running it does not stack duplicate entries on ``sys.path``.
for path in package_paths:
    if path not in sys.path:
        sys.path.append(path)
from goalscorer_package.constants import *
import goalscorer_package.data_cleaning as dc
import datetime as dt
import pandas as pd
import numpy as np
import scipy
import glob

# Notebook-wide pandas display settings, applied in one pass: no column
# truncation, up to 1000 rows, column width capped at 100, and floats rendered
# with three decimals, thousands separators and a leading space for the sign.
DISPLAY_OPTIONS = {
    "display.max_columns": None,
    "display.max_rows": 1000,
    "display.max_colwidth": 100,
    "display.float_format": "{: ,.3f}".format,
}
for option_name, option_value in DISPLAY_OPTIONS.items():
    pd.set_option(option_name, option_value)

# Input

In [9]:
# Season/league selection that is passed to ``data()`` further down.
# NOTE(review): ``xg_league_bool=True`` presumably flags the league as having
# xG data available -- confirm against the ``SeasonLeague`` definition.
season_league = SeasonLeague(SEASON_20_21, GERMAN_BUNDESLIGA, xg_league_bool=True)

In [10]:
def add_datetime(df: pd.DataFrame) -> pd.DataFrame:
    """Add parsed ``date``, ``time`` and combined ``datetime`` columns.

    Args:
        df: Frame with a ``Date`` column of ``dd/mm/YYYY`` strings and a
            ``Time`` column of ``HH:MM`` strings.

    Returns:
        A new frame with three extra columns: ``date`` (timestamps parsed
        from ``Date``), ``time`` (time-of-day objects parsed from ``Time``)
        and ``datetime`` (``date`` plus the time of day). Rows whose ``Date``
        or ``Time`` is missing get ``NaT`` in ``datetime`` instead of raising.
    """

    def _combine(x: pd.DataFrame) -> pd.Series:
        # Parsing "%H:%M" anchors every value on the same placeholder day, so
        # subtracting that day's midnight leaves only the time of day as a
        # timedelta. Adding it to ``date`` avoids a str -> datetime round trip,
        # which fails on missing times ("<date> NaT" is not parseable).
        clock = pd.to_datetime(x["Time"], format="%H:%M")
        return x["date"] + (clock - clock.dt.normalize())

    return df.assign(
        # Later keyword arguments can see columns created by earlier ones,
        # so ``datetime`` is computed from the freshly parsed ``date``.
        date=lambda x: pd.to_datetime(x.Date, format="%d/%m/%Y"),
        time=lambda x: pd.to_datetime(x.Time, format="%H:%M").dt.time,
        datetime=_combine,
    )


def add_fbref_team_names(df: pd.DataFrame) -> pd.DataFrame:
    """Append ``home_team`` / ``away_team`` columns with translated team names.

    Each of ``team_home`` and ``team_away`` is left-joined against a lookup
    frame built from ``FOOTBALL_DATA_TO_FBREF_TEAM_NAME_MAP`` (keys are the
    incoming names, values the replacement names). Names that have no entry
    in the map are carried over unchanged.

    Args:
        df: Frame containing ``team_home`` and ``team_away`` columns.

    Returns:
        The merged frame with ``home_team`` and ``away_team`` added.
    """
    merged = df
    # (column to join on, column that receives the mapped name)
    for source_col, target_col in (
        ("team_home", "home_team"),
        ("team_away", "away_team"),
    ):
        lookup = pd.DataFrame(
            {
                source_col: FOOTBALL_DATA_TO_FBREF_TEAM_NAME_MAP.keys(),
                target_col: FOOTBALL_DATA_TO_FBREF_TEAM_NAME_MAP.values(),
            }
        )
        # "m:1" asserts that every name appears at most once in the lookup.
        merged = merged.merge(lookup, how="left", on=[source_col], validate="m:1")

    # Unmapped names come back as NaN from the left join; fall back to the
    # original spelling for those rows.
    return merged.assign(
        home_team=merged["home_team"].fillna(merged["team_home"]),
        away_team=merged["away_team"].fillna(merged["team_away"]),
    )

In [11]:
def data(season_league: SeasonLeague) -> pd.DataFrame:
    """Read the raw historic-odds CSV for a season/league and add datetimes.

    The file name is ``"<season_str>-league-<league_id>-historic-odds.csv"``,
    appended directly to ``FilePath.FOOTBALL_DATA_RAW``.
    NOTE(review): plain string concatenation is used, so that constant
    presumably already ends with a path separator -- confirm.

    Args:
        season_league: Provides ``season.season_str`` and ``league.league_id``.

    Returns:
        The CSV contents passed through ``add_datetime``.
    """
    season = season_league.season.season_str
    comp_id = season_league.league.league_id
    csv_path = (
        FilePath.FOOTBALL_DATA_RAW + f"{season}-league-{comp_id}-historic-odds.csv"
    )
    return add_datetime(pd.read_csv(csv_path))

In [12]:
# Load the historic-odds frame (with parsed datetime columns) for ``season_league``.
df = data(season_league)

In [23]:
# Spot check a single fixture: is its "P>2.5" value missing (NaN)?
home_team = "Bayern Munich"
away_team = "Augsburg"
fixture_mask = (df["HomeTeam"] == home_team) & (df["AwayTeam"] == away_team)
# The first matching row is inspected; the bare expression is the cell output.
np.isnan(df.loc[fixture_mask, "P>2.5"].values[0])

True