In [None]:
import statsapi

# Maps a full MLB team name to its Fangraphs abbreviation ("abbr") and the
# numeric team id ("team_num") that get_team_id() returns.
FANGRAPHS_TEAM_ID = {
    # Current franchise names.
    "Arizona Diamondbacks": {"abbr": "ARI", "team_num": 15},
    "Atlanta Braves": {"abbr": "ATL", "team_num": 16},
    "Baltimore Orioles": {"abbr": "BAL", "team_num": 2},
    "Boston Red Sox": {"abbr": "BOS", "team_num": 3},
    "Chicago White Sox": {"abbr": "CHW", "team_num": 4},
    "Chicago Cubs": {"abbr": "CHC", "team_num": 17},
    "Cincinnati Reds": {"abbr": "CIN", "team_num": 18},
    "Cleveland Guardians": {"abbr": "CLE", "team_num": 5},
    "Colorado Rockies": {"abbr": "COL", "team_num": 19},
    "Detroit Tigers": {"abbr": "DET", "team_num": 6},
    "Houston Astros": {"abbr": "HOU", "team_num": 21},
    "Kansas City Royals": {"abbr": "KCR", "team_num": 7},
    "Los Angeles Angels": {"abbr": "LAA", "team_num": 1},
    "Los Angeles Dodgers": {"abbr": "LAD", "team_num": 22},
    "Miami Marlins": {"abbr": "MIA", "team_num": 20},
    "Milwaukee Brewers": {"abbr": "MIL", "team_num": 23},
    "Minnesota Twins": {"abbr": "MIN", "team_num": 8},
    "New York Yankees": {"abbr": "NYY", "team_num": 9},
    "New York Mets": {"abbr": "NYM", "team_num": 25},
    "Oakland Athletics": {"abbr": "OAK", "team_num": 10},
    "Philadelphia Phillies": {"abbr": "PHI", "team_num": 26},
    "Pittsburgh Pirates": {"abbr": "PIT", "team_num": 27},
    "San Diego Padres": {"abbr": "SDP", "team_num": 29},
    "San Francisco Giants": {"abbr": "SFG", "team_num": 30},
    "Seattle Mariners": {"abbr": "SEA", "team_num": 11},
    "St. Louis Cardinals": {"abbr": "STL", "team_num": 28},
    "Tampa Bay Rays": {"abbr": "TBR", "team_num": 12},
    "Texas Rangers": {"abbr": "TEX", "team_num": 13},
    "Toronto Blue Jays": {"abbr": "TOR", "team_num": 14},
    "Washington Nationals": {"abbr": "WSN", "team_num": 24},
    # Alternate / historical names. Each shares its team_num with the
    # franchise entry above so lookups by an older or newer name still work.
    "Montreal Expos": {"abbr": "MON", "team_num": 24},
    "Cleveland Indians": {"abbr": "CLE", "team_num": 5},
    "Tampa Bay Devil Rays": {"abbr": "TBD", "team_num": 12},
    "Anaheim Angels": {"abbr": "ANA", "team_num": 1},
    "Florida Marlins": {"abbr": "FLA", "team_num": 20},
    # The club dropped "Oakland" from its name starting with the 2025 season,
    # so 2025+ data lists it simply as "Athletics".
    "Athletics": {"abbr": "ATH", "team_num": 10},
}

def get_team_id(team_name):
    """
    Return the Fangraphs numeric team id for an MLB team name.

    Args:
        team_name: Full team name; must be a key of FANGRAPHS_TEAM_ID.

    Returns:
        The 'team_num' value stored for that team.

    Raises:
        ValueError: If the name is not in FANGRAPHS_TEAM_ID.
    """
    entry = FANGRAPHS_TEAM_ID.get(team_name)
    if entry is None:
        raise ValueError(f"Team name '{team_name}' not found in Fangraphs team ID mapping.")
    return entry['team_num']


def convert_percentage_to_float(percentage_str):
    """
    Turn a percentage string such as "12.5%" into a fraction (0.125).

    Any '%' characters at either end of the string are removed before the
    remainder is parsed as a float and divided by 100.
    """
    number_text = percentage_str.strip("%")
    percent_value = float(number_text)
    return percent_value / 100


def get_player_stats(mlb_player_id, team_df):
    """
    Return one player's stat values from a team stats table.

    The player's name is built from the 'first_name' and 'last_name' fields
    returned by statsapi.player_stat_data() and matched exactly against the
    "Name" column of team_df.

    Args:
        mlb_player_id: Player id passed to statsapi.player_stat_data().
        team_df: DataFrame with a "Name" column. The first column is dropped
            from the result positionally, so it is expected to be "Name".

    Returns:
        A 1-D NumPy array holding the values of every column after the first.
        If the player is not in team_df (including when team_df has no rows)
        the array is all zeros, so the result always has the same length for
        a given table layout.
    """
    import pandas as pd  # local import: this cell does not import pandas itself

    data = statsapi.player_stat_data(mlb_player_id)
    full_name = f"{data['first_name']} {data['last_name']}"
    matches = team_df[team_df["Name"] == full_name]

    if matches.empty:
        # Build the zero vector from the column layout rather than from
        # team_df.iloc[0], which raises IndexError when the table is empty.
        return pd.Series(0, index=team_df.columns[1:], dtype=object).to_numpy()

    # Keep only the first match: flattening several rows with the same name
    # would yield a longer vector with a name string embedded in the stats.
    return matches.head(1).to_numpy().flatten()[1:]

In [None]:
import requests
import time
import pandas as pd
from bs4 import BeautifulSoup

# Labels applied, in order, to the <td> cells of each scraped hitting row.
# Every position labelled "DELETE" is a cell the scrapers discard right after
# building the DataFrame (they call drop(columns=["DELETE"])).
ADVANCED_HITTING_COLUMNS = [
    "DELETE",
    "Name",
    "DELETE",
    "PA",
    "BB%",
    "K%",
    "BB/K",
    "DELETE",
    "AVG",
    "OBP",
    "SLG",
    "OPS",
    "DELETE",
    "ISO",
    "Spd",
    "BABIP",
    "DELETE",
    "DELETE",
    "DELETE",
    "DELETE",
    "wSB",
    "DELETE",
    "wRC",
    "wRAA",
    "wOBA",
    "wRC+",
]
# Hitting columns whose scraped text is a percentage ("12.5%"); the scrapers
# convert them to fractions with convert_percentage_to_float().
ADVANCED_HITTING_PERCENT_COLUMNS = ["BB%", "K%"]
# Backward-compatible alias for the original, misspelled constant name, which
# existing code still references.
ADVACNED_HITTING_PERCENT_COLUMNS = ADVANCED_HITTING_PERCENT_COLUMNS


def _scrape_hitting_leaderboard(hit_url, max_attempts=5, retry_delay=5):
    """
    Download a Fangraphs hitting leaderboard page and parse it into a DataFrame.

    Args:
        hit_url: Fully built leaderboard URL.
        max_attempts: Number of request attempts before giving up (>= 1).
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        DataFrame with "Name" as the first column followed by the stat columns
        of ADVANCED_HITTING_COLUMNS (minus the "DELETE" placeholders). Stat
        columns are numeric; the columns in ADVACNED_HITTING_PERCENT_COLUMNS
        are stored as fractions. The frame has no rows if the table is empty.

    Raises:
        requests.RequestException: If the last attempt still fails or returns
            an HTTP error status.
        ValueError: If max_attempts < 1, the page has no stats table, or a row
            does not have the expected number of cells.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    page = None
    for attempt in range(1, max_attempts + 1):
        try:
            page = requests.get(hit_url, timeout=15)
            # An error page has no stats table, so treat 4xx/5xx as a failed
            # attempt instead of trying to parse it.
            page.raise_for_status()
            break
        except requests.RequestException as e:
            # Bounded retry: a permanently failing URL must not hang forever.
            if attempt == max_attempts:
                raise
            print(f"Error fetching data, retrying after {retry_delay} seconds: {e}")
            time.sleep(retry_delay)

    soup = BeautifulSoup(page.content, "html.parser")
    table = soup.find("div", class_="table-scroll")
    body = table.find("tbody") if table is not None else None
    if body is None:
        raise ValueError(f"No stats table found at {hit_url}")

    # Only rows that carry a player-name cell are data rows.
    team_data = [
        [cell.text.strip() for cell in row.find_all("td")]
        for row in body.find_all("tr")
        if row.find("td", {"data-stat": "Name"}) is not None
    ]

    team_df = pd.DataFrame(team_data, columns=ADVANCED_HITTING_COLUMNS)
    team_df = team_df.drop(columns=["DELETE"])

    for col in ADVACNED_HITTING_PERCENT_COLUMNS:
        team_df[col] = team_df[col].apply(convert_percentage_to_float)
    # Replace each column rather than assigning through .iloc: .iloc writes
    # into the existing object-dtype columns, leaving them non-numeric.
    for col in team_df.columns[1:]:
        team_df[col] = pd.to_numeric(team_df[col])
    return team_df


def get_batting_season_stats(game_date, team):
    """
    Scrape a team's hitting stats from March 1 of the game's season through
    the day before the game.

    Args:
        game_date: Game date as a "YYYY-MM-DD" string; the season is taken
            from the part before the first "-".
        team: Value for the URL's `team` query parameter (call sites pass a
            numeric team id such as 21).

    Returns:
        DataFrame of per-player hitting stats; see _scrape_hitting_leaderboard.
    """
    season = game_date.split("-")[0]
    start_date = f"{season}-03-01"
    t_minus_1 = (pd.to_datetime(game_date) - pd.DateOffset(days=1)).date()
    hit_url = f"https://www.fangraphs.com/leaders/major-league?startdate={start_date}&enddate={t_minus_1}&season={season}&season1={season}&month=1000&ind=0&pageitems=200&team={team}&type=1&qual=1"
    print(hit_url)
    return _scrape_hitting_leaderboard(hit_url)


def get_two_year_stats(game_date, team):
    """
    Scrape a team's hitting stats for the 730 days ending the day before
    the game.

    Args:
        game_date: Game date, parseable by pd.to_datetime.
        team: Value for the URL's `team` query parameter.

    Returns:
        DataFrame of per-player hitting stats; see _scrape_hitting_leaderboard.
    """
    start_date = (pd.to_datetime(game_date) - pd.DateOffset(days=730)).date()
    t_minus_1 = (pd.to_datetime(game_date) - pd.DateOffset(days=1)).date()
    # NOTE(review): unlike the other hitting URLs this one sends no
    # season/season1 parameters -- confirm Fangraphs honors the full
    # multi-season date range without them.
    hit_url = f"https://www.fangraphs.com/leaders/major-league?startdate={start_date}&enddate={t_minus_1}&month=1000&ind=0&pageitems=200&team={team}&type=1&qual=1"
    return _scrape_hitting_leaderboard(hit_url)


def get_last_x_batter_stats(game_date, team, x_days_up_to=8):
    """
    Scrape a team's hitting stats for a short window ending the day before
    the game.

    Args:
        game_date: Game date as a "YYYY-MM-DD" string.
        team: Value for the URL's `team` query parameter.
        x_days_up_to: The window starts this many days before game_date and
            ends one day before it.

    Returns:
        DataFrame of per-player hitting stats; see _scrape_hitting_leaderboard.
    """
    season = game_date.split("-")[0]
    t_minus_1 = (pd.to_datetime(game_date) - pd.DateOffset(days=1)).date()
    window_start = (pd.to_datetime(game_date) - pd.DateOffset(days=x_days_up_to)).date()
    hit_url = f"https://www.fangraphs.com/leaders/major-league?startdate={window_start}&enddate={t_minus_1}&season={season}&season1={season}&month=1000&ind=0&pageitems=200&team={team}&type=1&qual=1"
    return _scrape_hitting_leaderboard(hit_url)


# Scratch calls kept from manual testing; team 21 is "Houston Astros" in
# FANGRAPHS_TEAM_ID. Uncomment to try the helpers against live data.
# yordon = 670541
# last_x = get_last_x_batter_stats("2025-04-30", 21)
# season = get_batting_season_stats("2025-04-30", 21)
# yordon_stats = get_player_stats(yordon, season)
# get_two_year_stats("2025-04-30", 21)
# Runs whenever this cell executes and performs a live Fangraphs request for
# team 20 (the Marlins in FANGRAPHS_TEAM_ID), covering 2017-03-01 through
# 2017-03-31.
get_batting_season_stats("2017-04-01", 20)

In [None]:
import requests
import time
import pandas as pd
from bs4 import BeautifulSoup

# Labels applied, in order, to the <td> cells of each scraped pitching row.
# Every position labelled "DELETE" is a cell the pitching scrapers discard
# right after building the DataFrame (they call drop(columns=["DELETE"])).
ADVANCED_PITCHING_COLUMNS = [
    "DELETE",
    "Name",
    "DELETE",
    "K/9",
    "BB/9",
    "K/BB",
    "HR/9",
    "DELETE",
    "K%",
    "BB%",
    "K-BB%",
    "DELETE",
    "AVG",
    "WHIP",
    "BABIP",
    "LOB%",
    "DELETE",
    "ERA-",
    "FIP-",
    "xFIP-",
    "DELETE",
    "ERA",
    "FIP",
    "E-F",
    "DELETE",
    "xFIP",
    "SIERA",
]
# Pitching columns whose scraped text is a percentage; the scrapers convert
# them to fractions with convert_percentage_to_float().
ADVANCED_PITCHING_PERCENTAGE_COLUMNS = ["K%", "BB%", "K-BB%", "LOB%"]


def _scrape_pitching_leaderboard(pitch_url, max_attempts=5, retry_delay=5):
    """
    Download a Fangraphs pitching leaderboard page and parse it into a DataFrame.

    Args:
        pitch_url: Fully built leaderboard URL.
        max_attempts: Number of request attempts before giving up (>= 1).
        retry_delay: Seconds to sleep between failed attempts.

    Returns:
        DataFrame with "Name" as the first column followed by the stat columns
        of ADVANCED_PITCHING_COLUMNS (minus the "DELETE" placeholders). Stat
        columns are numeric; the columns in
        ADVANCED_PITCHING_PERCENTAGE_COLUMNS are stored as fractions. The
        frame has no rows if the table is empty.

    Raises:
        requests.RequestException: If the last attempt still fails or returns
            an HTTP error status.
        ValueError: If max_attempts < 1, the page has no stats table, or a row
            does not have the expected number of cells.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be at least 1")

    page = None
    for attempt in range(1, max_attempts + 1):
        try:
            page = requests.get(pitch_url, timeout=15)
            # An error page has no stats table, so treat 4xx/5xx as a failed
            # attempt instead of trying to parse it.
            page.raise_for_status()
            break
        except requests.RequestException as e:
            # Bounded retry: a permanently failing URL must not hang forever.
            if attempt == max_attempts:
                raise
            print(f"Error fetching data, retrying after {retry_delay} seconds: {e}")
            time.sleep(retry_delay)

    soup = BeautifulSoup(page.content, "html.parser")
    table = soup.find("div", class_="table-scroll")
    body = table.find("tbody") if table is not None else None
    if body is None:
        raise ValueError(f"No stats table found at {pitch_url}")

    # Only rows that carry a player-name cell are data rows.
    team_data = [
        [cell.text.strip() for cell in row.find_all("td")]
        for row in body.find_all("tr")
        if row.find("td", {"data-stat": "Name"}) is not None
    ]

    team_df = pd.DataFrame(team_data, columns=ADVANCED_PITCHING_COLUMNS)
    team_df = team_df.drop(columns=["DELETE"])

    for col in ADVANCED_PITCHING_PERCENTAGE_COLUMNS:
        team_df[col] = team_df[col].apply(convert_percentage_to_float)
    # Replace each column rather than assigning through .iloc: .iloc writes
    # into the existing object-dtype columns, leaving them non-numeric.
    for col in team_df.columns[1:]:
        team_df[col] = pd.to_numeric(team_df[col])
    return team_df


def get_pitching_season_stats(game_date, team, type="starter"):
    """
    Scrape a team's pitching stats from March 1 of the game's season through
    the day before the game.

    Args:
        game_date: Game date as a "YYYY-MM-DD" string; the season is taken
            from the part before the first "-".
        team: Value for the URL's `team` query parameter (call sites pass a
            numeric team id such as 21).
        type: "starter" or "reliever". Its first three letters ("sta"/"rel")
            are sent as the URL's `stats` parameter.

    Returns:
        DataFrame of per-pitcher stats; see _scrape_pitching_leaderboard.

    Raises:
        ValueError: If type is neither "starter" nor "reliever".
    """
    if type not in ["starter", "reliever"]:
        raise ValueError("Type must be either 'starter' or 'reliever'")
    # Use a separate local instead of rebinding `type`, which shadows the
    # builtin (the parameter name itself is kept for keyword callers).
    role_code = type[:3]
    season = game_date.split("-")[0]
    start_date = f"{season}-03-01"
    t_minus_1 = (pd.to_datetime(game_date) - pd.DateOffset(days=1)).date()
    pitch_url = f"https://www.fangraphs.com/leaders/major-league?startdate={start_date}&enddate={t_minus_1}&season={season}&season1={season}&month=1000&ind=0&pageitems=200&team={team}&type=1&qual=1&stats={role_code}"
    return _scrape_pitching_leaderboard(pitch_url)


def get_last_x_starting_pitcher_stats(game_date, team, x_days_up_to=8, type="starter"):
    """
    Scrape a team's pitching stats for a short window ending the day before
    the game.

    Despite the name, this serves both roles: pass type="reliever" to get
    bullpen stats.

    Args:
        game_date: Game date as a "YYYY-MM-DD" string.
        team: Value for the URL's `team` query parameter.
        x_days_up_to: The window starts this many days before game_date and
            ends one day before it.
        type: "starter" or "reliever". Its first three letters ("sta"/"rel")
            are sent as the URL's `stats` parameter.

    Returns:
        DataFrame of per-pitcher stats; see _scrape_pitching_leaderboard.

    Raises:
        ValueError: If type is neither "starter" nor "reliever".
    """
    if type not in ["starter", "reliever"]:
        raise ValueError("Type must be either 'starter' or 'reliever'")
    role_code = type[:3]
    season = game_date.split("-")[0]
    t_minus_1 = (pd.to_datetime(game_date) - pd.DateOffset(days=1)).date()
    x_back = (pd.to_datetime(game_date) - pd.DateOffset(days=x_days_up_to)).date()
    pitch_url = f"https://www.fangraphs.com/leaders/major-league?startdate={x_back}&enddate={t_minus_1}&season={season}&season1={season}&month=1000&ind=0&pageitems=200&team={team}&type=1&qual=1&stats={role_code}"
    return _scrape_pitching_leaderboard(pitch_url)


def remove_bullpen_yesterday(bullpen_long_time, bullpen_yesterday):
    """
    Return the rows of bullpen_long_time whose "Name" does not appear in
    bullpen_yesterday's "Name" column.
    """
    names_used_yesterday = bullpen_yesterday["Name"]
    pitched_yesterday = bullpen_long_time["Name"].isin(names_used_yesterday)
    return bullpen_long_time[~pitched_yesterday]


def get_pitcher_team_features(pitcher_df):
    """
    Collapse a pitcher table into one feature vector: the mean of every
    column except "Name", returned as a 1-D NumPy array.
    """
    stats_only = pitcher_df.drop(columns=["Name"])
    column_means = stats_only.mean(axis=0)
    return column_means.to_numpy().flatten()


# Demo: pull reliever stats for team 21 ("Houston Astros" in FANGRAPHS_TEAM_ID)
# over three windows that all end the day before game_date. Each call performs
# a live Fangraphs request.
game_date = "2025-05-04"
# Window starting 31 days before game_date.
overall_bullpen_month = get_last_x_starting_pitcher_stats(game_date, 21, 31, "reliever")
# Window starting 8 days before game_date.
bullpen_last_week = get_last_x_starting_pitcher_stats(game_date, 21, 8, "reliever")
# x_days_up_to=1 makes the window start and end on the day before game_date.
bullpen_yesterday = get_last_x_starting_pitcher_stats(game_date, 21, 1, "reliever")
# Relievers from the last-week table who do not appear in yesterday's table.
bullpen_fresh_week = remove_bullpen_yesterday(bullpen_last_week, bullpen_yesterday)
# Column means across the relievers listed in yesterday's table.
print(get_pitcher_team_features(bullpen_yesterday))


In [None]:
# Demo: starter stats for team 21, reusing game_date from the previous cell.
# Window starting 14 days before game_date and ending the day before it.
starters_biweekly = get_last_x_starting_pitcher_stats(game_date, 21, 14, "starter")
# March 1 of the season through the day before game_date.
starters_season = get_pitching_season_stats(game_date, 21, "starter")
print(starters_season)

# Player id passed to get_player_stats(), which resolves it to a name via
# statsapi and looks that name up in each table.
hunter_brown = 686613
season_stats = get_player_stats(hunter_brown, starters_season)
print(season_stats)
biweekly_stats = get_player_stats(hunter_brown, starters_biweekly)
print(biweekly_stats)