In [39]:
import requests
import pandas as pd

In [40]:
def fetch_mlb_batter_stats(season, league_id, limit=1000, timeout=30):
    """Fetch regular-season hitting stats for one league and season.

    Queries the MLB Stats API ``/api/v1/stats`` endpoint and flattens the
    returned splits into a table with one row per split.

    Args:
        season: Season year sent as the ``season`` query parameter.
        league_id: League identifier (103 - AL, 104 - NL).
        limit: Maximum number of rows requested from the API.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the notebook forever.

    Returns:
        pandas.DataFrame with the columns Season, PlayerID, PlayerName, Team,
        GamesPlayed, AtBats, Runs, Hits, HomeRuns, RBIs, StolenBases, Walks,
        StrikeOuts, BattingAverage, OnBasePercentage, SluggingPercentage, OPS.
        Values are stored exactly as the API returns them (no type
        conversion). A field missing from a split becomes a missing value
        instead of aborting the whole download. If the response contains no
        stat groups or no splits, an empty DataFrame is returned.

    Raises:
        Exception: If the API answers with a status code other than 200.
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires (raised by ``requests.get``).
    """
    # API endpoint for aggregated player statistics
    url = "https://statsapi.mlb.com/api/v1/stats"

    # Query parameters for the API call
    params = {
        "stats": "season",           # statistic type
        "group": "hitting",          # hitting statistics
        "season": season,            # season year
        "leagueId": league_id,       # league (103 - AL, 104 - NL)
        "gameType": "R",             # regular season
        "limit": limit               # cap on the number of players returned
    }

    # Always pass a timeout: requests waits indefinitely by default
    response = requests.get(url, params=params, timeout=timeout)

    # Check if the request was successful
    if response.status_code != 200:
        print(f"Error response: {response.text}")  # Print the response text for debugging
        raise Exception(f"API request failed with status code {response.status_code}")

    # Parse the JSON response
    data = response.json()

    # The payload nests the rows under stats[0].splits; tolerate an empty or
    # absent "stats" list rather than failing with IndexError/KeyError
    stat_groups = data.get("stats") or []
    player_stats = (stat_groups[0].get("splits") or []) if stat_groups else []

    # Output column -> key inside each split's "stat" object (order = column order)
    stat_columns = {
        "GamesPlayed": "gamesPlayed",
        "AtBats": "atBats",
        "Runs": "runs",
        "Hits": "hits",
        "HomeRuns": "homeRuns",
        "RBIs": "rbi",
        "StolenBases": "stolenBases",
        "Walks": "baseOnBalls",
        "StrikeOuts": "strikeOuts",
        "BattingAverage": "avg",
        "OnBasePercentage": "obp",
        "SluggingPercentage": "slg",
        "OPS": "ops",
    }

    # Create a list of dictionaries with player stats
    players = []
    for split in player_stats:
        # Any of these sub-objects may be missing from a split; fall back to
        # an empty dict so the lookups below yield None instead of raising
        player = split.get("player") or {}
        team = split.get("team") or {}
        stat = split.get("stat") or {}

        player_info = {
            "Season": season,  # tag every row with the requested season
            "PlayerID": player.get("id"),
            "PlayerName": player.get("fullName"),
            "Team": team.get("name"),
        }
        player_info.update(
            (column, stat.get(api_key)) for column, api_key in stat_columns.items()
        )
        players.append(player_info)

    # Convert the list of dictionaries to a DataFrame
    return pd.DataFrame(players)

In [41]:
# Season range and league identifiers used for the download
FIRST_SEASON = 2000
LAST_SEASON = 2023
AL_LEAGUE_ID = 103  # American League
NL_LEAGUE_ID = 104  # National League

# Collect one combined frame per season and concatenate once at the end;
# re-concatenating a growing DataFrame inside the loop copies all earlier
# rows on every iteration
season_frames = []

# Loop through each season from FIRST_SEASON to LAST_SEASON (inclusive)
for season in range(FIRST_SEASON, LAST_SEASON + 1):
    try:
        # Fetch data for both leagues and combine
        al_batter_stats_df = fetch_mlb_batter_stats(season, AL_LEAGUE_ID, limit=1000)
        nl_batter_stats_df = fetch_mlb_batter_stats(season, NL_LEAGUE_ID, limit=1000)
        season_stats_df = pd.concat([al_batter_stats_df, nl_batter_stats_df], ignore_index=True)
    except Exception as e:
        # Best effort: a season is skipped entirely if either league fails
        print(f"Failed to fetch data for season {season}: {e}")
        continue

    season_frames.append(season_stats_df)
    print(f"Data for season {season} fetched successfully.")

# pd.concat raises on an empty list, so fall back to an empty frame when
# every season failed
if season_frames:
    all_seasons_df = pd.concat(season_frames, ignore_index=True)
else:
    all_seasons_df = pd.DataFrame()

Data for season 2000 fetched successfully.
Data for season 2001 fetched successfully.
Data for season 2002 fetched successfully.
Data for season 2003 fetched successfully.
Data for season 2004 fetched successfully.
Data for season 2005 fetched successfully.
Data for season 2006 fetched successfully.
Data for season 2007 fetched successfully.
Data for season 2008 fetched successfully.
Data for season 2009 fetched successfully.
Data for season 2010 fetched successfully.
Data for season 2011 fetched successfully.
Data for season 2012 fetched successfully.
Data for season 2013 fetched successfully.
Data for season 2014 fetched successfully.
Data for season 2015 fetched successfully.
Data for season 2016 fetched successfully.
Data for season 2017 fetched successfully.
Data for season 2018 fetched successfully.
Data for season 2019 fetched successfully.
Data for season 2020 fetched successfully.
Data for season 2021 fetched successfully.
Data for season 2022 fetched successfully.
Data for season 2023 fetched successfully.

In [42]:
# Show the first rows of the combined table, followed by its total row count
print(
    all_seasons_df.head(),
    f"Total players from 2000 to 2023: {len(all_seasons_df)}",
    sep="\n",
)

# Write the combined table to CSV (without the index column) for later use
all_seasons_df.to_csv(path_or_buf="mlb_batter_stats_2000_2023.csv", index=False)

   Season  PlayerID         PlayerName               Team  GamesPlayed  \
0    2000    114596  Nomar Garciaparra     Boston Red Sox          140   
1    2000    113889       Darin Erstad     Anaheim Angels          157   
2    2000    120903      Manny Ramirez  Cleveland Indians          118   
3    2000    113232     Carlos Delgado  Toronto Blue Jays          162   
4    2000    116539        Derek Jeter   New York Yankees          148   

   AtBats  Runs  Hits  HomeRuns  RBIs  StolenBases  Walks  StrikeOuts  \
0     529   104   197        21    96            5     61          50   
1     676   121   240        25   100           28     64          82   
2     439    92   154        38   122            1     86         117   
3     569   115   196        41   137            0    123         104   
4     593   119   201        15    73           22     68          99   

  BattingAverage OnBasePercentage SluggingPercentage    OPS  
0           .372             .434               .599  