In [14]:
import pandas as pd

In [15]:
from euroleague_api.player_stats import PlayerStats

In [16]:
import requests

In [17]:
# Create the player-stats client; it is passed as the first argument to
# get_player_stats further down.
ps = PlayerStats()

In [18]:
# Sanity check that the client was created (prints the default object repr).
print(ps)

<euroleague_api.player_stats.PlayerStats object at 0x105a3bcb0>


In [19]:
 def get_player_stats(
        ps,
        endpoint: str = "traditional",
        params: dict = None,
        # Not part of this draft signature yet:
        #phase_type_code: [str] = None,
        #statistic_mode: str = "PerGame"
    ) -> pd.DataFrame:
        """
        Draft signature of a wrapper for getting the players' stats for
        - all seasons
        - a single season
        - a range of seasons

        This version has no body: calling it performs no request and
        returns None. A complete function with the same name is defined in
        a later cell and replaces this one once that cell has run.

        Args:

            ps: The player-stats client object.

            endpoint (str, optional): The type of stats, available variables:
                - traditional
                - advanced
                - misc
                - scoring
                Defaults to "traditional".

            params (Dict[str, Union[str, int]], optional): A dictionary of
                parameters for the get request. Defaults to None (no extra
                parameters). None is used instead of a `{}` default so that
                no dictionary is shared between calls.

        Returns:

            None: this draft does not build the dataframe promised by the
            return annotation.
        """

In [20]:
def get_player_stats(
        self,
        endpoint: str,
        params: dict = None,
        phase_type_code: str = None,
        statistic_mode: str = "PerGame"
    ) -> pd.DataFrame:
        """
        A wrapper function for getting the players' stats for
        - all seasons
        - a single season
        - a range of seasons

        Args:

            self: Object exposing a `url` attribute (the API base URL), e.g.
                a `PlayerStats` instance.

            endpoint (str): The type of stats, available variables:
                - traditional
                - advanced
                - misc
                - scoring

            params (Dict[str, Union[str, int]], optional): A dictionary of
                parameters for the get request. The dictionary is copied and
                never modified. Defaults to None (no extra parameters).

            phase_type_code (Optional[str], optional): The phase of the season,
                available variables:
                - "RS" (regular season)
                - "PO" (play-off)
                - "FF" (final four)
                Defaults to None, which includes all phases.

            statistic_mode (str, optional): The aggregation of statistics,
                available variables:
                - PerGame
                - Accumulated
                - Per100Possesions
                Defaults to "PerGame".

        Raises:

            ValueError: If the endpoint is not applicable

            ValueError: If the phase_type_code is not applicable

            ValueError: If the statistic_mode is not applicable

            requests.HTTPError: If the API answers with an error status

        Returns:

            pd.DataFrame: A dataframe with the players' stats.
        """

        available_endpoints = ["traditional", "advanced", "misc", "scoring"]
        available_phase_type_code = ["RS", "PO", "FF"]
        available_stat_mode = ["PerGame", "Accumulated", "Per100Possesions"]

        # Validate before any network traffic so that a typo fails fast with a
        # clear message instead of an opaque HTTP/JSON error.
        if endpoint not in available_endpoints:
            raise ValueError(
                f"Statistic type, {endpoint}, is not applicable. "
                f"Available values: {available_endpoints}"
            )
        if statistic_mode not in available_stat_mode:
            raise ValueError(
                f"Statistic aggregation, {statistic_mode}, is not applicable. "
                f"Available values: {available_stat_mode}"
            )
        if (phase_type_code is not None
                and phase_type_code not in available_phase_type_code):
            raise ValueError(
                f"Phase type code, {phase_type_code}, is not applicable. "
                f"Available values: {available_phase_type_code}"
            )

        # Work on a private copy: neither the caller's dictionary nor a shared
        # default object may be modified by this call.
        query = dict(params) if params else {}
        query["statisticMode"] = statistic_mode
        # A None value is dropped from the query string by requests, which
        # is how "all phases" is requested.
        query["phaseTypeCode"] = phase_type_code
        query["limit"] = 400

        url_ = f"{self.url}/statistics/players/{endpoint}"

        r = requests.get(url_, params=query)
        r.raise_for_status()
        data = r.json()
        # The first page is capped at 400 rows; if the API reports more,
        # repeat the request with a limit large enough to get every row.
        if data["total"] > len(data["players"]):
            query["limit"] = data["total"] + 1
            r = requests.get(url_, params=query)
            r.raise_for_status()
            data = r.json()
        df = pd.json_normalize(data["players"])
        return df

In [21]:
# Fetch the "advanced" player statistics with the default options
df1 = get_player_stats(ps, endpoint="advanced")

In [22]:
# List the columns of the fetched frame to decide which ones to keep.
df1.columns

Index(['playerRanking', 'gamesPlayed', 'minutesPlayed',
       'effectiveFieldGoalPercentage', 'trueShootingPercentage',
       'offensiveReboundsPercentage', 'defensiveReboundsPercentage',
       'reboundsPercentage', 'assistsToTurnoversRatio', 'assistsRatio',
       'turnoversRatio', 'twoPointAttemptsRatio', 'threePointAttemptsRatio',
       'freeThrowsRate', 'possesions', 'player.code', 'player.name',
       'player.age', 'player.imageUrl', 'player.team.code',
       'player.team.tvCodes', 'player.team.name', 'player.team.imageUrl'],
      dtype='object')

In [23]:
import pandas as pd

In [49]:
# Columns retained for the cleaned dataset, in output order
columns_to_keep = [
    'player.name',
    'gamesPlayed',
    'minutesPlayed',
    'threePointAttemptsRatio',
    'trueShootingPercentage',
    'assistsToTurnoversRatio',
    'reboundsPercentage',
]

In [51]:
# Keep just the selected columns, as an independent copy of the fetched frame
df_cleaned = df1.loc[:, columns_to_keep].copy()

In [53]:
# Rename the API's camelCase / dotted labels to short snake_case names.
# DataFrame.rename leaves labels that are not in the mapping untouched, so
# re-running this cell is harmless; assigning Index.map(dict) would replace
# every unlisted (e.g. already renamed) label with NaN.
df_cleaned = df_cleaned.rename(columns={
    'player.name': 'player_name',
    'gamesPlayed': 'games_played',
    'minutesPlayed': 'mins',
    'threePointAttemptsRatio': '3PA_ratio',
    'trueShootingPercentage': 'true_shooting_pct',
    'assistsToTurnoversRatio': 'assist_to_ratio',
    'reboundsPercentage': 'rebound_pct',
})


In [55]:
# Keep only players with positive minutes, then discard any row that still
# has a missing value (removes irrelevant/incomplete records)
df_cleaned = (
    df_cleaned
    .loc[df_cleaned['mins'].gt(0)]
    .dropna()
)

In [57]:
# Inspect the filtered frame; at this point the percentage columns are still
# strings with a trailing '%'.
df_cleaned

Unnamed: 0,player_name,games_played,mins,3PA_ratio,true_shooting_pct,assist_to_ratio,rebound_pct
0,"KURUCS, ARTURS",70.0,6.755476,33%,37%,1.8,3.4%
1,"ABI, MUSTAFA",78.0,12.735684,21.4%,49.9%,0.8,5.4%
2,"BOZIC, PETAR",122.0,11.394262,44.3%,51.4%,1.0,5.4%
3,"SCHULTZE, SVEN",45.0,9.455185,34.4%,51.5%,0.6,7.7%
4,"MILOSEVIC, STRAHINJA",55.0,5.666970,10.1%,50.8%,0.7,11.1%
...,...,...,...,...,...,...,...
608,"TURKCAN, MIRSAD",129.0,28.495866,18.4%,57.2%,0.7,21.5%
609,"BEARD, TANOKA",73.0,29.057763,5.2%,58.8%,0.5,19.1%
610,"BLAIR, JOSEPH",65.0,33.773590,0.6%,59.7%,0.6,18.4%
611,"FORD, ALPHONSO",54.0,33.789506,16.9%,60.1%,0.9,7.2%


In [59]:
cols_to_clean = ['3PA_ratio', 'true_shooting_pct', 'rebound_pct']

# Remove '%' and convert to float.
# Casting to str first keeps this cell re-runnable: after the first run the
# columns are already float, and the .str accessor would raise
# AttributeError on them.
for col in cols_to_clean:
    df_cleaned[col] = df_cleaned[col].astype(str).str.rstrip('%').astype(float)

In [67]:
# games_played becomes an integer column; mins is rounded to 2 decimal places
df_cleaned = (
    df_cleaned
    .astype({'games_played': int})
    .round({'mins': 2})
)

In [69]:
# Inspect the result after the numeric conversions: percentage columns are
# floats, games_played is an integer and mins is rounded to 2 decimals.
df_cleaned

Unnamed: 0,player_name,games_played,mins,3PA_ratio,true_shooting_pct,assist_to_ratio,rebound_pct
0,"KURUCS, ARTURS",70,6.76,33.0,37.0,1.8,3.4
1,"ABI, MUSTAFA",78,12.74,21.4,49.9,0.8,5.4
2,"BOZIC, PETAR",122,11.39,44.3,51.4,1.0,5.4
3,"SCHULTZE, SVEN",45,9.46,34.4,51.5,0.6,7.7
4,"MILOSEVIC, STRAHINJA",55,5.67,10.1,50.8,0.7,11.1
...,...,...,...,...,...,...,...
608,"TURKCAN, MIRSAD",129,28.50,18.4,57.2,0.7,21.5
609,"BEARD, TANOKA",73,29.06,5.2,58.8,0.5,19.1
610,"BLAIR, JOSEPH",65,33.77,0.6,59.7,0.6,18.4
611,"FORD, ALPHONSO",54,33.79,16.9,60.1,0.9,7.2


In [71]:
# Write the cleaned table to a CSV file, leaving out the index column
df_cleaned.to_csv(path_or_buf='dataset_euroleague.csv', index=False)