### ABA League Stats Dataset

* Scrape Players Stats data  
* Change data type
* Rename positions
* Add new column and fill each row with league name
* Export to CSV

In [2]:
# Scrape Players Stats for German BBL

import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # so we can see all columns in a wide DataFrame

years = list(range(2019, 2025))
url_start = "https://basketball.realgm.com/international/league/18/Adriatic-League-Liga-ABA/stats/{}/Per_48/Qualified/All/points/{}/desc/1/Regular_Season"

data = []  # List to store parsed data

for year in years:
    for position in ['PG', 'SG', 'SF', 'PF', 'C']:
        url = url_start.format(year, position)  # Properly formatted URL
        response = requests.get(url)
        if not response.ok:
            print(f"Failed to fetch data for year {year}, position {position}")
            continue
        soup = BeautifulSoup(response.content, 'html.parser')

        rows = soup.select('.tablesaw tbody tr')
        print(f"Year: {year}, Position: {position}, Rows: {len(rows)}")  # Debugging print

        for row in rows:
            player = [year, position]  # Add the year and position to the player data
            player += [cell.text.strip() for cell in row.find_all('td')]
            data.append(player)

# Creating DataFrame from the collected data
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']

df = pd.DataFrame(data, columns=columns)

# # Saving DataFrame to a CSV file
# df.to_csv('euroleague_stats.csv', index=False)

# Display DataFrame
# df # Displaying just the first few rows for clarity


Year: 2019, Position: PG, Rows: 17
Year: 2019, Position: SG, Rows: 25
Year: 2019, Position: SF, Rows: 23
Year: 2019, Position: PF, Rows: 16
Year: 2019, Position: C, Rows: 11
Year: 2020, Position: PG, Rows: 24
Year: 2020, Position: SG, Rows: 29
Year: 2020, Position: SF, Rows: 27
Year: 2020, Position: PF, Rows: 30
Year: 2020, Position: C, Rows: 25
Year: 2021, Position: PG, Rows: 27
Year: 2021, Position: SG, Rows: 35
Year: 2021, Position: SF, Rows: 35
Year: 2021, Position: PF, Rows: 30
Year: 2021, Position: C, Rows: 27
Year: 2022, Position: PG, Rows: 36
Year: 2022, Position: SG, Rows: 34
Year: 2022, Position: SF, Rows: 37
Year: 2022, Position: PF, Rows: 43
Year: 2022, Position: C, Rows: 26
Year: 2023, Position: PG, Rows: 36
Year: 2023, Position: SG, Rows: 32
Year: 2023, Position: SF, Rows: 38
Year: 2023, Position: PF, Rows: 35
Year: 2023, Position: C, Rows: 28
Year: 2024, Position: PG, Rows: 31
Year: 2024, Position: SG, Rows: 41
Year: 2024, Position: SF, Rows: 28
Year: 2024, Position: PF,

In [3]:
# Check data type for columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 898 entries, 0 to 897
Data columns (total 25 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Year      898 non-null    int64 
 1   Position  898 non-null    object
 2   #         898 non-null    object
 3   Player    898 non-null    object
 4   Team      898 non-null    object
 5   GP        898 non-null    object
 6   MPG       898 non-null    object
 7   PPG       898 non-null    object
 8   FGM       898 non-null    object
 9   FGA       898 non-null    object
 10  FG%       898 non-null    object
 11  3PM       898 non-null    object
 12  3PA       898 non-null    object
 13  3P%       898 non-null    object
 14  FTM       898 non-null    object
 15  FTA       898 non-null    object
 16  FT%       898 non-null    object
 17  ORB       898 non-null    object
 18  DRB       898 non-null    object
 19  RPG       898 non-null    object
 20  APG       898 non-null    object
 21  SPG       898 no

In [4]:
# List of columns to be changed from object to float
floats = ['MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
for column in floats:
    df[column] = pd.to_numeric(df[column], errors='raise')

# List of columns to be changed from object to int
integers = ['#', 'GP']
for column in integers:
    df[column] = pd.to_numeric(df[column], errors='raise').astype('Int64')

df.head()


Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,PG,1,Joe Ragland,ZVE,22,22.7,22.6,8.6,17.5,0.495,2.1,6.7,0.314,3.2,3.7,0.846,0.9,3.6,4.5,12.1,2.3,0.1,5.3,4.6
1,2019,PG,2,Marcus Paige,PAR,25,25.0,22.4,7.8,19.3,0.405,3.8,10.5,0.365,2.9,3.8,0.76,1.2,3.9,5.1,7.8,2.6,0.4,3.1,5.0
2,2019,PG,3,Brandis Raley-Ross,CIB,11,20.9,21.9,7.9,18.3,0.432,1.5,5.2,0.28,4.6,5.4,0.846,1.9,2.9,4.8,4.4,1.9,0.0,2.9,5.6
3,2019,PG,4,Nikola Rebic,MOR,21,24.1,21.3,6.8,14.9,0.459,2.0,5.8,0.344,5.7,7.2,0.789,0.5,3.7,4.2,7.5,1.9,0.2,3.1,3.8
4,2019,PG,5,Jan Span,OLI,21,25.3,21.3,7.1,17.6,0.405,3.8,11.0,0.344,3.2,3.9,0.814,1.0,4.3,5.2,7.2,1.9,0.1,4.7,4.5


In [5]:
# Rename positions to match NBA Stats
df['Position'] = df['Position'].replace(['PG', 'SG'], 'G')
df['Position'] = df['Position'].replace(['SF', 'PF'], 'F')

In [6]:
# Convert stats from 48 minutes to 100 possessions
df.loc[:, 'MPG':] = df.loc[:,'MPG':].mul(1.25)

In [7]:
df

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,G,1,Joe Ragland,ZVE,22,28.375,28.250,10.750,21.875,0.61875,2.625,8.375,0.39250,4.000,4.625,1.05750,1.125,4.500,5.625,15.125,2.875,0.125,6.625,5.750
1,2019,G,2,Marcus Paige,PAR,25,31.250,28.000,9.750,24.125,0.50625,4.750,13.125,0.45625,3.625,4.750,0.95000,1.500,4.875,6.375,9.750,3.250,0.500,3.875,6.250
2,2019,G,3,Brandis Raley-Ross,CIB,11,26.125,27.375,9.875,22.875,0.54000,1.875,6.500,0.35000,5.750,6.750,1.05750,2.375,3.625,6.000,5.500,2.375,0.000,3.625,7.000
3,2019,G,4,Nikola Rebic,MOR,21,30.125,26.625,8.500,18.625,0.57375,2.500,7.250,0.43000,7.125,9.000,0.98625,0.625,4.625,5.250,9.375,2.375,0.250,3.875,4.750
4,2019,G,5,Jan Span,OLI,21,31.625,26.625,8.875,22.000,0.50625,4.750,13.750,0.43000,4.000,4.875,1.01750,1.250,5.375,6.500,9.000,2.375,0.125,5.875,5.625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
893,2024,C,24,Robert Jurkovic,KRK,23,15.000,20.250,8.000,16.750,0.60125,1.125,5.625,0.24000,3.000,4.375,0.87500,4.125,6.750,10.875,2.375,1.125,2.375,2.625,8.000
894,2024,C,25,Bojan Tomasevic,BOR,25,26.750,19.875,7.000,14.875,0.59250,3.625,8.875,0.50625,2.250,2.875,0.96125,3.250,7.250,10.500,1.250,2.000,0.375,1.625,6.500
895,2024,C,26,Filip Rebraca,BOR,25,18.500,19.750,7.500,16.500,0.56375,0.500,2.875,0.20875,4.375,6.125,0.88875,5.000,7.000,12.000,1.625,1.625,0.750,2.750,7.125
896,2024,C,27,Alex Illic,BUD,10,10.750,15.375,7.750,8.375,1.14625,0.000,0.000,0.00000,0.000,0.000,0.00000,4.875,9.125,14.000,2.750,4.875,2.750,0.000,7.750


In [7]:
# Saving DataFrame to a CSV file
df.to_csv('aba_stats.csv', index=False)