### German BBL Stats Dataset

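* Scrape per-48-minute player stats for the **German BBL** (seasons 2019–2024, one request per position)
* Convert the scraped stat columns from text to numeric data types
* Rename positions (PG/SG → G, SF/PF → F)
* Convert stats from per 48 minutes to per 100 possessions
* Add new column and fill each row with league name *(TODO: not done by any cell shown in this notebook)*
* Export to CSV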

In [1]:
# Scrape Players Stats for German BBL

import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # so we can see all columns in a wide DataFrame

# One request is made per (season, position) pair; both values are substituted
# into the URL template below.
years = list(range(2019, 2025))  # 2019 .. 2024 inclusive
positions = ['PG', 'SG', 'SF', 'PF', 'C']
url_start = "https://basketball.realgm.com/international/league/15/German-BBL/stats/{}/Per_48/Qualified/All/points/{}/desc/1/Regular_Season"

# Without a timeout, requests waits forever on a stalled connection and the
# notebook hangs; give up on a single page after this many seconds instead.
REQUEST_TIMEOUT = 30

# Output schema: the two values we add ourselves (Year, Position) followed by
# the cells scraped from each table row, in page order.
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
n_scraped_cells = len(columns) - 2  # everything except Year and Position

data = []  # List to store parsed data (one list per player row)

for year in years:
    for position in positions:
        url = url_start.format(year, position)  # Properly formatted URL
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Network-level failure (timeout, DNS, connection reset, ...): treat it
            # like an HTTP failure and move on instead of aborting the whole scrape.
            print(f"Failed to fetch data for year {year}, position {position}: {exc}")
            continue
        if not response.ok:
            print(f"Failed to fetch data for year {year}, position {position}")
            continue
        soup = BeautifulSoup(response.content, 'html.parser')

        rows = soup.select('.tablesaw tbody tr')
        print(f"Year: {year}, Position: {position}, Rows: {len(rows)}")  # Debugging print

        for row in rows:
            cells = [cell.text.strip() for cell in row.find_all('td')]
            if len(cells) != n_scraped_cells:
                # A row that does not match the schema would either be padded with
                # None or make DataFrame construction fail after all the scraping.
                print(f"Skipping malformed row ({len(cells)} cells) for year {year}, position {position}")
                continue
            data.append([year, position, *cells])  # Prefix the row with year and position

# Creating DataFrame from the collected data; every scraped value is still a
# string at this point and is converted to a numeric dtype in a later cell.
df = pd.DataFrame(data, columns=columns)


Year: 2019, Position: PG, Rows: 28
Year: 2019, Position: SG, Rows: 29
Year: 2019, Position: SF, Rows: 28
Year: 2019, Position: PF, Rows: 25
Year: 2019, Position: C, Rows: 22
Year: 2020, Position: PG, Rows: 43
Year: 2020, Position: SG, Rows: 47
Year: 2020, Position: SF, Rows: 41
Year: 2020, Position: PF, Rows: 33
Year: 2020, Position: C, Rows: 32
Year: 2021, Position: PG, Rows: 51
Year: 2021, Position: SG, Rows: 55
Year: 2021, Position: SF, Rows: 50
Year: 2021, Position: PF, Rows: 48
Year: 2021, Position: C, Rows: 36
Year: 2022, Position: PG, Rows: 52
Year: 2022, Position: SG, Rows: 52
Year: 2022, Position: SF, Rows: 56
Year: 2022, Position: PF, Rows: 43
Year: 2022, Position: C, Rows: 33
Year: 2023, Position: PG, Rows: 45
Year: 2023, Position: SG, Rows: 50
Year: 2023, Position: SF, Rows: 51
Year: 2023, Position: PF, Rows: 43
Year: 2023, Position: C, Rows: 32
Year: 2024, Position: PG, Rows: 44
Year: 2024, Position: SG, Rows: 48
Year: 2024, Position: SF, Rows: 52
Year: 2024, Position: PF,

In [2]:
# Check data type for columns: the scraped cell values were stored as text, so
# this shows which columns still need converting to numeric dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1236 entries, 0 to 1235
Data columns (total 25 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Year      1236 non-null   int64 
 1   Position  1236 non-null   object
 2   #         1236 non-null   object
 3   Player    1236 non-null   object
 4   Team      1236 non-null   object
 5   GP        1236 non-null   object
 6   MPG       1236 non-null   object
 7   PPG       1236 non-null   object
 8   FGM       1236 non-null   object
 9   FGA       1236 non-null   object
 10  FG%       1236 non-null   object
 11  3PM       1236 non-null   object
 12  3PA       1236 non-null   object
 13  3P%       1236 non-null   object
 14  FTM       1236 non-null   object
 15  FTA       1236 non-null   object
 16  FT%       1236 non-null   object
 17  ORB       1236 non-null   object
 18  DRB       1236 non-null   object
 19  RPG       1236 non-null   object
 20  APG       1236 non-null   object
 21  SPG       1236

In [3]:
# Columns holding decimal statistics (parsed with pandas' default numeric inference)
floats = [
    'MPG', 'PPG',
    'FGM', 'FGA', 'FG%',
    '3PM', '3PA', '3P%',
    'FTM', 'FTA', 'FT%',
    'ORB', 'DRB', 'RPG',
    'APG', 'SPG', 'BPG', 'TOV', 'PF',
]

# Columns holding whole numbers (stored as pandas' nullable Int64)
integers = ['#', 'GP']


def _parse_numeric(series, as_nullable_int=False):
    """Parse a text column into numbers, raising on any value that cannot be parsed.

    When `as_nullable_int` is true the parsed result is additionally cast to 'Int64'.
    """
    parsed = pd.to_numeric(series, errors='raise')
    if as_nullable_int:
        return parsed.astype('Int64')
    return parsed


# Float columns are converted first, then the integer ones, one column at a time.
for name in floats + integers:
    df[name] = _parse_numeric(df[name], as_nullable_int=name in integers)

df.head()


Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,PG,1,Will Cummings,EWE,37,30.1,33.9,11.0,21.8,0.507,2.5,6.8,0.369,9.4,11.3,0.828,0.7,4.2,5.0,6.8,2.7,0.2,3.2,3.8
1,2019,PG,2,Deandre Lansdowne,BLB,34,32.2,27.7,10.0,22.6,0.44,3.8,10.1,0.374,4.0,5.2,0.78,1.2,3.8,5.0,5.4,2.3,0.2,3.1,3.9
2,2019,PG,3,Tyrese Rice,BRO,37,28.8,26.4,8.8,19.7,0.446,4.0,10.8,0.371,4.9,5.8,0.837,0.5,3.2,3.7,10.5,1.2,0.0,4.7,3.2
3,2019,PG,4,Michael Stockton,GOE,33,30.0,26.2,9.1,19.7,0.463,2.1,5.5,0.389,5.9,6.7,0.871,0.8,4.1,4.9,11.2,1.7,0.1,4.6,4.4
4,2019,PG,5,Max Landis,GIE,28,22.1,25.6,7.6,17.0,0.445,4.0,9.2,0.437,6.4,7.2,0.892,0.4,2.0,2.4,5.0,0.9,0.0,2.7,5.1


In [4]:
# Rename positions to match NBA Stats: both guard spots become 'G' and both
# forward spots become 'F'; any other value (e.g. 'C') is left as it is.
position_groups = {'PG': 'G', 'SG': 'G', 'SF': 'F', 'PF': 'F'}
df['Position'] = df['Position'].replace(position_groups)

In [5]:
# Convert stats from 48 minutes to 100 possessions
# NOTE(review): 1.25 presumably assumes ~80 possessions per 48 minutes (100 / 80) — confirm.
PER48_TO_PER100 = 1.25

# Only the counting stats are rescaled. The shooting percentages (FG%, 3P%, FT%)
# are ratios that do not depend on the normalisation basis, and MPG looks like
# actual minutes per game rather than a per-48 figure (values such as 30.1), so
# multiplying any of them by the factor would corrupt the exported data.
not_rescaled = {'MPG', 'FG%', '3P%', 'FT%'}
rescaled_columns = [name for name in df.loc[:, 'MPG':].columns if name not in not_rescaled]

# NOTE: this cell is not idempotent — running it twice applies the factor twice.
# Re-run the notebook from the scraping/conversion cells before re-running it.
df[rescaled_columns] = df[rescaled_columns] * PER48_TO_PER100

In [78]:
# Write the finished table to a CSV file in the working directory; index=False
# keeps the DataFrame's row index out of the file.
df.to_csv(path_or_buf='bbl_stats.csv', index=False)