### German BBL Players Dataset

* Scrape player bio data for the **German BBL**
* Check column data types
* Rename positions
* Export to CSV

In [5]:
# Scrape Player Bio in German BBL

import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # show every column of a wide DataFrame

# URL template: first placeholder is the page id, second is the season year.
url_start = "https://basketball.realgm.com/international/league/15/German-BBL/players/{}/{}"

# Page id for each season year. Keeping each (year, id) pair in one mapping
# means a season can never be silently dropped because two parallel lists
# ended up with different lengths.
# NOTE(review): the id appears to be RealGM's per-season page id; confirm
# the value on the site before adding another season here.
season_ids = {
    2019: '772',
    2020: '873',
    2021: '962',
    2022: '1056',
    2023: '1151',
}
years = list(season_ids)

data = []       # one record per player per season: [year, <table cells...>]
columns = None  # header row, read once from the first page that is fetched

for year, season_id in season_ids.items():
    url = url_start.format(season_id, year)

    # Fail fast on a hung connection or an HTTP error instead of parsing an
    # error page into zero rows.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    if columns is None:
        columns = ['Year'] + [header.text.strip() for header in soup.select('.tablesaw thead th')]

    rows = soup.select('.tablesaw tbody tr')
    print(f"Year: {year}, Rows: {len(rows)}")  # progress / sanity check per season

    for row in rows:
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if not cells:
            # A row without <td> cells would produce a short record and break
            # DataFrame construction below.
            continue
        data.append([year] + cells)  # prefix every record with its season year

# Build the DataFrame from all collected records
df = pd.DataFrame(data, columns=columns)

# Display the result (pandas abbreviates long frames to the first and last rows)
df 

Year: 2019, Rows: 325
Year: 2020, Rows: 326
Year: 2021, Rows: 322
Year: 2022, Rows: 310
Year: 2023, Rows: 312


Unnamed: 0,Year,Player,Pos,HT,WT,Team,Birth City,Draft Status,Nationality
0,2019,Alex Abreu,G,5-10,180,Giessen 46ers,Bayamon (PR),"2015 NBA Draft, Undrafted",Puerto Rico
1,2019,Ismet Akpinar,PG,6-4,176,Bayern Munich,Hamburg,"2017 NBA Draft, Undrafted",Germany
2,2019,Jacob Albrecht,SF,6-8,220,BG Goettingen,,"2017 NBA Draft, Undrafted",Germany
3,2019,Julian Albus,SF,6-4,198,s.Oliver Baskets,Tubingen,"2014 NBA Draft, Undrafted",Germany
4,2019,Bryce Alford,G,6-3,185,Medi Bayreuth,Albuquerque (NM),"2017 NBA Draft, Undrafted",United States
...,...,...,...,...,...,...,...,...,...
1590,2023,Matej Zejdl,C,6-9,236,HARKO Merlins Crailsheim,,2025 NBA Draft Eligible,Czech Republic
1591,2023,Paul Zipser,F,6-8,226,MLP Academics Heidelberg,Heidelberg,2016 Rnd 2 Pick 18,Germany
1592,2023,Fedor Zugic,SG,6-6,188,BG Goettingen,Kotor,2025 NBA Draft Eligible,Montenegro
1593,2023,Ferdinand Zylka,G,6-3,170,Basketball Lowen Braunschweig,Berlin,"2020 NBA Draft, Undrafted",Germany


In [8]:
# Check the data type and non-null count of every column.
# Only `Year` is created as an int in the scraping cell; every scraped table
# cell is stored as stripped text, so those columns show up as `object`.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1595 entries, 0 to 1594
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Year          1595 non-null   int64 
 1   Player        1595 non-null   object
 2   Pos           1595 non-null   object
 3   HT            1595 non-null   object
 4   WT            1595 non-null   object
 5   Team          1595 non-null   object
 6   Birth City    1595 non-null   object
 7   Draft Status  1595 non-null   object
 8   Nationality   1595 non-null   object
dtypes: int64(1), object(8)
memory usage: 112.3+ KB


In [12]:
# Collapse the specific guard/forward positions into the broader labels
# so they match NBA Stats; any position not listed here is left unchanged.
position_map = {
    'PG': 'G',
    'SG': 'G',
    'SF': 'F',
    'PF': 'F',
}
df['Pos'] = df['Pos'].replace(position_map)

In [14]:
# Keep a single row per player name. Records were appended season by season in
# ascending year order, so keeping the first occurrence keeps each player's
# earliest scraped season.
df = df.drop_duplicates(subset='Player', keep='first')

In [16]:
# Spot check: show the row(s) remaining for one player after de-duplication
df.loc[df['Player'].eq('Johannes Thiemann')]

Unnamed: 0,Year,Player,Pos,HT,WT,Team,Birth City,Draft Status,Nationality
287,2019,Johannes Thiemann,C,6-8,225,ALBA Berlin,Trier,"2016 NBA Draft, Undrafted",Germany


In [13]:
# Write the player bio table to CSV without the DataFrame index column.
# Run this cell after the de-duplication cell so the file has one row per player.
output_path = 'bbl_playerbio.csv'
df.to_csv(output_path, index=False)