### Adriatic ABA Players Dataset

* Scrape player bio data for the Adriatic League (Liga ABA) from RealGM
* Check column data types
* Rename positions
* Drop duplicate players
* Export to CSV

In [1]:
# Scrape player bio data for the Adriatic League (Liga ABA) from RealGM

import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # so we can see all columns in a wide DataFrame

url_start = "https://basketball.realgm.com/international/league/18/Adriatic-League-Liga-ABA/players/{}/{}"

# Season years and the URL id paired with each one (the id fills the first
# placeholder of url_start, the year fills the second). The two lists must
# stay the same length: the original range ran to 2024 without a matching id,
# and zip() silently dropped that season.
years = list(range(2019, 2024))
season_ids = ['834', '935', '1024', '1121', '1222']
if len(season_ids) != len(years):
    raise ValueError("season_ids and years must have the same number of entries")

data = []       # One list per player row: [year, cell, cell, ...]
columns = None  # Header names, captured from the first page that provides them

for season_id, year in zip(season_ids, years):
    url = url_start.format(season_id, year)
    # A timeout keeps the cell from hanging forever on a stalled connection;
    # raise_for_status() stops on HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    if columns is None:
        header_names = [header.text.strip() for header in soup.select('.tablesaw thead th')]
        if header_names:
            columns = ['Year'] + header_names

    rows = soup.select('.tablesaw tbody tr')
    print(f"Year: {year}, Rows: {len(rows)}")  # Debugging print

    for row in rows:
        # Prefix every scraped row with its season year.
        # NOTE(review): cell.text joins nested elements without a separator, so
        # multi-valued cells come out fused (e.g. "AustriaSerbia" in Nationality).
        # TODO: split these once the cell markup has been inspected.
        player = [year]
        player += [cell.text.strip() for cell in row.find_all('td')]
        data.append(player)

if columns is None:
    raise ValueError("No table headers found on any scraped page; the page layout may have changed")

# Creating DataFrame from the collected data
df = pd.DataFrame(data, columns=columns)

# Display the DataFrame (pandas truncates long frames in the rendered output)
df

Year: 2019, Rows: 230
Year: 2020, Rows: 271
Year: 2021, Rows: 275
Year: 2022, Rows: 298
Year: 2023, Rows: 285


Unnamed: 0,Year,Player,Pos,HT,WT,Team,Birth City,Draft Status,Nationality
0,2019,Bashir Ahmed,SF,6-7,210,KK Zadar,New York (NY),"2018 NBA Draft, Undrafted",United States
1,2019,Byron Allen,C,6-7,275,KK Zadar,Leland (MS),"2006 NBA Draft, Undrafted",United States
2,2019,Braian Angola-Rodas,F,6-6,195,KK Partizan,Villanueva,"2018 NBA Draft, Undrafted",Colombia
3,2019,Dominic Artis,G,6-3,190,KK Cedevita Olimpija Ljubljana,Oakland (CA),"2017 NBA Draft, Undrafted",United StatesKosovo
4,2019,Luka Asceric,SG,6-7,196,KK Mega Bemax,Sankt Polten,"2019 NBA Draft, Undrafted",AustriaSerbia
...,...,...,...,...,...,...,...,...,...
1354,2023,Corey Webster,G,6-2,190,KK Mornar,Auckland,"2010 NBA Draft, Undrafted",New Zealand
1355,2023,Jacorey Williams,PF,6-8,220,Buducnost,Birmingham (AL),"2017 NBA Draft, Undrafted",United States
1356,2023,McKinley Wright IV,PG,5-11,192,Buducnost,Champlin Park (MN),"2021 NBA Draft, Undrafted",United States
1357,2023,Karlo Zganec,F,6-9,225,KK Zadar,Zagreb,"2017 NBA Draft, Undrafted",Croatia


In [2]:
# Inspect column dtypes and non-null counts; apart from Year, the scraped
# fields are plain text and are stored as object columns
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1359 entries, 0 to 1358
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Year          1359 non-null   int64 
 1   Player        1359 non-null   object
 2   Pos           1359 non-null   object
 3   HT            1359 non-null   object
 4   WT            1359 non-null   object
 5   Team          1359 non-null   object
 6   Birth City    1359 non-null   object
 7   Draft Status  1359 non-null   object
 8   Nationality   1359 non-null   object
dtypes: int64(1), object(8)
memory usage: 95.7+ KB


In [3]:
# Collapse the detailed guard/forward labels into the coarser G / F codes
# used by NBA Stats; any other position value is left unchanged
position_map = {'PG': 'G', 'SG': 'G', 'SF': 'F', 'PF': 'F'}
df['Pos'] = df['Pos'].replace(position_map)

In [4]:
# Keep a single row per player name: the first occurrence is retained and
# later rows with the same Player value are dropped
df = df.drop_duplicates(subset='Player', keep='first')

In [5]:
# Write the cleaned player table to disk, leaving out the DataFrame index
output_path = 'aba_playerbio.csv'
df.to_csv(output_path, index=False)