### Adriatic ABA Players Dataset

* Scrape player bio data for the Adriatic (ABA) League from RealGM
* Check data types
* Rename positions
* Export to CSV

In [None]:
# Scrape Player Bio for Adriatic (ABA) League

import requests
from bs4 import BeautifulSoup
import pandas as pd

# Configure pandas to display all columns in DataFrame
pd.set_option('display.max_columns', None)

# Define the range of years to scrape (2019 through 2024 inclusive)
years = list(range(2019, 2025))

# Base URL for scraping; the two placeholders are filled with (index value, year)
url_start = "https://basketball.realgm.com/international/league/18/Adriatic-League-Liga-ABA/players/{}/{}"

# Seconds to wait for each page, so a stalled connection fails instead of hanging
REQUEST_TIMEOUT = 30

# List to store parsed data (one list per table row, year first)
data = []

# Table header labels, captured from the first page that exposes them
headers = []

# List of index values used in the URL path, paired positionally with `years`
# (if these are fixed, otherwise modify accordingly)
# NOTE(review): 5 values here vs. 6 years above, so the last year is never
# requested -- confirm which of the two lists is the intended one.
index = ['834', '935', '1024', '1121', '1222']

# zip() stops at the shorter list; report what is left unpaired instead of
# dropping it silently.
if len(index) != len(years):
    paired = min(len(index), len(years))
    print(
        f"Warning: {len(index)} index values for {len(years)} years; "
        f"unpaired years: {years[paired:]}, unpaired index values: {index[paired:]}"
    )

# Iterate over each index value and corresponding year. The loop variable has
# its own name so the `index` list is not overwritten while it is iterated.
for season_index, year in zip(index, years):
    url = url_start.format(season_index, year)
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on HTTP errors rather than parsing an error page as "0 rows"
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    rows = soup.select('.tablesaw tbody tr')
    print(f"Year: {year}, Rows: {len(rows)}")  # Debugging print

    # Column names come from the table header; grab them once
    if not headers:
        headers = [th.text.strip() for th in soup.select('.tablesaw thead th')]

    # Extract and clean the cells of each table row, prefixed with the year
    data.extend(
        [year] + [cell.text.strip() for cell in row.find_all('td')]
        for row in rows
    )

# Column names: the added 'Year' column followed by the table's own headers
columns = ['Year'] + headers

# Creating DataFrame from the collected data
df = pd.DataFrame(data, columns=columns)

# Displaying just the first few rows for clarity
df.head()

In [None]:
# Check the data type of each column: DataFrame.info() prints a summary of the
# columns, their non-null counts and their dtypes.
df.info()

In [None]:
# Rename positions to match NBA Stats: the specific guard and forward labels
# are collapsed into the generic 'G' and 'F'; every other value is left as is.
position_map = {'PG': 'G', 'SG': 'G', 'SF': 'F', 'PF': 'F'}
df['Pos'] = df['Pos'].replace(position_map)

In [None]:
# Keep a single row per player name; the first occurrence of each name is retained.
df = df.drop_duplicates(subset='Player', keep='first')

In [None]:
# Write the DataFrame to a CSV file, leaving out the row index
output_file = 'aba_playerbio.csv'
df.to_csv(output_file, index=False)