In [116]:
import requests
from bs4 import BeautifulSoup
import pandas as pd


In [117]:
# Euroleague players page on basketball.realgm.com.
url = 'https://basketball.realgm.com/international/league/1/Euroleague/players'

# Fetch the page. The timeout keeps this cell from hanging indefinitely on a
# stalled connection, and raise_for_status() surfaces HTTP errors (4xx/5xx)
# here instead of letting an error page flow into the parsing cells below.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Raw response body (bytes); parsed by BeautifulSoup in the next cell.
page_content = response.content

In [118]:
# Build the parse tree for the players page using the 'html.parser' backend.
soup = BeautifulSoup(markup=page_content, features='html.parser')

In [119]:
# Build `players`: one dict per table row with the keys 'name', 'height', 'weight'.

# Use the first <tbody> found on the page; each <tr> inside it is one row.
table_body = soup.find('tbody')
if table_body is None:
    # Fail with a clear message rather than an AttributeError on None below
    # (e.g. when the response was not the expected players page).
    raise ValueError("Players table body not found on the page.")
rows = table_body.find_all('tr')

players = []

for row in rows:
    cells = row.find_all('td')

    # Cells 0, 2 and 3 are read below; a row with fewer than four <td> cells
    # cannot be indexed safely, so skip it instead of raising IndexError.
    if len(cells) < 4:
        continue

    # The player name is taken from the <a> tag in the first cell when present.
    name_tag = cells[0].find('a')
    player_name = name_tag.get_text(strip=True) if name_tag is not None else "Unknown"

    # Column positions are assumed from the page layout: third cell = height,
    # fourth cell = weight. TODO confirm if the site changes its table.
    height = cells[2].get_text(strip=True)
    weight = cells[3].get_text(strip=True)

    players.append({
        'name': player_name,
        'height': height,
        'weight': weight,
    })

In [120]:
# Euroleague player stats page on basketball.realgm.com.
# NOTE: this cell rebinds `url`, `response`, `soup` and `page_content`, which
# earlier cells used for the players page.
url = 'https://basketball.realgm.com/international/league/1/Euroleague/stats'

# Fetch the page. The timeout prevents an indefinite hang on a stalled
# connection, and raise_for_status() stops here on an HTTP error (4xx/5xx)
# instead of handing an error page to the parser.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, 'html.parser')
page_content = response.content

In [121]:
# Build `player_stats`: one dict per data row of the first <table> on the page,
# keyed by the header text of each column.

table = soup.find('table')
if table is None:
    raise Exception("Table not found on the page.")

# Both sections are dereferenced below; check them explicitly so a missing
# section produces a clear error instead of an AttributeError on None.
thead = table.find('thead')
tbody = table.find('tbody')
if thead is None or tbody is None:
    raise Exception("Stats table is missing its <thead> or <tbody> section.")

# Column names come from the text of the <th> cells in the header.
header_cells = thead.find_all('th')
column_names = [cell.get_text(strip=True) for cell in header_cells]

rows = tbody.find_all('tr')

player_stats = []

for row in rows:
    cells = row.find_all('td')

    # Only rows whose cell count matches the header can be mapped to columns.
    if len(cells) != len(column_names):
        continue

    player_data = {}
    for i, cell in enumerate(cells):
        text = cell.get_text(strip=True)
        if i == 0:
            # For the first cell, prefer the text of an <a> tag when one exists.
            link = cell.find('a')
            player_data[column_names[i]] = link.get_text(strip=True) if link else text
        else:
            player_data[column_names[i]] = text
    player_stats.append(player_data)


In [122]:
# Index the scraped players by name so each stats row needs a single lookup.
# If two players share a name, the later entry in `players` wins.
height_weight_map = {
    entry['name']: {'height': entry['height'], 'weight': entry['weight']}
    for entry in players
}

# Attach 'Height' and 'Weight' to every stats row in place; rows whose
# 'Player' value has no match in the map get the placeholder 'N/A'.
for stat in player_stats:
    name = stat['Player']
    measurements = height_weight_map.get(name)
    if measurements is None:
        stat['Height'] = 'N/A'
        stat['Weight'] = 'N/A'
    else:
        stat['Height'] = measurements['height']
        stat['Weight'] = measurements['weight']


In [123]:
# Collect per-season stats rows into `all_stats`. Each entry is a dict keyed by
# the table's header text plus a 'Season' key holding the year from the URL.
all_stats = []

# Seasons 2024 down to 2000 (the range end is exclusive), one request each.
for year in range(2024, 1999, -1):
    url = f'https://basketball.realgm.com/international/league/1/Euroleague/stats/{year}'

    # The timeout keeps one stalled request from blocking the whole loop.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # Skip seasons that return an HTTP error, but say so, so that gaps in
        # the collected data are visible.
        print(f'Skipping season {year}: HTTP {response.status_code}')
        continue

    soup = BeautifulSoup(response.content, 'html.parser')

    table = soup.find('table')
    if table is None:
        continue

    # A table without a header or body cannot be mapped to columns; skip the
    # season just like a missing table, instead of crashing on None.
    thead = table.find('thead')
    tbody = table.find('tbody')
    if thead is None or tbody is None:
        continue

    header_cells = thead.find_all('th')
    column_names = [cell.get_text(strip=True) for cell in header_cells]

    rows = tbody.find_all('tr')

    for row in rows:
        cells = row.find_all('td')

        # Only rows whose cell count matches the header can be mapped to columns.
        if len(cells) != len(column_names):
            continue

        player_data = {'Season': year}
        for i, cell in enumerate(cells):
            text = cell.get_text(strip=True)
            if i == 0:
                # For the first cell, prefer the text of an <a> tag when one exists.
                link = cell.find('a')
                player_data[column_names[i]] = link.get_text(strip=True) if link else text
            else:
                player_data[column_names[i]] = text

        all_stats.append(player_data)


In [125]:
# Index the scraped players by name so each stats row needs a single lookup.
# If two players share a name, the later entry in `players` wins.
height_weight_map = {
    entry['name']: {'height': entry['height'], 'weight': entry['weight']}
    for entry in players
}

# Attach 'Height' and 'Weight' to every multi-season row in place; rows whose
# 'Player' value has no match in the map get the placeholder 'N/A'.
for stat in all_stats:
    name = stat['Player']
    measurements = height_weight_map.get(name)
    if measurements is None:
        stat['Height'] = 'N/A'
        stat['Weight'] = 'N/A'
    else:
        stat['Height'] = measurements['height']
        stat['Weight'] = measurements['weight']

# One DataFrame row per (season, player) record.
df = pd.DataFrame(all_stats)

# Keep only rows in which no column holds the literal string 'N/A'
# (this includes the placeholder written by the merge above).
has_placeholder = df.isin(['N/A']).any(axis=1)
df = df[~has_placeholder]

# Write the filtered table to CSV without the index column.
df.to_csv('latest_euroleague_player_stats_with_height_weight.csv', index=False)
