In [51]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # so we can see all columns in a wide DataFrame

years = list(range(2004, 2025))
url_start = "https://basketball.realgm.com/international/league/12/French-Jeep-Elite/stats/{year}/Per_48/Qualified/All//points/{position}/desc/1/Regular_Season"

data = []  # List to store parsed data

for year in years:
    for position in ['PG', 'SG', 'SF', 'PF', 'C']:
        url = url_start.format(year=year, position=position)  # Include both year and position in URL
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        for row in soup.select(f'.tablesaw tbody tr'):
            player = [year, position]  # Add the year and position to the player data
            player += [cell.text.strip() for cell in row.find_all('td')]
            data.append(player)

# Creating DataFrame from the collected data
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']

df = pd.DataFrame(data, columns=columns)

# Saving DataFrame to a CSV file
df.to_csv('euroleague_stats.csv', index=False)

# Display DataFrame
df  # Displaying just the first few rows for clarity

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2005,PG,1,Jason Rowe,HYT,30,35.9,26.5,9.1,19.5,.468,3.4,8.7,.390,4.9,5.7,.852,1.0,3.7,4.6,8.4,3.1,0.0,4.7,2.9
1,2005,PG,2,Terrell Lyday,LYV,36,32.9,25.8,9.0,18.0,.499,3.2,7.5,.422,4.7,6.0,.791,1.0,4.5,5.5,6.4,2.5,0.5,3.4,3.9
2,2005,PG,3,Marcus Wilson,GRV,23,31.0,25.0,8.0,17.5,.454,3.2,8.7,.372,5.9,6.5,.897,0.5,1.8,2.2,4.2,1.0,0.1,2.5,3.2
3,2005,PG,4,Afik Nissim,STR,37,19.7,24.4,8.3,17.9,.465,4.4,9.6,.455,3.4,4.2,.825,0.6,3.2,3.8,5.4,1.6,0.0,2.9,3.5
4,2005,PG,5,Mire Chatman,PAU,27,32.8,23.3,8.5,17.9,.473,2.2,6.7,.331,4.2,6.0,.700,1.0,4.2,5.2,8.8,3.5,0.2,3.4,2.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2698,2024,C,27,Mattias Markusson,CHA,19,14.3,15.0,6.2,9.4,.660,0.0,0.2,.000,2.6,3.2,.833,4.4,8.5,12.9,1.9,1.1,1.1,2.6,5.3
2699,2024,C,28,Wilfried Yeguete,LEM,28,21.3,14.9,6.4,12.9,.497,0.0,0.0,.000,2.0,4.1,.490,5.3,9.2,14.5,3.5,1.4,0.3,3.6,6.1
2700,2024,C,29,John Egbunu,LYV,15,11.9,11.8,4.3,7.2,.593,0.0,0.0,.000,3.2,6.2,.522,3.8,7.2,11.0,0.5,0.3,1.3,4.3,7.2
2701,2024,C,30,Hamady Ndiaye,N92,26,14.6,11.8,5.3,7.0,.764,0.0,0.0,.000,1.1,2.7,.429,3.2,2.7,5.8,1.4,0.8,2.8,1.6,5.6


In [55]:
position_counts = df['Position'].value_counts()
position_counts


PF    608
SG    589
SF    575
PG    531
C     400
Name: Position, dtype: int64

In [46]:
# Replacing intermediary positions with one. All in all there are 3 positions - C, F and G. 
df['Position'] = df['Position'].str.replace('F-C', 'F').replace('C-F', 'C').replace('F-G', 'F').replace('G-F', 'G')

In [50]:
# Number of positions per unique European player:
df.groupby('Position')['Player'].nunique()


Position
C     232
PF    337
PG    275
SF    309
SG    304
Name: Player, dtype: int64

In [56]:
column_types = df.dtypes
column_types

Year         int64
Position    object
#           object
Player      object
Team        object
GP          object
MPG         object
PPG         object
FGM         object
FGA         object
FG%         object
3PM         object
3PA         object
3P%         object
FTM         object
FTA         object
FT%         object
ORB         object
DRB         object
RPG         object
APG         object
SPG         object
BPG         object
TOV         object
PF          object
dtype: object

In [57]:
# List of columns to convert to integer
columns_to_convert = ['Year', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']

# Convert columns to integer
for column in columns_to_convert:
    df[column] = pd.to_numeric(df[column], errors='coerce')  # Convert to numeric, coerce errors to NaN

# Fill NaN values with appropriate values or drop them
# For example, you can fill NaN with 0
df.fillna(0, inplace=True)

# Convert to integer
df[columns_to_convert] = df[columns_to_convert].astype(int)

# Check the data types after conversion
column_types


Year         int64
Position    object
#           object
Player      object
Team        object
GP          object
MPG         object
PPG         object
FGM         object
FGA         object
FG%         object
3PM         object
3PA         object
3P%         object
FTM         object
FTA         object
FT%         object
ORB         object
DRB         object
RPG         object
APG         object
SPG         object
BPG         object
TOV         object
PF          object
dtype: object