In [1]:
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Scrape the RealGM "Averages / Qualified / Regular_Season" stats tables for the
# Greek league, one page per (year, position), then build a single DataFrame,
# post-process it, and save it as CSV.

# Year values substituted into the URL; range() excludes the end, so 2019..2023.
years = list(range(2019, 2024))

#url_start = 'https://basketball.realgm.com/international/league/12/French-Jeep-Elite/stats/{year}/Per_48/Qualified/All/points/{position}/desc/1/Regular_Season'
url_start = 'https://basketball.realgm.com/international/league/8/Greek-HEBA-A1/stats/{year}/Averages/Qualified/All/points/{position}/desc/1/Regular_Season'
data = []  # List to store parsed data

# Final column layout: the two values we add ourselves (Year, Position)
# followed by one name per <td> cell of a stats-table row.
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
expected_cell_count = len(columns) - 2

# Without a timeout, requests waits indefinitely on a stalled connection.
REQUEST_TIMEOUT_SECONDS = 30

# Factor applied to every column from 'MPG' onward.
# NOTE(review): the rationale for 1.25 is not visible in this cell (a later
# cell labels the step "calculation for 100 possessions") - confirm.
SCALE_FACTOR = 1.25

for year in years:
    for position in ['PG', 'SG', 'SF', 'PF', 'C']:
        url = url_start.format(year=year, position=position)
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        # Fail loudly on HTTP errors instead of parsing an error page and
        # silently ending up with no rows for this year/position.
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'html.parser')

        for row in soup.select('.tablesaw tbody tr'):
            cells = [cell.text.strip() for cell in row.find_all('td')]
            if not cells:
                # A <tr> without any <td> carries no player data.
                continue
            if len(cells) != expected_cell_count:
                # The table no longer matches `columns`; stop here with the
                # offending URL rather than building a misaligned frame.
                raise ValueError(
                    f'Expected {expected_cell_count} cells per row but found '
                    f'{len(cells)} at {url}'
                )
            player = [year, position] + cells  # Year and position lead each record
            data.append(player)

# Creating DataFrame from the collected data
df = pd.DataFrame(data, columns=columns)

# Collapse the five scraped positions into three groups: PG/SG -> G,
# SF/PF -> F, C stays as is. This only touches values of the 'Position'
# column, not the 'PF' stat column.
df['Position'] = df['Position'].replace({'PG': 'G', 'SG': 'G', 'SF': 'F', 'PF': 'F'})

# Every column from 'GP' onward is scraped as text. Assigning by column list
# replaces the columns, so they end up with a real float dtype (assigning
# through df.loc[:, 'GP':] stores floats but keeps the object dtype).
stat_columns = columns[columns.index('GP'):]
df[stat_columns] = df[stat_columns].astype(float)

# NOTE(review): this range starts at 'MPG' itself and also contains the
# FG% / 3P% / FT% columns, so those values are multiplied as well - confirm
# that is intended.
scaled_columns = columns[columns.index('MPG'):]
df[scaled_columns] = df[scaled_columns] * SCALE_FACTOR

# Save DataFrame to a CSV file; to_csv does not create missing directories,
# so make sure the target folder exists first.
output_path = Path('DataFiles/euroleague_stats_Greek.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)


# Display DataFrame
df

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,...,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,G,1,William Hatcher,PAOK,23.0,30.375,16.75,5.375,11.625,...,4.5,0.97875,0.25,1.75,2.0,3.875,0.875,0.0,1.625,3.25
1,2019,G,2,Ryan Harrow,PER,18.0,29.125,15.75,5.875,12.375,...,3.5,0.95,0.75,2.25,3.0,6.0,1.0,0.125,3.0,1.375
2,2019,G,3,Stevie Browning,KOL,26.0,32.875,15.625,6.25,14.625,...,1.25,0.865,0.875,3.375,4.25,3.0,0.625,0.125,2.5,2.875
3,2019,G,4,Lefteris Bochoridis,ARI,23.0,31.625,14.25,5.125,12.875,...,3.75,0.7975,1.125,3.875,5.0,3.25,0.875,0.25,3.5,4.25
4,2019,G,5,Nontas Papantoniou,PAI,22.0,37.25,14.125,4.625,12.25,...,4.5,0.86,0.625,3.125,3.875,4.875,1.75,0.0,3.125,3.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
724,2023,C,18,Georgios Tanoulis,PROM,14.0,11.125,4.75,2.0,3.75,...,0.75,0.9375,1.375,1.5,2.875,0.75,0.375,0.125,0.5,1.5
725,2023,C,19,Dimitrios Kaklamanakis,PAOK,18.0,17.75,4.25,1.875,3.875,...,1.0,0.625,2.0,3.0,5.125,1.375,0.5,0.375,1.375,2.0
726,2023,C,20,Zisis Sarikopoulos,KARD,12.0,11.0,2.25,1.0,2.375,...,0.375,0.625,1.375,1.875,3.25,0.375,0.25,0.25,0.5,1.375
727,2023,C,21,Michail Sprintzios,LAV,11.0,9.25,1.25,0.5,1.5,...,0.25,0.0,0.125,0.75,0.875,0.375,0.375,0.0,0.25,1.625


In [None]:
#renaming positions
# Collapse the scraped positions into groups: PG/SG -> G, SF/PF -> F; any
# other value (e.g. C) is left unchanged. The scraping cell above already
# performs this mapping, so running this cell afterwards changes nothing.
df['Position'] = df['Position'].replace(
    {
        'PG': 'G',
        'SG': 'G',
        'SF': 'F',
        'PF': 'F',
    }
)

In [None]:
#calculation for 100 possessions
# NOTE(review): the scraping cell above already converts and scales these
# same columns. Running this cell afterwards multiplies everything from 'MPG'
# onward by 1.25 a second time - confirm that is intended.
# NOTE(review): why 1.25 yields a per-100-possessions figure is not shown
# here, and the scaled range includes 'MPG' and the percentage columns.

# Cast every column from 'GP' to the end of the frame to float ...
stats_as_float = df.loc[:, 'GP':].astype(float)
df.loc[:, 'GP':] = stats_as_float
# ... then scale every column from 'MPG' to the end of the frame.
df.loc[:, 'MPG':] = df.loc[:, 'MPG':] * 1.25

In [None]:
#filtering for center
#df_center = df[df['Position'] == 'C']
#df_center_filtered = df_center[(df_center['RPG'] >= 13.0) & (df_center['BPG'] >= 2.0 ) & (df_center['APG'] >= 3.0)]