In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

years = list(range(2019, 2024))

#url_start = 'https://basketball.realgm.com/international/league/12/French-Jeep-Elite/stats/{year}/Per_48/Qualified/All/points/{position}/desc/1/Regular_Season'
url_start = 'https://basketball.realgm.com/international/league/7/Turkish-BSL/stats/{year}/Averages/Qualified/All/points/{position}/desc/1/Regular_Season'
data = []  # List to store parsed data

for year in years:
    for position in ['PG', 'SG', 'SF', 'PF', 'C']: 
        url = url_start.format(year=year, position=position)  # Correctly format the URL
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        for row in soup.select('.tablesaw tbody tr'):
            player = [year, position]  # Add the year and position to the player data
            player += [cell.text.strip() for cell in row.find_all('td')]
            data.append(player)

# Creating DataFrame from the collected data
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
df = pd.DataFrame(data, columns=columns)

df['Position'] = df['Position'].replace(['PG', 'SG'], 'G')
df['Position'] = df['Position'].replace(['SF', 'PF'], 'F')

df.loc[:, 'GP':] = df.loc[:,'GP':].astype(float)
df.loc[:, 'MPG':] = df.loc[:,'MPG':].mul(1.25)

# Save DataFrame to a CSV file
df.to_csv('DataFiles/euroleague_stats_Turkey.csv', index=False)


# Display DataFrame
df

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,...,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,G,1,Tu Holloway,IBB,21.0,42.75,25.125,8.0,17.0,...,8.75,1.0275,0.5,2.875,3.5,5.875,1.75,0.125,3.0,2.625
1,2019,G,2,Quim Colom,BAHC,27.0,40.375,19.5,5.875,14.625,...,5.75,1.01,0.5,3.375,3.875,7.5,1.125,0.0,3.375,2.125
2,2019,G,3,Shane Larkin,EFE,24.0,34.625,18.875,6.125,13.25,...,4.5,1.17875,0.875,2.5,3.5,5.625,1.875,0.125,2.5,2.375
3,2019,G,4,Alexander Perez Kaufmann,BVS,29.0,39.75,18.375,6.5,14.0,...,3.875,1.055,0.375,3.0,3.375,7.0,1.125,0.125,2.5,2.75
4,2019,G,5,Diante Garrett,TOF,20.0,40.125,18.25,7.5,15.25,...,2.125,0.8825,0.75,3.375,4.0,7.875,1.625,0.125,3.25,2.375
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
899,2023,C,31,Tolga Birer,MANI,18.0,8.625,1.625,0.5,1.375,...,1.25,0.555,0.5,1.0,1.5,0.25,0.125,0.25,0.625,1.125
900,2023,C,32,Semih Erden,TUR,30.0,10.5,1.625,0.625,1.5,...,1.25,0.3875,0.5,1.625,2.125,0.5,0.25,0.25,0.75,1.875
901,2023,C,33,Mahir Agva,GAL,30.0,11.75,1.625,0.5,2.0,...,0.625,0.9375,1.5,2.375,3.875,0.25,0.375,0.25,0.5,2.625
902,2023,C,34,Mehmet Demirel,EFE,10.0,3.0,1.0,0.5,0.75,...,0.25,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.5


In [None]:
#renaming positions
df['Position'] = df['Position'].replace(['PG', 'SG'], 'G')
df['Position'] = df['Position'].replace(['SF', 'PF'], 'F')

In [None]:
#calculation for 100 possessions
df.loc[:, 'GP':] = df.loc[:,'GP':].astype(float)
df.loc[:, 'MPG':] = df.loc[:,'MPG':].mul(1.25)

In [None]:
#filtering for center
#df_center = df[df['Position'] == 'C']
#df_center_filtered = df_center[(df_center['RPG'] >= 13.0) & (df_center['BPG'] >= 2.0 ) & (df_center['APG'] >= 3.0)]