In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

years = list(range(2019, 2024))

#url_start = 'https://basketball.realgm.com/international/league/12/French-Jeep-Elite/stats/{year}/Per_48/Qualified/All/points/{position}/desc/1/Regular_Season'
url_start = 'https://basketball.realgm.com/international/league/14/Croatian-A-1-Liga/stats/{year}/Per_48/Qualified/All/points/{position}/desc/1/Regular_Season'
data = []  # List to store parsed data

for year in years:
    for position in ['PG', 'SG', 'SF', 'PF', 'C']: 
        url = url_start.format(year=year, position=position)  # Correctly format the URL
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')

        for row in soup.select('.tablesaw tbody tr'):
            player = [year, position]  # Add the year and position to the player data
            player += [cell.text.strip() for cell in row.find_all('td')]
            data.append(player)

# Creating DataFrame from the collected data
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
df = pd.DataFrame(data, columns=columns)

df['Position'] = df['Position'].replace(['PG', 'SG'], 'G')
df['Position'] = df['Position'].replace(['SF', 'PF'], 'F')

df.loc[:, 'GP':] = df.loc[:,'GP':].astype(float)
df.loc[:, 'MPG':] = df.loc[:,'MPG':].mul(1.25)

# Save DataFrame to a CSV file
df.to_csv('DataFiles/euroleague_stats_Croatia.csv', index=False)


# Display DataFrame
df

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,...,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,G,1,Martin Junakovic,SIB,11.0,39.875,37.125,12.875,29.875,...,10.75,1.05125,0.875,5.625,6.5,11.75,4.25,0.125,6.875,4.75
1,2019,G,2,Kresimir Radovcic,ZAB,23.0,34.25,34.375,12.25,27.375,...,7.75,1.065,1.375,5.75,7.0,8.25,2.625,0.0,5.0,5.0
2,2019,G,3,Brian Williams,SRKL,19.0,31.625,33.25,13.25,25.875,...,6.75,0.92625,3.125,5.0,8.125,6.0,3.625,0.25,3.875,5.75
3,2019,G,4,Justin Cobbs,CED,12.0,27.875,32.5,10.125,23.75,...,11.0,1.1475,1.375,3.875,5.125,11.5,1.625,0.0,5.125,5.625
4,2019,G,5,Mario Ihring,VRI,23.0,40.5,31.0,9.625,21.125,...,10.75,1.0525,1.0,8.0,9.125,12.5,2.625,0.375,8.0,4.625
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,2023,C,23,Josip Jukic,SPL,33.0,17.875,14.75,6.125,14.0,...,3.75,0.83375,8.375,9.5,17.875,4.375,1.875,0.375,2.0,9.625
677,2023,C,24,Jakov Stipanicev,ZAB,33.0,20.25,14.5,4.5,13.625,...,5.125,0.92375,3.5,8.625,12.125,2.375,1.625,2.375,3.375,9.375
678,2023,C,25,Stipe Sokol,VEL,15.0,9.5,13.75,4.75,12.125,...,6.375,0.83375,3.75,4.75,8.5,1.625,0.0,0.5,1.625,14.75
679,2023,C,26,Petar Markota,RIB,2.0,22.125,13.5,6.75,16.875,...,3.375,0.0,5.125,13.5,18.625,0.0,0.0,5.125,3.375,8.5


In [None]:
#renaming positions
df['Position'] = df['Position'].replace(['PG', 'SG'], 'G')
df['Position'] = df['Position'].replace(['SF', 'PF'], 'F')

In [None]:
#calculation for 100 possessions
df.loc[:, 'GP':] = df.loc[:,'GP':].astype(float)
df.loc[:, 'MPG':] = df.loc[:,'MPG':].mul(1.25)

In [None]:
#filtering for center
#df_center = df[df['Position'] == 'C']
#df_center_filtered = df_center[(df_center['RPG'] >= 13.0) & (df_center['BPG'] >= 2.0 ) & (df_center['APG'] >= 3.0)]