In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
years = list(range(2019, 2024))
url_start = 'https://basketball.realgm.com/international/league/14/Croatian-A-1-Liga/stats/{year}/Per_48/Qualified/All/points/{position}/desc/1/Regular_Season'
data = []  # List to store parsed data
for year in years:
    for position in ['PG', 'SG', 'SF', 'PF', 'C']:
        url = url_start.format(year=year, position=position)  # Correctly format the URL
        response = requests.get(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        for row in soup.select('.tablesaw tbody tr'):
            player = [year, position]  # Add the year and position to the player data
            player += [cell.text.strip() for cell in row.find_all('td')]
            data.append(player)
# Creating DataFrame from the collected data
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
df = pd.DataFrame(data, columns=columns)
# Save DataFrame to a CSV file
df.to_csv('croatian_stats_with_positions.csv', index=False)
# Display DataFrame
df 
# croatian_stats_with_position = df

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,...,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,PG,1,Martin Junakovic,SIB,11,31.9,29.7,10.3,23.9,...,8.6,.841,0.7,4.5,5.2,9.4,3.4,0.1,5.5,3.8
1,2019,PG,2,Kresimir Radovcic,ZAB,23,27.4,27.5,9.8,21.9,...,6.2,.852,1.1,4.6,5.6,6.6,2.1,0.0,4.0,4.0
2,2019,PG,3,Brian Williams,SRKL,19,25.3,26.6,10.6,20.7,...,5.4,.741,2.5,4.0,6.5,4.8,2.9,0.2,3.1,4.6
3,2019,PG,4,Justin Cobbs,CED,12,22.3,26.0,8.1,19.0,...,8.8,.918,1.1,3.1,4.1,9.2,1.3,0.0,4.1,4.5
4,2019,PG,5,Mario Ihring,VRI,23,32.4,24.8,7.7,16.9,...,8.6,.842,0.8,6.4,7.3,10.0,2.1,0.3,6.4,3.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
676,2023,C,23,Josip Jukic,SPL,33,14.3,11.8,4.9,11.2,...,3.0,.667,6.7,7.6,14.3,3.5,1.5,0.3,1.6,7.7
677,2023,C,24,Jakov Stipanicev,ZAB,33,16.2,11.6,3.6,10.9,...,4.1,.739,2.8,6.9,9.7,1.9,1.3,1.9,2.7,7.5
678,2023,C,25,Stipe Sokol,VEL,15,7.6,11.0,3.8,9.7,...,5.1,.667,3.0,3.8,6.8,1.3,0.0,0.4,1.3,11.8
679,2023,C,26,Petar Markota,RIB,2,17.7,10.8,5.4,13.5,...,2.7,.000,4.1,10.8,14.9,0.0,0.0,4.1,2.7,6.8


In [2]:
croatian_stats_with_position = df

In [3]:
croatian_stats_with_position['Position'] = croatian_stats_with_position['Position'].replace(['PG', 'SG'], 'G')
croatian_stats_with_position['Position'] = croatian_stats_with_position['Position'].replace(['SF', 'PF'], 'F')

In [4]:
# Scrape Player Bio in German BBL

import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # so we can see all columns in a wide DataFrame

years = list(range(2019, 2025))

url_start = "https://basketball.realgm.com/international/league/14/Croatian-A-1-Liga/players/{}/{}"

data = []  # List to store parsed data
index = ['658', '762', '863', '952', '1046']
for index, year in zip(index, years):
    url = url_start.format(index, year)  # Properly formatted URL
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')

    rows = soup.select('.tablesaw tbody tr')
    print(f"Year: {year}, Rows: {len(rows)}")  # Debugging print

    for row in rows:
        player = [year]  # Add the year and position to the player data
        player += [cell.text.strip() for cell in row.find_all('td')]
        data.append(player)

# Extracting column names from the first row of the table
columns = ['Year'] + [header.text.strip() for header in soup.select('.tablesaw thead th')]

# Creating DataFrame from the collected data
df = pd.DataFrame(data, columns=columns)


# Displaying just the first few rows for clarity
df 

Year: 2019, Rows: 228
Year: 2020, Rows: 207
Year: 2021, Rows: 215
Year: 2022, Rows: 238
Year: 2023, Rows: 215


Unnamed: 0,Year,Player,Pos,HT,WT,Team,Birth City,Draft Status,Nationality
0,2019,Abdul-Malik Abu,F,6-8,240,KK Vrijednosnice Osijek,Boston (MA),"2018 NBA Draft, Undrafted",United StatesNigeria
1,2019,Pablo Aguilar,PF,6-8,235,Cedevita,Granada,"2011 NBA Draft, Undrafted",Spain
2,2019,Andro Anzulovic,SG,6-4,200,KK Zabok,Zagreb,"2013 NBA Draft, Undrafted",Croatia
3,2019,Roko Anzulovic,G,6-5,-,KK Velika Gorica,,"2018 NBA Draft, Undrafted",Croatia
4,2019,Petar Babic,SG,6-2,196,KK Bosco Zagreb,Zagreb,"2007 NBA Draft, Undrafted",Croatia
...,...,...,...,...,...,...,...,...,...
1098,2023,Jurica Zagorsak,G,6-6,-,KK Zadar,,2027 NBA Draft Eligible,Croatia
1099,2023,Vice Zanki,G,6-7,-,KK Cedevita Junior Zagreb,Zagreb,2027 NBA Draft Eligible,Croatia
1100,2023,Marko Zebic,F,6-4,-,KK Ribola Kastela,,2026 NBA Draft Eligible,Croatia
1101,2023,Karlo Zganec,F,6-9,225,KK Zadar,Zagreb,"2017 NBA Draft, Undrafted",Croatia


In [21]:
croatian_bio_players = df

In [30]:
croatian_bio_players  = croatian_bio_players .drop_duplicates(subset=['Player'])

In [31]:
croatian_bio_players

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2012,G,1,Domagoj Basic,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5
1,2012,G,2,Roko Rogic,ALK,30,24.2,23.3,8.0,18.1,0.442,1.8,6.5,0.276,5.5,6.7,0.824,1.4,5.0,6.3,4.1,1.8,0.0,4.2,4.3
2,2012,G,3,Franjo Bubalo,ALK,27,29.1,21.4,7.7,16.9,0.457,1.6,5.0,0.317,4.4,5.7,0.766,1.0,2.4,3.4,4.9,1.7,0.1,3.7,3.4
3,2012,G,4,Mario Pesut,PUN,30,23.6,19.6,7.1,17.0,0.420,1.0,5.3,0.179,4.4,7.1,0.625,2.6,2.5,5.1,5.2,2.9,0.1,4.1,5.0
4,2012,G,5,Mislav Peric,DUB,30,25.7,19.6,6.9,15.3,0.453,2.4,6.9,0.351,3.3,4.7,0.707,0.4,3.1,3.5,2.5,2.1,0.0,5.0,6.7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1276,2023,C,19,Emeka Obukwelu,SRKL,12,19.9,15.7,5.6,12.9,0.438,1.2,4.0,0.300,3.2,4.2,0.762,1.6,8.4,10.1,1.6,0.0,0.4,2.4,5.8
1277,2023,C,20,Ivan Juric,DUB,16,6.0,15.6,6.0,14.6,0.414,0.5,1.5,0.333,3.0,5.5,0.545,5.5,7.0,12.6,1.5,1.5,0.5,6.5,7.5
1278,2023,C,21,Mario Kresic,RIB,2,20.6,14.0,4.7,14.0,0.333,2.3,4.7,0.500,2.3,2.3,1.000,1.2,5.8,7.0,1.2,1.2,2.3,2.3,10.5
1281,2023,C,24,Jakov Stipanicev,ZAB,33,16.2,11.6,3.6,10.9,0.331,1.3,5.7,0.234,3.1,4.1,0.739,2.8,6.9,9.7,1.9,1.3,1.9,2.7,7.5


In [40]:
# Assuming british_merged_df is your DataFrame
croatian_bio_players.to_csv('croatian_bio_players.csv', index=False)

In [41]:
croatian_bio_players['Pos'] = croatian_bio_players['Pos'].replace(['PG', 'SG'], 'G')
croatian_bio_players['Pos'] = croatian_bio_players['Pos'].replace(['SF', 'PF'], 'F')

KeyError: 'Pos'

In [43]:
import pandas as pd

# Load the datasets
croatian_stats_with_position = pd.read_csv('/Users/allendcosta/NBA-NEAT/NEAT-Scouting-Data-Analytics/merged_croatian_bio_stats_data.csv')
croatian_bio_players = pd.read_csv('/Users/allendcosta/NBA-NEAT/NEAT-Scouting-Data-Analytics/croatian_bio_players.csv')

# Merge the datasets on the "Player" column
merged_df = pd.merge(croatian_bio_players, croatian_stats_with_position, on='Player', how='inner')

# # Save the merged DataFrame to a CSV file
# merged_df.to_csv('merged_greek_bio_stats_data.csv', index=False)

# Display the merged DataFrame
merged_df
merged_croatian_df = merged_df

In [44]:
# Assuming british_merged_df is your DataFrame
merged_croatian_df['League'] = 'Greek Basketball League'

# Display DataFrame
merged_croatian_df

Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF,Year_x,Position_x,#_x,Team_x,GP_x,MPG_x,PPG_x,FGM_x,FGA_x,FG%_x,3PM_x,3PA_x,3P%_x,FTM_x,FTA_x,FT%_x,ORB_x,DRB_x,RPG_x,APG_x,SPG_x,BPG_x,TOV_x,PF_x,Year_y,Position_y,#_y,Team_y,GP_y,MPG_y,PPG_y,FGM_y,FGA_y,FG%_y,3PM_y,3PA_y,3P%_y,FTM_y,FTA_y,FT%_y,ORB_y,DRB_y,RPG_y,APG_y,SPG_y,BPG_y,TOV_y,PF_y,League
0,2012,G,1,Domagoj Basic,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2012,G,1,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2012,PG,1,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,Greek Basketball League
1,2012,G,1,Domagoj Basic,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2012,G,1,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2014,SG,1,SIB,32,29.3,24.3,8.5,17.5,0.485,4.4,9.6,0.460,2.8,3.2,0.887,0.4,2.7,3.1,2.4,1.2,0.1,2.4,3.9,Greek Basketball League
2,2012,G,1,Domagoj Basic,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2012,G,1,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2015,SG,2,SIB,30,24.2,21.9,7.3,17.7,0.414,4.3,10.7,0.401,3.0,3.4,0.882,0.4,2.2,2.6,1.9,1.3,0.1,2.6,4.0,Greek Basketball League
3,2012,G,1,Domagoj Basic,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2012,G,1,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2016,SG,7,SIB,30,30.4,20.9,7.4,16.6,0.444,4.3,9.7,0.438,2.1,2.4,0.848,0.5,3.7,4.3,2.3,1.2,0.0,1.8,3.6,Greek Basketball League
4,2012,G,1,Domagoj Basic,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2012,G,1,ZAB,31,21.1,24.6,8.7,18.1,0.480,5.2,11.8,0.444,2.1,2.2,0.933,0.3,2.1,2.4,2.3,1.5,0.1,2.9,3.5,2017,SG,8,SIB,29,18.1,19.1,6.5,15.2,0.428,4.6,11.3,0.403,1.6,2.0,0.773,0.5,3.1,3.6,2.5,0.6,0.0,0.9,3.5,Greek Basketball League
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1280,2023,C,19,Emeka Obukwelu,SRKL,12,19.9,15.7,5.6,12.9,0.438,1.2,4.0,0.300,3.2,4.2,0.762,1.6,8.4,10.1,1.6,0.0,0.4,2.4,5.8,2023,C,19,SRKL,12,19.9,15.7,5.6,12.9,0.438,1.2,4.0,0.300,3.2,4.2,0.762,1.6,8.4,10.1,1.6,0.0,0.4,2.4,5.8,2023,C,19,SRKL,12,19.9,15.7,5.6,12.9,0.438,1.2,4.0,0.300,3.2,4.2,0.762,1.6,8.4,10.1,1.6,0.0,0.4,2.4,5.8,Greek Basketball League
1281,2023,C,20,Ivan Juric,DUB,16,6.0,15.6,6.0,14.6,0.414,0.5,1.5,0.333,3.0,5.5,0.545,5.5,7.0,12.6,1.5,1.5,0.5,6.5,7.5,2023,C,20,DUB,16,6.0,15.6,6.0,14.6,0.414,0.5,1.5,0.333,3.0,5.5,0.545,5.5,7.0,12.6,1.5,1.5,0.5,6.5,7.5,2023,C,20,DUB,16,6.0,15.6,6.0,14.6,0.414,0.5,1.5,0.333,3.0,5.5,0.545,5.5,7.0,12.6,1.5,1.5,0.5,6.5,7.5,Greek Basketball League
1282,2023,C,21,Mario Kresic,RIB,2,20.6,14.0,4.7,14.0,0.333,2.3,4.7,0.500,2.3,2.3,1.000,1.2,5.8,7.0,1.2,1.2,2.3,2.3,10.5,2023,C,21,RIB,2,20.6,14.0,4.7,14.0,0.333,2.3,4.7,0.500,2.3,2.3,1.000,1.2,5.8,7.0,1.2,1.2,2.3,2.3,10.5,2023,C,21,RIB,2,20.6,14.0,4.7,14.0,0.333,2.3,4.7,0.500,2.3,2.3,1.000,1.2,5.8,7.0,1.2,1.2,2.3,2.3,10.5,Greek Basketball League
1283,2023,C,24,Jakov Stipanicev,ZAB,33,16.2,11.6,3.6,10.9,0.331,1.3,5.7,0.234,3.1,4.1,0.739,2.8,6.9,9.7,1.9,1.3,1.9,2.7,7.5,2023,C,24,ZAB,33,16.2,11.6,3.6,10.9,0.331,1.3,5.7,0.234,3.1,4.1,0.739,2.8,6.9,9.7,1.9,1.3,1.9,2.7,7.5,2023,C,24,ZAB,33,16.2,11.6,3.6,10.9,0.331,1.3,5.7,0.234,3.1,4.1,0.739,2.8,6.9,9.7,1.9,1.3,1.9,2.7,7.5,Greek Basketball League


In [45]:

# Save the merged DataFrame to a CSV file
merged_df.to_csv('merged_croatian_bio_stats_data.csv', index=False)