### Lithuanian LKL Players Dataset

* Scrape player bio data for the Lithuanian LKL (2019–2023 seasons) from RealGM
* Check data types
* Rename positions
* Drop duplicate players
* Export to CSV

In [1]:
# Scrape player bio data for the Lithuanian LKL

import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # so we can see all columns in a wide DataFrame

url_start = "https://basketball.realgm.com/international/league/10/Lithuanian-LKL/players/{}/{}"

# Season ids used in the RealGM URL, one per season, in chronological order
# starting with 2019.
index = ['768', '869', '958', '1052', '1147']

# Derive the years from the id list so the two sequences always have the same
# length; zip() would otherwise silently drop any year without a matching id.
years = list(range(2019, 2019 + len(index)))

data = []       # one list per player row: [year, <table cell texts...>]
columns = None  # 'Year' + table header texts, taken from a page that has a header

for season_id, year in zip(index, years):
    url = url_start.format(season_id, year)  # Properly formatted URL

    # Fail loudly on HTTP errors or a hung connection instead of parsing an
    # error page as if it were an empty player table.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    rows = soup.select('.tablesaw tbody tr')
    print(f"Year: {year}, Rows: {len(rows)}")  # Debugging print

    # Remember the most recent non-empty header so the column names do not
    # depend on the last page happening to contain a table.
    headers = [header.text.strip() for header in soup.select('.tablesaw thead th')]
    if headers:
        columns = ['Year'] + headers

    for row in rows:
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if cells:  # skip rows that carry no data cells
            data.append([year] + cells)  # prefix each player row with its season year

if columns is None:
    raise RuntimeError("No player table header found on any scraped page")

# Creating DataFrame from the collected data
df = pd.DataFrame(data, columns=columns)

# Display the frame (pandas truncates long frames to their head and tail rows)
df

Year: 2019, Rows: 158
Year: 2020, Rows: 169
Year: 2021, Rows: 171
Year: 2022, Rows: 208
Year: 2023, Rows: 204


Unnamed: 0,Year,Player,Pos,HT,WT,Team,Birth City,Draft Status,Nationality
0,2019,Daniel Amigo,C,6-10,245,Pieno Zvaigzdes,El Paso (TX),"2018 NBA Draft, Undrafted",MexicoArgentina
1,2019,Steponas Babrauskas,SG,6-5,198,Lietuvos Rytas,Trakai,"2006 NBA Draft, Undrafted",Lithuania
2,2019,Cameron Bairstow,F,6-9,250,Lietuvos Rytas,Brisbane,2014 Rnd 2 Pick 19,Australia
3,2019,Juozas Balciunas,G,6-2,190,Kedainiai Nevezis,Kaunas,"2018 NBA Draft, Undrafted",Lithuania
4,2019,Laurynas Balkunas,SF,6-5,198,Pieno Zvaigzdes,Varena,"2021 NBA Draft, Undrafted",Lithuania
...,...,...,...,...,...,...,...,...,...
905,2023,Jonas Zakas,SG,6-3,187,Juventus,Vilnius,"2019 NBA Draft, Undrafted",Lithuania
906,2023,Modestas Zauneriunas,G,6-2,176,Gargzdu Gargzdai,Kaunas,"2016 NBA Draft, Undrafted",Lithuania
907,2023,Kristupas Zemaitis,G,6-4,170,BC Wolves,Kaunas,"2018 NBA Draft, Undrafted",Lithuania
908,2023,Deividas Zukauskas,PG,6-4,185,Siauliai,Siauliai,2027 NBA Draft Eligible,Lithuania


In [2]:
# Print a summary of the frame: row count, per-column non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 910 entries, 0 to 909
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   Year          910 non-null    int64 
 1   Player        910 non-null    object
 2   Pos           910 non-null    object
 3   HT            910 non-null    object
 4   WT            910 non-null    object
 5   Team          910 non-null    object
 6   Birth City    910 non-null    object
 7   Draft Status  910 non-null    object
 8   Nationality   910 non-null    object
dtypes: int64(1), object(8)
memory usage: 64.1+ KB


In [3]:
# Collapse the specific guard/forward labels into plain G / F to match NBA Stats;
# any other position value is left unchanged.
df['Pos'] = df['Pos'].replace({
    'PG': 'G',
    'SG': 'G',
    'SF': 'F',
    'PF': 'F',
})

In [4]:
# Keep only the first row seen for each player name; later rows with the same
# name (e.g. the same player in a later season) are discarded.
df = df.loc[~df.duplicated(subset=['Player'], keep='first')]

In [5]:
# Write the table to a CSV file, leaving the DataFrame index out of the output
df.to_csv(path_or_buf='lkl_playerbio.csv', index=False)