### Lithuanian LKL Stats Dataset

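* Scrape player stats data (per 48 minutes, by season and position)
* Change data types
* Rename positions
* Convert stats from per 48 minutes to per 100 possessions
* Add new column and fill each row with league name
* Export to CSV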

In [1]:
# Scrape per-48-minute player stats for the Lithuanian LKL (one page per season and position)

import requests
from bs4 import BeautifulSoup
import pandas as pd

pd.set_option('display.max_columns', None)  # so we can see all columns in a wide DataFrame

years = list(range(2019, 2025))  # 2019 through 2024 inclusive
positions = ['PG', 'SG', 'SF', 'PF', 'C']
url_start = "https://basketball.realgm.com/international/league/10/Lithuanian-LKL/stats/{}/Per_48/Qualified/All/points/{}/desc/1/Regular_Season"
request_timeout = 30  # seconds; without a timeout requests.get can block indefinitely

# Column layout of every stored row: the Year and Position we prepend ourselves,
# followed by the text of each <td> in a table row.
columns = ['Year', 'Position', '#', 'Player', 'Team', 'GP', 'MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%',
           'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
n_table_cells = len(columns) - 2  # number of <td> cells a row must have to fit `columns`

data = []  # one list per player row, in `columns` order

for year in years:
    for position in positions:
        url = url_start.format(year, position)
        try:
            response = requests.get(url, timeout=request_timeout)
        except requests.RequestException as exc:
            # Treat connection errors/timeouts like a bad HTTP status: report and move on,
            # so one failed page does not discard everything scraped so far.
            print(f"Failed to fetch data for year {year}, position {position}: {exc}")
            continue
        if not response.ok:
            print(f"Failed to fetch data for year {year}, position {position}")
            continue
        soup = BeautifulSoup(response.content, 'html.parser')

        rows = soup.select('.tablesaw tbody tr')
        print(f"Year: {year}, Position: {position}, Rows: {len(rows)}")  # progress / sanity check

        for row in rows:
            cells = [cell.text.strip() for cell in row.find_all('td')]
            if len(cells) != n_table_cells:
                # A row that does not match the expected layout would make the
                # DataFrame constructor below raise after all the scraping is done.
                print(f"Skipping malformed row for year {year}, position {position}: "
                      f"expected {n_table_cells} cells, got {len(cells)}")
                continue
            data.append([year, position] + cells)

# Build the DataFrame once from the collected rows; all scraped values are still strings here
df = pd.DataFrame(data, columns=columns)


Year: 2019, Position: PG, Rows: 13
Year: 2019, Position: SG, Rows: 19
Year: 2019, Position: SF, Rows: 18
Year: 2019, Position: PF, Rows: 18
Year: 2019, Position: C, Rows: 11
Year: 2020, Position: PG, Rows: 24
Year: 2020, Position: SG, Rows: 18
Year: 2020, Position: SF, Rows: 28
Year: 2020, Position: PF, Rows: 22
Year: 2020, Position: C, Rows: 22
Year: 2021, Position: PG, Rows: 24
Year: 2021, Position: SG, Rows: 27
Year: 2021, Position: SF, Rows: 32
Year: 2021, Position: PF, Rows: 24
Year: 2021, Position: C, Rows: 23
Year: 2022, Position: PG, Rows: 32
Year: 2022, Position: SG, Rows: 26
Year: 2022, Position: SF, Rows: 32
Year: 2022, Position: PF, Rows: 24
Year: 2022, Position: C, Rows: 24
Year: 2023, Position: PG, Rows: 32
Year: 2023, Position: SG, Rows: 24
Year: 2023, Position: SF, Rows: 38
Year: 2023, Position: PF, Rows: 30
Year: 2023, Position: C, Rows: 30
Year: 2024, Position: PG, Rows: 29
Year: 2024, Position: SG, Rows: 30
Year: 2024, Position: SF, Rows: 31
Year: 2024, Position: PF,

In [2]:
# Inspect column dtypes and non-null counts.
# The scraped cell text is stored as strings, so those columns show up as `object` until converted.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 758 entries, 0 to 757
Data columns (total 25 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Year      758 non-null    int64 
 1   Position  758 non-null    object
 2   #         758 non-null    object
 3   Player    758 non-null    object
 4   Team      758 non-null    object
 5   GP        758 non-null    object
 6   MPG       758 non-null    object
 7   PPG       758 non-null    object
 8   FGM       758 non-null    object
 9   FGA       758 non-null    object
 10  FG%       758 non-null    object
 11  3PM       758 non-null    object
 12  3PA       758 non-null    object
 13  3P%       758 non-null    object
 14  FTM       758 non-null    object
 15  FTA       758 non-null    object
 16  FT%       758 non-null    object
 17  ORB       758 non-null    object
 18  DRB       758 non-null    object
 19  RPG       758 non-null    object
 20  APG       758 non-null    object
 21  SPG       758 no

In [3]:
# Stat columns that should hold decimal numbers rather than scraped text
floats = ['MPG', 'PPG', 'FGM', 'FGA', 'FG%', '3PM', '3PA', '3P%', 'FTM', 'FTA', 'FT%', 'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
# errors='raise' makes any unparseable value fail loudly instead of silently becoming NaN
df[floats] = df[floats].apply(pd.to_numeric, errors='raise')

# Rank and games-played are whole numbers; use pandas' nullable integer dtype
integers = ['#', 'GP']
df[integers] = df[integers].apply(pd.to_numeric, errors='raise').astype('Int64')

df.head()


Unnamed: 0,Year,Position,#,Player,Team,GP,MPG,PPG,FGM,FGA,FG%,3PM,3PA,3P%,FTM,FTA,FT%,ORB,DRB,RPG,APG,SPG,BPG,TOV,PF
0,2019,PG,1,Tomas Delininkaitis,NEP,44,17.7,28.4,8.1,17.1,0.475,6.0,13.0,0.464,6.1,6.6,0.917,0.5,3.7,4.2,4.9,1.7,0.0,3.6,5.5
1,2019,PG,2,Ken Brown,DZU,35,24.9,27.0,8.9,20.7,0.428,4.2,11.4,0.372,5.0,5.7,0.875,0.7,4.5,5.2,7.2,1.6,0.1,4.7,5.0
2,2019,PG,3,Chauncey Collins,KED,20,26.6,26.5,8.8,20.3,0.436,3.8,11.0,0.344,5.1,6.4,0.789,1.0,4.5,5.5,5.5,1.4,0.0,4.2,3.3
3,2019,PG,4,Jamar Wilson,TEC,35,23.1,24.6,8.5,15.8,0.538,2.0,4.4,0.459,5.6,6.7,0.839,1.4,4.5,5.9,6.5,1.6,0.1,2.7,5.5
4,2019,PG,5,Ignas Vaitkus,SIA,30,26.4,22.2,7.9,18.7,0.424,2.5,7.6,0.336,3.8,4.8,0.785,0.7,7.9,8.6,3.6,1.6,0.3,3.2,3.6


In [4]:
# Collapse the scraped positions into the coarser groups used to match NBA Stats.
# 'C' is not in the mapping, so centers keep their label.
position_groups = {'PG': 'G', 'SG': 'G', 'SF': 'F', 'PF': 'F'}
df['Position'] = df['Position'].replace(position_groups)

In [5]:
# Convert stats from 48 minutes to 100 possessions.
# Only the counting stats are scaled: MPG is minutes per game (not a per-48 rate) and
# FG% / 3P% / FT% are ratios, so multiplying them would give impossible values
# (e.g. an FT% of 0.917 would become 1.146).
# NOTE(review): 1.25 is the factor the notebook has always used; its derivation is not shown here - confirm.
# NOTE: this updates df in place, so re-running the cell scales the stats a second time.
per_48_to_per_100 = 1.25
per_48_columns = ['PPG', 'FGM', 'FGA', '3PM', '3PA', 'FTM', 'FTA',
                  'ORB', 'DRB', 'RPG', 'APG', 'SPG', 'BPG', 'TOV', 'PF']
df[per_48_columns] = df[per_48_columns].mul(per_48_to_per_100)

In [7]:
# Save the DataFrame to lkl_stats.csv (path relative to the working directory).
# index=False leaves the row index out of the file.
df.to_csv('lkl_stats.csv', index=False)