In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd 

# Season 2024 / All games
# The number of points of a player
url = "https://www.teamrankings.com/nba/player-stat/points"
# Fetching HTML content:
response = requests.get(url)
if response.status_code == 200:
    print("Done! - Successfully fetched the webpage.")
else:
    print(f"Failed to fetch the webpage: {response.status_code}")

# Parsing HTML content:
soup = BeautifulSoup(response.content, 'html.parser')

# Inspect page and find table containing needed data (Assuming the data is in a table.):
table = soup.find('table')
type(table)

# Extract headers:
headers = []
for th in table.find('thead').find_all('th'):
    headers.append(th.text.strip())

# Extract rows:
rows = []
for tr in table.find('tbody').find_all('tr'):
    cells = tr.find_all('td')
    row = [cell.text.strip() for cell in cells]
    rows.append(row)

# Create DataFrame
df = pd.DataFrame(rows, columns=headers)

# Save data to CSV:
df.to_csv('player-stat_2024.csv', index=False)

print("Data successfully scraped and saved to 'player-stat_2024.csv'")





Done! - Successfully fetched the webpage.
Data successfully scraped and saved to 'player-stat_2024.csv'


In [6]:
df

Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Luka Doncic,Dallas Mavericks,SF,32.66
1,2,Giannis Antetokounmpo,Milwaukee Bucks,SF,30.44
2,3,Shai Gilgeous-Alexander,Oklahoma City Thunder,PG,30.07
3,4,Jalen Brunson,New York Knicks,PG,29.26
4,5,Devin Booker,Phoenix Suns,SG,27.10
...,...,...,...,...,...
95,96,Cameron Johnson,Brooklyn Nets,PF,13.38
96,97,Corey Kispert,Washington Wizards,SF,13.38
97,98,Grayson Allen,Phoenix Suns,SG,13.26
98,99,Andrew Wiggins,Golden State Warriors,SF,13.21


In [4]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extraction_webscrapping(url, output_file, header_tag, keep_columns=None):
    response = requests.get(url)
    if response.status_code == 200:
        print("Successfully fetched the webpage.")
    else:
        print(f"Failed to fetch the webpage: {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    
    headers = []
    for th in table.find('thead').find_all(header_tag):
        headers.append(th.text.strip())
    print(f"Headers found: {headers}")
    
    rows = []
    for tr in table.find('tbody').find_all('tr'):
        cells = tr.find_all('td')
        row = [cell.text.strip() for cell in cells]
        rows.append(row)
    
    df = pd.DataFrame(rows, columns=headers)
    
    if keep_columns:
        print(f"Filtering to keep columns: {keep_columns}")
        df = df[keep_columns]
    
    df.to_csv(output_file, index=False)
    print(f"Data successfully scraped and saved to {output_file}")

def extract_wnba_player_salaries(url, output_file):
    response = requests.get(url)
    if response.status_code == 200:
        print("Successfully fetched the webpage.")
    else:
        print(f"Failed to fetch the webpage: {response.status_code}")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    
    headers = ["Player", "2024 Salary"]
    rows = []
    
    for tr in table.find('tbody').find_all('tr'):
        cells = tr.find_all('td')
        name = cells[0].text.strip().split('\n')[0]  # Only take first part for the name
        salary = cells[1].text.strip().split()[0]    # Only take the salary amount
        row = [name, salary]
        rows.append(row)
    
    df = pd.DataFrame(rows, columns=headers)
    df.to_csv(output_file, index=False)
    print(f"Data successfully scraped and saved to {output_file}")

#Number of points of a player (per game)
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/points", 'player-stat_2024.csv', 'th')
#Effective field goal % (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/efg-percentage", 'efg-percentage_2024.csv', 'th')
#Assists (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/assists", 'assists_2024.csv', 'th')
#Win score (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/win-score", 'win_score_2024.csv', 'th')
#Minutes played (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/minutes-played",'minutes_played.csv','th')
#Offensive rebounds (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/rebounds-offensive",'rebounds-offensive.csv','th')
#Defensive rebounds (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/rebounds-defensive",'rebounds-defensive.csv','th')
#Blocks (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/blocks",'blocks.csv','th')
#Steal (per game):
extraction_webscrapping("https://www.teamrankings.com/nba/player-stat/steals",'steals.csv','th')

Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to player-stat_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to efg-percentage_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to assists_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to win_score_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to minutes_played.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to rebounds-offensive.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']

In [28]:
import pandas as pd

############  READ THE CSV ###########
#Assists per game
df_assists = pd.read_csv('assists_2024.csv')
df_assists.rename(columns={'Value':'AST' } , inplace=True)
display('Assists per game')
display(df_assists)

#Points per game
df_player_stats = pd.read_csv('player-stat_2024.csv')
df_player_stats.rename(columns={'Value':'PTS' } , inplace=True)
display('Points per game')
display(df_player_stats)

#Games Played  --> Minutes Played ?!
df_minutes_played = pd.read_csv('minutes_played.csv')
df_player_stats.rename(columns={'Value':'Minutes_played_per_game' } , inplace=True)
display('Minutes Played')
display(df_minutes_played)

#Offensive rebounds (per game):
df_OR = pd.read_csv('rebounds-offensive.csv')
df_OR.rename(columns={'Value':'ORB' } , inplace=True)
display('Offensive rebounds per game')
display(df_OR)
#Defensive rebounds (per game):
df_DRB = pd.read_csv('rebounds-defensive.csv')
df_DRB.rename(columns={'Value':'DRB' } , inplace=True)
display('Defensive rebounds per game')
display(df_DRB)
#Blocks (per game):
df_BLK = pd.read_csv('blocks.csv')
df_BLK.rename(columns={'Value':'BLK' } , inplace=True)
display('Blocks per game')
display(df_BLK)
#Steal (per game):
df_STL = pd.read_csv('steals.csv')
df_STL.rename(columns={'Value':'STL' } , inplace=True)
display('Steal per game')
display(df_STL)

############  Merge ###########

#Merge vs points and assist (merged):
df_points_and_assists = df_assists.merge(df_player_stats, on=["Player" , "Pos", "Team"])
display("Points and assist (merged):")
display(df_points_and_assists)

#Final result: df_points_and_assists merged with df_OR (offensive rebounds) sorted by Points_per_game:
second_merge = df_points_and_assists.merge(df_OR , on=["Player" , "Pos", "Team"])
second_merge.rename(columns={'Value':'Minutes_played_per_game' } , inplace=True)
second_merge.sort_values(by=['PTS']).reset_index()
# Remove of columns : Rank_x , Rank_y , (Pos also?)
second_merge.drop(columns = ['Rank_x', 'Rank_y','Rank'] , axis=1, inplace=True)
#second_merge.drop(columns = ['Rank_x', 'Rank_y'] , axis=1, inplace=True)
#Finally, PER computation
second_merge['PER'] = round((second_merge['AST'] + second_merge['PTS']  + second_merge['ORB'])/3 ,1)
#second_merge.sort_values(by=['PTS']).reset_index()
display("Points_and_assist merged with offensive rebounds and sorted by Points_per_game PTS (offensive PER rounded):")
display(second_merge)

##### Defense Merge #####
# a) Merge of PTS and df_DRB

#second_merge[second_merge['PTS']].merge(df_DRB)
# b) Merge df_DRB and df_BLK
#df_DRB.merge(df_BLK , on=["Player" , "Pos", "Team"])



'Assists per game'

Unnamed: 0,Rank,Player,Team,Pos,AST
0,1,Tyrese Haliburton,Indiana Pacers,PG,10.42
1,2,Luka Doncic,Dallas Mavericks,SF,9.39
2,3,Nikola Jokic,Denver Nuggets,C,8.92
3,4,James Harden,Los Angeles Clippers,SG,8.49
4,5,LeBron James,Los Angeles Lakers,SF,8.28
...,...,...,...,...,...
95,96,Karl-Anthony Towns,Minnesota Timberwolves,C,2.96
96,97,Cam Thomas,Brooklyn Nets,SG,2.91
97,98,Joe Ingles,Orlando Magic,SF,2.87
98,99,Bruce Brown,Toronto Raptors,PG,2.84


'Points per game'

Unnamed: 0,Rank,Player,Team,Pos,PTS
0,1,Luka Doncic,Dallas Mavericks,SF,32.66
1,2,Giannis Antetokounmpo,Milwaukee Bucks,SF,30.44
2,3,Shai Gilgeous-Alexander,Oklahoma City Thunder,PG,30.07
3,4,Jalen Brunson,New York Knicks,PG,29.26
4,5,Devin Booker,Phoenix Suns,SG,27.10
...,...,...,...,...,...
95,96,Cameron Johnson,Brooklyn Nets,PF,13.38
96,97,Corey Kispert,Washington Wizards,SF,13.38
97,98,Grayson Allen,Phoenix Suns,SG,13.26
98,99,Andrew Wiggins,Golden State Warriors,SF,13.21


'Minutes Played'

Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Luka Doncic,Dallas Mavericks,SF,38.30
1,2,Tyrese Maxey,Philadelphia 76ers,PG,38.14
2,3,DeMar DeRozan,Chicago Bulls,SG,37.95
3,4,Kevin Durant,Phoenix Suns,SF,37.46
4,5,Miles Bridges,Charlotte Hornets,SF,37.41
...,...,...,...,...,...
95,96,Luguentz Dort,Oklahoma City Thunder,SG,29.17
96,97,Jalen Duren,Detroit Pistons,C,29.14
97,98,Malik Beasley,Milwaukee Bucks,SG,29.04
98,99,Kristaps Porzingis,Boston Celtics,PF,28.99


'Offensive rebounds per game'

Unnamed: 0,Rank,Player,Team,Pos,ORB
0,1,Clint Capela,Atlanta Hawks,C,4.58
1,2,Rudy Gobert,Minnesota Timberwolves,C,3.57
2,3,Domantas Sabonis,Sacramento Kings,PF,3.55
3,4,Isaiah Hartenstein,New York Knicks,C,3.33
4,5,Andre Drummond,Chicago Bulls,C,3.28
...,...,...,...,...,...
95,96,Trey Lyles,Sacramento Kings,PF,1.07
96,97,Luguentz Dort,Oklahoma City Thunder,SG,1.04
97,98,Franz Wagner,Orlando Magic,SF,1.04
98,99,Zeke Nnaji,Denver Nuggets,PF,1.03


'Defensive rebounds per game'

Unnamed: 0,Rank,Player,Team,Pos,DRB
0,1,Domantas Sabonis,Sacramento Kings,PF,10.10
1,2,Anthony Davis,Los Angeles Lakers,PF,9.75
2,3,Nikola Jokic,Denver Nuggets,C,9.57
3,4,Giannis Antetokounmpo,Milwaukee Bucks,SF,8.84
4,5,Rudy Gobert,Minnesota Timberwolves,C,8.84
...,...,...,...,...,...
95,96,Larry Nance Jr.,New Orleans Pelicans,SF,3.63
96,97,Brook Lopez,Milwaukee Bucks,C,3.61
97,98,James Wiseman,Detroit Pistons,C,3.59
98,99,Jonathan Kuminga,Golden State Warriors,PF,3.57


'Blocks per game'

Unnamed: 0,Rank,Player,Team,Pos,BLK
0,1,Victor Wembanyama,San Antonio Spurs,PF,3.58
1,2,Walker Kessler,Utah Jazz,C,2.41
2,3,Chet Holmgren,Oklahoma City Thunder,C,2.34
3,4,Anthony Davis,Los Angeles Lakers,PF,2.33
4,5,Brook Lopez,Milwaukee Bucks,C,2.31
...,...,...,...,...,...
95,96,Trey Murphy III,New Orleans Pelicans,SF,0.56
96,97,Kyle Anderson,Minnesota Timberwolves,SF,0.55
97,98,Jaylen Brown,Boston Celtics,SG,0.55
98,98,Luguentz Dort,Oklahoma City Thunder,SG,0.55


'Steal per game'

Unnamed: 0,Rank,Player,Team,Pos,STL
0,1,De'Aaron Fox,Sacramento Kings,PG,2.04
1,2,Shai Gilgeous-Alexander,Oklahoma City Thunder,PG,1.92
2,3,Donovan Mitchell,Cleveland Cavaliers,SG,1.75
3,4,Matisse Thybulle,Portland Trail Blazers,SG,1.74
4,5,Alex Caruso,Chicago Bulls,SG,1.66
...,...,...,...,...,...
95,96,Khris Middleton,Milwaukee Bucks,SF,0.89
96,97,Jaden McDaniels,Minnesota Timberwolves,SF,0.88
97,98,Xavier Tillman,Boston Celtics,PF,0.87
98,99,Kyle Anderson,Minnesota Timberwolves,SF,0.86


'Points and assist (merged):'

Unnamed: 0,Rank_x,Player,Team,Pos,AST,Rank_y,PTS
0,1,Tyrese Haliburton,Indiana Pacers,PG,10.42,38,19.87
1,2,Luka Doncic,Dallas Mavericks,SF,9.39,1,32.66
2,3,Nikola Jokic,Denver Nuggets,C,8.92,8,26.69
3,4,James Harden,Los Angeles Clippers,SG,8.49,62,16.91
4,5,LeBron James,Los Angeles Lakers,SF,8.28,14,25.74
...,...,...,...,...,...,...,...
66,94,Tobias Harris,Philadelphia 76ers,SF,2.99,65,16.45
67,95,Grayson Allen,Phoenix Suns,SG,2.97,98,13.26
68,96,Karl-Anthony Towns,Minnesota Timberwolves,C,2.96,31,21.21
69,97,Cam Thomas,Brooklyn Nets,SG,2.91,26,22.45


'Points_and_assist merged with offensive rebounds and sorted by Points_per_game PTS (offensive PER rounded):'

Unnamed: 0,Player,Team,Pos,AST,PTS,ORB,PER
0,Nikola Jokic,Denver Nuggets,C,8.92,26.69,2.92,12.8
1,Domantas Sabonis,Sacramento Kings,PF,8.18,19.43,3.55,10.4
2,Giannis Antetokounmpo,Milwaukee Bucks,SF,6.52,30.44,2.68,13.2
3,Scottie Barnes,Toronto Raptors,SF,6.05,19.85,2.35,9.4
4,Paolo Banchero,Orlando Magic,PF,5.28,22.91,1.1,9.8
5,Zion Williamson,New Orleans Pelicans,PF,5.03,23.11,1.76,10.0
6,Jimmy Butler,Miami Heat,PF,4.97,20.74,1.75,9.2
7,Alperen Sengun,Houston Rockets,C,4.95,21.13,2.9,9.7
8,Pascal Siakam,Indiana Pacers,PF,4.19,21.69,1.87,9.3
9,Bam Adebayo,Miami Heat,C,3.87,19.27,2.23,8.5
