In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd 

# Season 2024 / All games
# Points per game for each player.
url = "https://www.teamrankings.com/nba/player-stat/points"

# Fetch the HTML content. The timeout keeps the cell from hanging forever on a
# stalled connection.
response = requests.get(url, timeout=30)
if response.status_code != 200:
    # Stop here instead of parsing an error page as if it were the stats table.
    raise RuntimeError(f"Failed to fetch the webpage: {response.status_code}")
print("Done! - Successfully fetched the webpage.")

# Parse the HTML content.
soup = BeautifulSoup(response.content, 'html.parser')

# The data is assumed to live in the first <table> of the page.
# soup.find() returns None when nothing matches, so fail with a clear message.
table = soup.find('table')
if table is None:
    raise ValueError(f"No <table> element found at {url}")

table_head = table.find('thead')
table_body = table.find('tbody')
if table_head is None or table_body is None:
    raise ValueError(f"The table at {url} has no <thead> or <tbody> section")

# Extract the column headers.
headers = [th.text.strip() for th in table_head.find_all('th')]

# Extract the data rows; rows without any <td> cell carry no data and are skipped.
rows = []
for tr in table_body.find_all('tr'):
    cells = tr.find_all('td')
    if not cells:
        continue
    rows.append([cell.text.strip() for cell in cells])

# Create the DataFrame (kept in `df` so the next cell can display it).
df = pd.DataFrame(rows, columns=headers)

# Save the data to CSV.
df.to_csv('player-stat_2024.csv', index=False)

print("Data successfully scraped and saved to 'player-stat_2024.csv'")





In [None]:
# Display the `df` DataFrame built by the scraping cell (last expression of the cell).
df

In [None]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extraction_webscrapping(url, output_file, header_tag, keep_columns=None, timeout=30):
    """Scrape the first HTML table found at `url` and save it as a CSV file.

    Parameters
    ----------
    url : str
        Address of the page to download.
    output_file : str
        Path of the CSV file to write (written without the index).
    header_tag : str
        Tag name searched inside the table's <thead> to collect the column
        names (for example 'th').
    keep_columns : list of str, optional
        When given (and non-empty), only these columns are written to the CSV.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook.

    Raises
    ------
    RuntimeError
        If the HTTP status code is not 200.
    ValueError
        If the page has no <table>, or the table has no <thead>/<tbody>.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Fail loudly instead of parsing an error page as if it were data.
        raise RuntimeError(f"Failed to fetch the webpage: {response.status_code}")
    print("Successfully fetched the webpage.")

    soup = BeautifulSoup(response.content, 'html.parser')
    # find() returns None when nothing matches; check explicitly so the error
    # names the real problem rather than surfacing as an AttributeError.
    table = soup.find('table')
    if table is None:
        raise ValueError(f"No <table> element found at {url}")

    table_head = table.find('thead')
    table_body = table.find('tbody')
    if table_head is None or table_body is None:
        raise ValueError(f"The table at {url} has no <thead> or <tbody> section")

    headers = [cell.text.strip() for cell in table_head.find_all(header_tag)]
    print(f"Headers found: {headers}")

    rows = []
    for tr in table_body.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            # A row without <td> cells holds no data; skipping it avoids an
            # all-empty record in the output.
            continue
        rows.append([cell.text.strip() for cell in cells])

    df = pd.DataFrame(rows, columns=headers)

    if keep_columns:
        print(f"Filtering to keep columns: {keep_columns}")
        df = df[keep_columns]

    df.to_csv(output_file, index=False)
    print(f"Data successfully scraped and saved to {output_file}")

def extract_wnba_player_salaries(url, output_file, timeout=30):
    """Scrape player names and salaries from the first table at `url` into a CSV.

    The first cell of each row supplies the player name (its first line only)
    and the second cell supplies the salary (its first whitespace-separated
    token only). The CSV is written with the columns "Player" and
    "2024 Salary" and without the index.

    Parameters
    ----------
    url : str
        Address of the page to download.
    output_file : str
        Path of the CSV file to write.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Raises
    ------
    RuntimeError
        If the HTTP status code is not 200.
    ValueError
        If the page has no <table>, or the table has no <tbody>.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        # Fail loudly instead of parsing an error page as if it were data.
        raise RuntimeError(f"Failed to fetch the webpage: {response.status_code}")
    print("Successfully fetched the webpage.")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        raise ValueError(f"No <table> element found at {url}")
    table_body = table.find('tbody')
    if table_body is None:
        raise ValueError(f"The table at {url} has no <tbody> section")

    headers = ["Player", "2024 Salary"]
    rows = []

    for tr in table_body.find_all('tr'):
        cells = tr.find_all('td')
        if len(cells) < 2:
            # Separator or malformed rows lack the name/salary pair; indexing
            # them would raise IndexError.
            continue
        # Only the first line of the cell is the name.
        name = cells[0].text.strip().split('\n')[0].strip()
        # Only the first token is the salary amount; an empty cell yields "".
        salary_tokens = cells[1].text.split()
        salary = salary_tokens[0] if salary_tokens else ""
        rows.append([name, salary])

    df = pd.DataFrame(rows, columns=headers)
    df.to_csv(output_file, index=False)
    print(f"Data successfully scraped and saved to {output_file}")

# Scrape every per-game player statistic used by the analysis, one CSV per page.
# Each entry is (URL slug under /nba/player-stat/, output CSV file).
TEAMRANKINGS_PLAYER_STAT_URL = "https://www.teamrankings.com/nba/player-stat/"
STAT_PAGES = [
    ("points", 'player-stat_2024.csv'),           # Number of points of a player (per game)
    ("efg-percentage", 'efg-percentage_2024.csv'),  # Effective field goal % (per game)
    ("assists", 'assists_2024.csv'),              # Assists (per game)
    ("win-score", 'win_score_2024.csv'),          # Win score (per game)
    ("minutes-played", 'minutes_played.csv'),     # Minutes played (per game)
    # The four files below are read by the CSV-loading cell, but no visible cell
    # produced them, so a fresh "Restart & Run All" failed with FileNotFoundError.
    # NOTE(review): these slugs are inferred from the CSV file names - confirm
    # that the pages exist on teamrankings.com.
    ("rebounds-offensive", 'rebounds-offensive.csv'),  # Offensive rebounds (per game)
    ("rebounds-defensive", 'rebounds-defensive.csv'),  # Defensive rebounds (per game)
    ("blocks", 'blocks.csv'),                     # Blocks (per game)
    ("steals", 'steals.csv'),                     # Steals (per game)
]

for stat_slug, csv_file in STAT_PAGES:
    extraction_webscrapping(TEAMRANKINGS_PLAYER_STAT_URL + stat_slug, csv_file, 'th')



In [None]:
import pandas as pd

############  READ THE CSV ###########
def _load_stat(csv_path, stat_column):
    """Read one scraped stat CSV and rename its generic 'Value' column to `stat_column`.

    Returns a new DataFrame; nothing is modified in place, so re-running the
    cell always gives the same result.
    """
    return pd.read_csv(csv_path).rename(columns={'Value': stat_column})


# Assists per game
df_assists = _load_stat('assists_2024.csv', 'AST')
display('Assists per game')
display(df_assists)

# Points per game
df_player_stats = _load_stat('player-stat_2024.csv', 'PTS')
display('Points per game')
display(df_player_stats)

# Minutes played per game.
# The rename used to be applied to df_player_stats by mistake, which left the
# 'Value' column of df_minutes_played untouched.
df_minutes_played = _load_stat('minutes_played.csv', 'Minutes_played_per_game')
display('Minutes Played')
display(df_minutes_played)

# Offensive rebounds (per game)
df_OR = _load_stat('rebounds-offensive.csv', 'ORB')
display('Offensive rebounds per game')
display(df_OR)

# Defensive rebounds (per game)
df_DRB = _load_stat('rebounds-defensive.csv', 'DRB')
display('Defensive rebounds per game')
display(df_DRB)

# Blocks (per game)
df_BLK = _load_stat('blocks.csv', 'BLK')
display('Blocks per game')
display(df_BLK)

# Steals (per game)
df_STL = _load_stat('steals.csv', 'STL')
display('Steal per game')
display(df_STL)

############  Merge ###########

# Shared key for every merge: one row per (player, position, team).
MERGE_KEYS = ["Player", "Pos", "Team"]

# Points and assists (merged):
df_points_and_assists = df_assists.merge(df_player_stats, on=MERGE_KEYS)
display("Points and assist (merged):")
display(df_points_and_assists)

# Final result: points/assists merged with offensive rebounds, sorted by PTS.
# - The per-table rank columns (Rank_x, Rank_y, Rank) are meaningless after the
#   merge and are dropped.
# - The sorted frame is assigned back; previously the result of
#   sort_values().reset_index() was discarded, so the frame stayed unsorted.
# - reset_index(drop=True) renumbers the rows without adding an 'index' column.
second_merge = (
    df_points_and_assists
    .merge(df_OR, on=MERGE_KEYS)
    .drop(columns=['Rank_x', 'Rank_y', 'Rank'])
    .sort_values(by='PTS')
    .reset_index(drop=True)
)

# Finally, the "PER" computation: here it is the plain mean of assists, points
# and offensive rebounds per game, rounded to one decimal.
second_merge['PER'] = (
    (second_merge['AST'] + second_merge['PTS'] + second_merge['ORB']) / 3
).round(1)
display("Points_and_assist merged with offensive rebounds and sorted by Points_per_game PTS (offensive PER rounded):")
display(second_merge)

##### Defense Merge #####
# TODO: not implemented yet. Planned steps:
#   a) merge the PTS data with df_DRB
#   b) merge df_DRB with df_BLK on MERGE_KEYS

