In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd 

# Season 2024 / All games
# Points scored per player, scraped from the first table on the page below.
url = "https://www.teamrankings.com/nba/player-stat/points"

# Fetch the HTML. The timeout keeps the cell from hanging forever on a stalled connection.
response = requests.get(url, timeout=30)
if response.status_code != 200:
    # Stop here: parsing an error page would only fail further down with a confusing message.
    raise RuntimeError(f"Failed to fetch the webpage: {response.status_code}")
print("Done! - Successfully fetched the webpage.")

# Parse the HTML content.
soup = BeautifulSoup(response.content, 'html.parser')

# The data is assumed to be in the first <table> of the page.
table = soup.find('table')
if table is None:
    raise ValueError(f"No <table> element found at {url}")

# Extract headers: the stripped text of every <th> inside <thead>.
headers = [th.text.strip() for th in table.find('thead').find_all('th')]

# Extract rows: one list of stripped cell texts per <tr> inside <tbody>.
rows = []
for tr in table.find('tbody').find_all('tr'):
    cells = tr.find_all('td')
    if not cells:
        # A row without <td> cells carries no data; keeping it would add a blank CSV line.
        continue
    rows.append([cell.text.strip() for cell in cells])

# Create the DataFrame (every value is still a string at this point).
df = pd.DataFrame(rows, columns=headers)

# Save data to CSV, without the DataFrame index.
df.to_csv('player-stat_2024.csv', index=False)

print("Data successfully scraped and saved to 'player-stat_2024.csv'")





Done! - Successfully fetched the webpage.
Data successfully scraped and saved to 'player-stat_2024.csv'


In [6]:
# Show the scraped table: a bare name as the cell's last expression is rendered as its output.
df

Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Luka Doncic,Dallas Mavericks,SF,32.66
1,2,Giannis Antetokounmpo,Milwaukee Bucks,SF,30.44
2,3,Shai Gilgeous-Alexander,Oklahoma City Thunder,PG,30.07
3,4,Jalen Brunson,New York Knicks,PG,29.26
4,5,Devin Booker,Phoenix Suns,SG,27.10
...,...,...,...,...,...
95,96,Cameron Johnson,Brooklyn Nets,PF,13.38
96,97,Corey Kispert,Washington Wizards,SF,13.38
97,98,Grayson Allen,Phoenix Suns,SG,13.26
98,99,Andrew Wiggins,Golden State Warriors,SF,13.21


In [5]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def extraction_webscrapping(url, output_file, header_tag, keep_columns=None):
    """Scrape the first HTML table found at ``url`` and save it as a CSV file.

    Column names are the stripped texts of the ``header_tag`` elements inside
    the table's ``<thead>``; each ``<tr>`` of ``<tbody>`` that contains ``<td>``
    cells becomes one row of stripped cell texts. Values are written as the
    strings found in the page; no type conversion is done.

    Args:
        url: Address of the page to download.
        output_file: Path of the CSV file to write (the index is not written).
        header_tag: Tag name of the header cells to look for in ``<thead>``,
            e.g. ``'th'``.
        keep_columns: Optional list of column names; when given and non-empty,
            only these columns are written, in the given order.

    Returns:
        None. The result is the CSV file plus progress messages on stdout.

    Raises:
        RuntimeError: If the server answers with a status code other than 200.
        ValueError: If the page contains no ``<table>`` element.
    """
    # The timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # Fail fast: parsing an error page would write garbage or crash later on.
        raise RuntimeError(f"Failed to fetch the webpage: {response.status_code}")
    print("Successfully fetched the webpage.")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        raise ValueError(f"No <table> element found at {url}")

    headers = [cell.text.strip() for cell in table.find('thead').find_all(header_tag)]
    print(f"Headers found: {headers}")

    rows = []
    for tr in table.find('tbody').find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            # A row without <td> cells carries no data; keeping it would add a blank CSV line.
            continue
        rows.append([cell.text.strip() for cell in cells])

    df = pd.DataFrame(rows, columns=headers)

    if keep_columns:
        print(f"Filtering to keep columns: {keep_columns}")
        df = df[keep_columns]

    df.to_csv(output_file, index=False)
    print(f"Data successfully scraped and saved to {output_file}")

def extract_wnba_player_salaries(url, output_file):
    """Scrape player names and salaries from the first table at ``url`` into a CSV.

    For every ``<tr>`` of the table's ``<tbody>`` that has at least two ``<td>``
    cells, the first cell supplies the name (its first text line) and the
    second cell supplies the salary (its first whitespace-separated token).
    The output columns are always ``"Player"`` and ``"2024 Salary"``.

    Args:
        url: Address of the page to download.
        output_file: Path of the CSV file to write (the index is not written).

    Returns:
        None. The result is the CSV file plus progress messages on stdout.

    Raises:
        RuntimeError: If the server answers with a status code other than 200.
        ValueError: If the page contains no ``<table>`` element.
    """
    # The timeout keeps a stalled connection from blocking the notebook forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # Fail fast: parsing an error page would write garbage or crash later on.
        raise RuntimeError(f"Failed to fetch the webpage: {response.status_code}")
    print("Successfully fetched the webpage.")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        raise ValueError(f"No <table> element found at {url}")

    headers = ["Player", "2024 Salary"]
    rows = []

    for tr in table.find('tbody').find_all('tr'):
        cells = tr.find_all('td')
        if len(cells) < 2:
            # Rows without both a name and a salary cell cannot be indexed below; skip them.
            continue
        # Only the first text line of the cell is the name; strip again because
        # that line may end in whitespace before the line break.
        name = cells[0].text.strip().split('\n')[0].strip()
        # Only the first token is the salary amount; an empty cell yields "".
        salary_tokens = cells[1].text.split()
        salary = salary_tokens[0] if salary_tokens else ""
        rows.append([name, salary])

    df = pd.DataFrame(rows, columns=headers)
    df.to_csv(output_file, index=False)
    print(f"Data successfully scraped and saved to {output_file}")

# Per-game player stats to scrape: (page URL, output CSV, what the stat is).
# Each page is scraped with <th> header cells, in the order listed here.
per_game_stat_pages = [
    ("https://www.teamrankings.com/nba/player-stat/points", 'player-stat_2024.csv', "number of points of a player"),
    ("https://www.teamrankings.com/nba/player-stat/efg-percentage", 'efg-percentage_2024.csv', "effective field goal %"),
    ("https://www.teamrankings.com/nba/player-stat/assists", 'assists_2024.csv', "assists"),
    ("https://www.teamrankings.com/nba/player-stat/win-score", 'win_score_2024.csv', "win score"),
    ("https://www.teamrankings.com/nba/player-stat/minutes-played", 'minutes_played.csv', "minutes played"),
]

for stat_url, stat_csv, _stat_description in per_game_stat_pages:
    extraction_webscrapping(stat_url, stat_csv, 'th')



Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to player-stat_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to efg-percentage_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to assists_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to win_score_2024.csv
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to minutes_played.csv


In [7]:
import pandas as pd
# Read the saved per-game tables back from disk.
df_assists = pd.read_csv('assists_2024.csv')            # assists per game
df_player_stats = pd.read_csv('player-stat_2024.csv')   # points per game

# Render both tables in one call: assists first, then points.
display(df_assists, df_player_stats)

# Games played: nothing is loaded for this stat in this cell.


Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Tyrese Haliburton,Indiana Pacers,PG,10.42
1,2,Luka Doncic,Dallas Mavericks,SF,9.39
2,3,Nikola Jokic,Denver Nuggets,C,8.92
3,4,James Harden,Los Angeles Clippers,SG,8.49
4,5,LeBron James,Los Angeles Lakers,SF,8.28
...,...,...,...,...,...
95,96,Karl-Anthony Towns,Minnesota Timberwolves,C,2.96
96,97,Cam Thomas,Brooklyn Nets,SG,2.91
97,98,Joe Ingles,Orlando Magic,SF,2.87
98,99,Bruce Brown,Toronto Raptors,PG,2.84


Unnamed: 0,Rank,Player,Team,Pos,Value
0,1,Luka Doncic,Dallas Mavericks,SF,32.66
1,2,Giannis Antetokounmpo,Milwaukee Bucks,SF,30.44
2,3,Shai Gilgeous-Alexander,Oklahoma City Thunder,PG,30.07
3,4,Jalen Brunson,New York Knicks,PG,29.26
4,5,Devin Booker,Phoenix Suns,SG,27.10
...,...,...,...,...,...
95,96,Cameron Johnson,Brooklyn Nets,PF,13.38
96,97,Corey Kispert,Washington Wizards,SF,13.38
97,98,Grayson Allen,Phoenix Suns,SG,13.26
98,99,Andrew Wiggins,Golden State Warriors,SF,13.21
