In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import os

def extraction_webscrapping(url, output_file, header_tag, keep_columns=None):
    """Scrape the first HTML table found at ``url`` and save it under ``csv/``.

    Args:
        url: Address of the page to download.
        output_file: File name of the CSV; it is written inside the ``csv``
            directory, which is created when it does not exist yet.
        header_tag: Tag name of the header cells looked up inside the
            table's ``<thead>`` (the calls in this file pass ``'th'``).
        keep_columns: Optional list of column names to keep. A falsy value
            keeps every scraped column.

    Returns:
        None. When the page cannot be fetched (non-200 status) or the
        expected table structure is missing, a message is printed and the
        function returns without writing a file.
    """
    # A timeout keeps one unresponsive server from hanging the whole run.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch the webpage: {response.status_code}")
        # Do not try to parse an error page.
        return
    print("Successfully fetched the webpage.")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    if table is None:
        print(f"No <table> element found at {url}")
        return

    head = table.find('thead')
    body = table.find('tbody')
    if head is None or body is None:
        print(f"Table at {url} has no <thead>/<tbody> section; nothing saved.")
        return

    headers = [cell.text.strip() for cell in head.find_all(header_tag)]
    print(f"Headers found: {headers}")

    rows = []
    for tr in body.find_all('tr'):
        cells = tr.find_all('td')
        if not cells:
            # Spacer/heading rows carry no data cells; skip them instead of
            # emitting an all-empty record.
            continue
        rows.append([cell.text.strip() for cell in cells])

    df = pd.DataFrame(rows, columns=headers)

    if keep_columns:
        print(f"Filtering to keep columns: {keep_columns}")
        df = df[keep_columns]

    # to_csv does not create missing directories, so make sure 'csv' exists.
    os.makedirs('csv', exist_ok=True)
    output_path = os.path.join('csv', output_file)
    df.to_csv(output_path, index=False)
    print(f"Data successfully scraped and saved to {output_file}")
    print("Data successfully scraped")

def extract_wnba_player_salaries(url, output_file):
    """Scrape player names and salaries from the first table at ``url``.

    The first cell of each table row supplies the player name (only the text
    before the first newline) and the second cell supplies the salary (only
    the first whitespace-separated token). The result is written to
    ``output_file`` as a CSV with the columns ``Player`` and ``2024 Salary``.

    Args:
        url: Address of the page to download.
        output_file: Path of the CSV file to write. A missing parent
            directory is created.

    Returns:
        None. When the page cannot be fetched (non-200 status) or has no
        table body, a message is printed and nothing is written.
    """
    # A timeout keeps one unresponsive server from hanging the whole run.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        print(f"Failed to fetch the webpage: {response.status_code}")
        # Do not try to parse an error page.
        return
    print("Successfully fetched the webpage.")

    soup = BeautifulSoup(response.content, 'html.parser')
    table = soup.find('table')
    body = table.find('tbody') if table is not None else None
    if body is None:
        print(f"No table body found at {url}")
        return

    headers = ["Player", "2024 Salary"]
    rows = []

    for tr in body.find_all('tr'):
        cells = tr.find_all('td')
        if len(cells) < 2:
            # Rows without both a name and a salary cell cannot be indexed.
            continue
        name = cells[0].text.strip().split('\n')[0]  # Only take first part for the name
        salary_tokens = cells[1].text.split()
        # Only take the salary amount; an empty cell yields an empty string
        # instead of raising IndexError.
        salary = salary_tokens[0] if salary_tokens else ""
        rows.append([name, salary])

    df = pd.DataFrame(rows, columns=headers)

    # to_csv does not create missing directories.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    df.to_csv(output_file, index=False)
    print(f"Data successfully scraped and saved to {output_file}")
# Each entry is (page URL, CSV file name) for one per-game player statistic.
# The pages are scraped in the order listed, always reading 'th' header cells.
stat_pages = [
    # Number of points of a player (per game)
    ("https://www.teamrankings.com/nba/player-stat/points", 'player-stat_2024.csv'),
    # Effective field goal % (per game)
    ("https://www.teamrankings.com/nba/player-stat/efg-percentage", 'efg-percentage_2024.csv'),
    # Assists (per game)
    ("https://www.teamrankings.com/nba/player-stat/assists", 'assists_2024.csv'),
    # Win score (per game)
    ("https://www.teamrankings.com/nba/player-stat/win-score", 'win_score_2024.csv'),
    # Minutes played (per game)
    ("https://www.teamrankings.com/nba/player-stat/minutes-played", 'minutes_played.csv'),
    # Offensive rebounds
    ("https://www.teamrankings.com/nba/player-stat/rebounds-offensive", 'rebounds-offensive.csv'),
    # Defensive rebounds
    ("https://www.teamrankings.com/nba/player-stat/rebounds-defensive", 'rebounds-defensive.csv'),
    # Blocks
    ("https://www.teamrankings.com/nba/player-stat/blocks", 'blocks.csv'),
    # Steals
    ("https://www.teamrankings.com/nba/player-stat/steals", 'steals.csv'),
]

for page_url, csv_name in stat_pages:
    extraction_webscrapping(page_url, csv_name, 'th')

Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to player-stat_2024.csv
Data successfully scraped
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to efg-percentage_2024.csv
Data successfully scraped
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to assists_2024.csv
Data successfully scraped
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to win_score_2024.csv
Data successfully scraped
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scraped and saved to minutes_played.csv
Data successfully scraped
Successfully fetched the webpage.
Headers found: ['Rank', 'Player', 'Team', 'Pos', 'Value']
Data successfully scrap