In [None]:
import sys
import os

# Put the notebook's working directory at the front of the import search path so
# the `data_extraction` package can be imported (this assumes the notebook is
# launched from the directory that contains the data_extraction folder).
project_root = os.path.abspath(os.getcwd())
sys.path.insert(0, project_root)

# Import extraction functions
from data_extraction.nba import extract_nba_player_salaries, extract_nba_team_salaries
from data_extraction.wnba import extract_wnba_player_salaries, extract_wnba_team_salaries

# Import the new functions for calculating WNBA PER
from data_extraction._offensive_per_wnba import calculate_and_save_offensive_per
from data_extraction._defensive_per_wnba import calculate_and_save_defensive_per

# Import the new function for extracting and calculating NBA PER
from data_extraction._nba_per import extract_and_calculate_per

# Salary extraction jobs, run in order: NBA players, NBA teams, WNBA players,
# WNBA teams. Each entry is (extractor, source page URL, CSV file name passed
# as the extractor's second argument — presumably the output path).
salary_jobs = [
    (extract_nba_player_salaries,
     "https://hoopshype.com/salaries/players/2023-2024/",
     'nba_player_salaries_2024.csv'),
    (extract_nba_team_salaries,
     "https://hoopshype.com/salaries/2023-2024/",
     'nba_team_salaries_2024.csv'),
    (extract_wnba_player_salaries,
     "https://herhoopstats.com/salary-cap-sheet/wnba/players/salary_2024/stats_2024/",
     'wnba_player_salaries_2024.csv'),
    (extract_wnba_team_salaries,
     "https://herhoopstats.com/salary-cap-sheet/wnba/summary/2024/",
     'wnba_team_salaries_2024.csv'),
]
for run_extraction, page_url, csv_name in salary_jobs:
    run_extraction(page_url, csv_name)

# WNBA PER jobs: offensive first, then defensive, both fed from the same
# WNBA player page used for the player salary extraction above.
wnba_per_jobs = [
    (calculate_and_save_offensive_per,
     "https://herhoopstats.com/salary-cap-sheet/wnba/players/salary_2024/stats_2024/",
     "wnba_top_50_offensive_per.csv"),
    (calculate_and_save_defensive_per,
     "https://herhoopstats.com/salary-cap-sheet/wnba/players/salary_2024/stats_2024/",
     "wnba_top_50_defensive_per.csv"),
]
for run_per, page_url, csv_name in wnba_per_jobs:
    run_per(page_url, csv_name)

# NBA PER (offensive and defensive) is produced by a single argument-less call.
extract_and_calculate_per()

print("All data extraction and PER calculations completed.")

In [None]:
# Convert the WNBA '2024 Salary' column to numeric values and de-duplicate players:
import pandas as pd

# Load the scraped WNBA player salary table.
df = pd.read_csv('wnba_player_salaries_2024.csv')

# Keep only rows whose salary is a dollar-formatted string. Casting to str first
# means missing values (and a column that pandas parsed as non-string, e.g. all
# NaN) simply fail the '$' test instead of raising on the `.str` accessor.
has_dollar_salary = df['2024 Salary'].astype(str).str.startswith('$')

# Take an explicit copy of the filtered rows so the column assignment below
# writes to an independent frame and does not trigger SettingWithCopyWarning.
df = df.loc[has_dollar_salary].copy()

# Strip '$' and thousands separators, then convert the salary to float.
df['2024 Salary'] = df['2024 Salary'].replace(r'[\$,]', '', regex=True).astype(float)

# Remove duplicate players, keeping each player's highest-salary row:
# sort descending by salary so the first occurrence per player is the maximum.
df_cleaned = (
    df.sort_values('2024 Salary', ascending=False)
      .drop_duplicates(subset=['Player'], keep='first')
)

# Write the cleaned table to a new "cleaned_" CSV without the index column.
df_cleaned.to_csv('cleaned_wnba_player_salaries_2024.csv', index=False)
print("Cleaned data saved to 'cleaned_wnba_player_salaries_2024.csv'.")

In [None]:
import sys
import os

# Put the notebook's working directory at the front of the import search path so
# the `data_extraction` and `data_processing` packages can be imported (this
# assumes the notebook is launched from the directory that contains both folders).
project_root = os.path.abspath(os.getcwd())
sys.path.insert(0, project_root)

# Import extraction functions
from data_extraction.nba import extract_nba_player_salaries, extract_nba_team_salaries
from data_extraction.wnba import extract_wnba_player_salaries, extract_wnba_team_salaries

# Import the new functions for calculating WNBA PER
from data_extraction._offensive_per_wnba import calculate_and_save_offensive_per
from data_extraction._defensive_per_wnba import calculate_and_save_defensive_per

# Import the new function for extracting and calculating NBA PER
from data_extraction._nba_per import extract_and_calculate_per

# Import the cleaning function for player salaries
from data_processing.clean_salaries import clean_and_extract_top_salaries

# File names shared by the extraction and cleaning steps: the raw player salary
# CSVs named here are passed both to the extractors and to the cleaner, so the
# cleaner always reads the files this cell has just refreshed.
nba_file = 'nba_player_salaries_2024.csv'
wnba_file = 'wnba_player_salaries_2024.csv'
output_nba_file = 'cleaned_data/top_150_nba_player_salaries_2024.csv'
output_wnba_file = 'cleaned_data/top_150_wnba_player_salaries_2024.csv'

# Extract NBA player salaries
extract_nba_player_salaries("https://hoopshype.com/salaries/players/2023-2024/", nba_file)

# Extract NBA team salaries
extract_nba_team_salaries("https://hoopshype.com/salaries/2023-2024/", 'nba_team_salaries_2024.csv')

# Extract WNBA player salaries
extract_wnba_player_salaries("https://herhoopstats.com/salary-cap-sheet/wnba/players/salary_2024/stats_2024/", wnba_file)

# Extract WNBA team salaries
extract_wnba_team_salaries("https://herhoopstats.com/salary-cap-sheet/wnba/summary/2024/", 'wnba_team_salaries_2024.csv')

# Clean and process player salaries. This must run AFTER the extraction calls
# above, which are what (re)write nba_file and wnba_file; running it first would
# clean stale files, or fail on a fresh checkout where they do not exist yet.
# The output directory is created up front in case the cleaner does not do so.
for cleaned_path in (output_nba_file, output_wnba_file):
    os.makedirs(os.path.dirname(cleaned_path), exist_ok=True)
clean_and_extract_top_salaries(nba_file, wnba_file, output_nba_file, output_wnba_file)

# Calculate and save Offensive PER for WNBA players
calculate_and_save_offensive_per("https://herhoopstats.com/salary-cap-sheet/wnba/players/salary_2024/stats_2024/", "wnba_top_50_offensive_per.csv")

# Calculate and save Defensive PER for WNBA players
calculate_and_save_defensive_per("https://herhoopstats.com/salary-cap-sheet/wnba/players/salary_2024/stats_2024/", "wnba_top_50_defensive_per.csv")

# Extract and calculate NBA PER for both Offensive and Defensive
extract_and_calculate_per()

print("All data extraction, cleaning, and PER calculations completed.")