# In [None]:
import sys
import os

# Make the current working directory importable so that packages located
# there can be imported by the statements below.
# os.getcwd() already returns a single path, so no os.path.join is needed.
project_root = os.path.abspath(os.getcwd())

# Keep the project root at the front of sys.path, but only insert it when it
# is not already first: re-running this code would otherwise prepend one more
# duplicate entry on every execution.
if not sys.path or sys.path[0] != project_root:
    sys.path.insert(0, project_root)

# Show the effective import search path for troubleshooting
print(sys.path)

# Import necessary functions and modules
from data_extraction.nba import extract_nba_player_salaries, extract_nba_team_salaries
from data_extraction.wnba import extract_wnba_player_salaries, extract_wnba_team_salaries
from data_extraction._offensive_per_wnba import calculate_and_save_offensive_per
from data_extraction._defensive_per_wnba import calculate_and_save_defensive_per
from data_extraction._nba_per import extract_and_calculate_per
from data_processing.clean_salaries import clean_and_extract_top_salaries, clean_and_extract_top_team_salaries
from data_processing.clean_per import clean_and_extract_top_per

# Ensure the 'cleaned_data' and 'extracted_data' directories exist.
# exist_ok=True folds the existence check into the creation call, so there is
# no window between "check" and "create" in which another process could
# create the directory and make os.makedirs fail. If the path exists but is
# not a directory, FileExistsError is raised instead of being silently ignored.
for dir_name in ['extracted_data', 'cleaned_data']:
    os.makedirs(dir_name, exist_ok=True)

# Player salary files: raw inputs under 'extracted_data' ...
nba_player_file, wnba_player_file = (
    os.path.join('extracted_data', csv_name)
    for csv_name in ('nba_player_salaries_2024.csv', 'wnba_player_salaries_2024.csv')
)
# ... and their processed counterparts under 'cleaned_data'
output_nba_player_file, output_wnba_player_file = (
    os.path.join('cleaned_data', csv_name)
    for csv_name in ('top_150_nba_player_salaries_2024.csv', 'top_150_wnba_player_salaries_2024.csv')
)

# Team salary files: raw inputs under 'extracted_data' ...
nba_team_file, wnba_team_file = (
    os.path.join('extracted_data', csv_name)
    for csv_name in ('nba_team_salaries_2024.csv', 'wnba_team_salaries_2024.csv')
)
# ... and their processed counterparts under 'cleaned_data'
output_nba_team_file, output_wnba_team_file = (
    os.path.join('cleaned_data', csv_name)
    for csv_name in ('top_12_nba_team_salaries_2024.csv', 'top_12_wnba_team_salaries_2024.csv')
)

# Player salaries: run the NBA extraction, then the WNBA extraction, each
# called with its source URL and the file path it is given as destination.
for extractor, source_url, destination in (
    (
        extract_nba_player_salaries,
        "https://hoopshype.com/salaries/players/2023-2024/",
        nba_player_file,
    ),
    (
        extract_wnba_player_salaries,
        "https://herhoopstats.com/salary-cap-sheet/wnba/players/salary_2024/stats_2024/",
        wnba_player_file,
    ),
):
    extractor(source_url, destination)

# Clean and process the two player salary files into their output files
clean_and_extract_top_salaries(
    nba_player_file,
    wnba_player_file,
    output_nba_player_file,
    output_wnba_player_file,
)

# Team salaries: same sequence, NBA first and WNBA second
for extractor, source_url, destination in (
    (
        extract_nba_team_salaries,
        "https://hoopshype.com/salaries/2023-2024/",
        nba_team_file,
    ),
    (
        extract_wnba_team_salaries,
        "https://herhoopstats.com/salary-cap-sheet/wnba/summary/2024/",
        wnba_team_file,
    ),
):
    extractor(source_url, destination)

# Clean and process the two team salary files into their output files
clean_and_extract_top_team_salaries(
    nba_team_file,
    wnba_team_file,
    output_nba_team_file,
    output_wnba_team_file,
)

# NBA PER (offensive and defensive); called with no arguments, so the
# location of whatever it writes is decided inside the function itself
extract_and_calculate_per()

# Ensure the output NBA PER files are located correctly:
# if either file is found in the working directory, move it into
# 'extracted_data', where the PER paths defined below expect it.
for file_name in ['nba_top_50_offensive_per.csv', 'nba_top_50_defensive_per.csv']:
    src = file_name
    dst = os.path.join('extracted_data', file_name)
    if os.path.exists(src):
        # os.replace overwrites an existing destination in a single step on
        # every platform. The previous remove-then-rename sequence could
        # delete the old file and then fail to move the new one, leaving no
        # file at the destination at all.
        os.replace(src, dst)

# Paths for PER files: inputs are read from 'extracted_data', cleaned
# results are written to 'cleaned_data'
nba_offensive_per_file = os.path.join('extracted_data', 'nba_top_50_offensive_per.csv')
wnba_offensive_per_file = os.path.join('extracted_data', 'wnba_top_50_offensive_per.csv')
output_nba_offensive_per_file = os.path.join('cleaned_data', 'c_nba_top_50_offensive_per.csv')
output_wnba_offensive_per_file = os.path.join('cleaned_data', 'c_wnba_top_50_offensive_per.csv')

nba_defensive_per_file = os.path.join('extracted_data', 'nba_top_50_defensive_per.csv')
wnba_defensive_per_file = os.path.join('extracted_data', 'wnba_top_50_defensive_per.csv')
output_nba_defensive_per_file = os.path.join('cleaned_data', 'c_nba_top_50_defensive_per.csv')
output_wnba_defensive_per_file = os.path.join('cleaned_data', 'c_wnba_top_50_defensive_per.csv')

# Clean and process Offensive, then Defensive, PER data. A step only runs when
# both of its input files exist; a step that cannot run is reported and
# remembered so the final status line does not claim work that never happened.
# NOTE(review): calculate_and_save_offensive_per / calculate_and_save_defensive_per
# are imported but not called in the visible code, so nothing shown here
# creates the WNBA PER inputs -- confirm they are produced elsewhere.
skipped_per_steps = []
for per_label, nba_per_input, wnba_per_input, nba_per_output, wnba_per_output in (
    (
        'O_PER',
        nba_offensive_per_file,
        wnba_offensive_per_file,
        output_nba_offensive_per_file,
        output_wnba_offensive_per_file,
    ),
    (
        'D_PER',
        nba_defensive_per_file,
        wnba_defensive_per_file,
        output_nba_defensive_per_file,
        output_wnba_defensive_per_file,
    ),
):
    missing_inputs = [
        path for path in (nba_per_input, wnba_per_input) if not os.path.exists(path)
    ]
    if missing_inputs:
        skipped_per_steps.append(per_label)
        print(f"Skipping {per_label} cleaning; missing input file(s): {', '.join(missing_inputs)}")
        continue
    clean_and_extract_top_per(nba_per_input, wnba_per_input, nba_per_output, wnba_per_output, per_label)

if skipped_per_steps:
    print(f"Data extraction and cleaning finished, but PER cleaning was skipped for: {', '.join(skipped_per_steps)}")
else:
    print("All data extraction, cleaning, and PER calculations completed.")