In [None]:
# Import libraries
import pandas as pd
import os

# Locate the combined dataset inside the processed-data directory
data_path, season_file = "../processed-data", "combined_data.csv"
file_path = os.path.join(data_path, season_file)

# Read the CSV; ending the cell on the frame renders it as a quick load check
combined_data = pd.read_csv(file_path)
combined_data

In [None]:
# Add SMOY target: 1 when the Awards string records a 1st-3rd place finish in
# Sixth Man of the Year voting ('6MOY-1', '6MOY-2' or '6MOY-3'), otherwise 0.
# The trailing \b is required: a plain substring test would also flag any rank
# that merely starts with 1, 2 or 3 (e.g. '6MOY-10', '6MOY-25').
# astype(str) keeps the previous str(x) handling of missing/non-string values,
# and na=False makes any remaining missing value count as "not a candidate".
combined_data['SMOY_Candidate'] = (
    combined_data['Awards']
    .astype(str)
    .str.contains(r'6MOY-[1-3]\b', na=False)
    .astype(int)
)

# Display a sample of the new target next to the raw Awards value
combined_data[['Player', 'Season', 'Awards', 'SMOY_Candidate']].head()


In [None]:
# Filter season rows whose Awards string contains a first-place Sixth Man finish.
# A word-boundary regex is used instead of `== '6MOY-1'` so that winners whose
# Awards cell lists additional entries alongside '6MOY-1' are not dropped, while
# ranks such as '6MOY-10' are still rejected. na=False treats missing Awards as
# non-winners. This matches the filter used by the G / GS explorations below.
smoy_winners = combined_data[combined_data['Awards'].str.contains(r'6MOY-1\b', na=False)]

# Sort by 'MP' (minutes played), ascending, so the lowest-minute winners come first
lowest_mp_smoy_winners = smoy_winners.sort_values(by='MP')

# Display relevant columns for these players
lowest_mp_smoy_winners[['Player', 'MP', 'Awards', 'Season']]

# Findings noted by the author (recorded with the earlier exact-match filter; re-confirm after re-running):
# Lowest SMOY Winner had 19.3, My cutoff will be 20 'MP', Highest SMOY Winner had 33.7, ceiling will be 35 'MP'

In [None]:
# Keep only the season rows whose Awards string includes a first-place
# Sixth Man finish; rows with missing Awards are excluded (na=False)
smoy_players = combined_data.loc[
    combined_data['Awards'].str.contains(r'6MOY-[1]\b', na=False)
]

# Order the winners by games played ('G'), highest first
top_g_smoy_players = (
    smoy_players
    .loc[:, ['Player', 'G', 'GS', 'MP', 'Awards', 'Season']]
    .sort_values(by=['G'], ascending=False)
)

# Render the sorted table
top_g_smoy_players

# Lowest SMOY Winner had 50, My cutoff will be 50 'G'

In [None]:
# Keep only the season rows whose Awards string includes a first-place
# Sixth Man finish; rows with missing Awards are excluded (na=False)
smoy_players = combined_data.loc[
    combined_data['Awards'].str.contains(r'6MOY-[1]\b', na=False)
]

# Order the winners by games started ('GS'), highest first
top_gs_smoy_players = (
    smoy_players
    .loc[:, ['Player', 'G', 'GS', 'MP', 'Awards', 'Season']]
    .sort_values(by=['GS'], ascending=False)
)

# Render the sorted table
top_gs_smoy_players

# Highest SMOY Winner had 35, My cutoff will be 40 'GS'

In [None]:
# One-Hot Encode Player Positions
# Guarded on the presence of 'Pos' so that re-running this cell after the column
# has already been replaced by its dummies is a no-op instead of a KeyError.
if 'Pos' in combined_data.columns:
    print(combined_data['Pos'].unique())
    # dtype=int emits 1/0 indicator columns directly for every position value
    # present in the data, rather than casting a hard-coded list of five
    # columns (which fails if one is absent and skips any unexpected extra).
    combined_data = pd.get_dummies(combined_data, columns=['Pos'], dtype=int)
print(combined_data.columns)
combined_data.head()

In [None]:
# Add Eligibility binary, only including players that fit preliminary SMOY reqs
# (i.e: 20 <= MP <= 35, GS <= 40, G >= 50). Computed with vectorised column
# comparisons; a missing value in any of the columns compares False and so
# yields 0, the same outcome as the previous row-by-row lambda.
# NOTE(review): the exploration notes record a winner at 19.3 MP, which the
# 20 MP floor excludes - confirm this is intended.
combined_data['SMOY_Eligible'] = (
    combined_data['GS'].le(40)
    & combined_data['MP'].between(20, 35)  # inclusive on both ends
    & combined_data['G'].ge(50)
).astype(int)

# Select the players who are SMOY eligible. The filter must use SMOY_Eligible:
# filtering on SMOY_Candidate would return only the top-3 vote getters, which is
# not what this variable (and the file it is later saved to) is named for.
eligible_players = combined_data[combined_data['SMOY_Eligible'] == 1]

# Show the first few rows of the eligible players
eligible_players[['Player', 'Season', 'GS', 'G', 'MP', 'SMOY_Eligible', 'SMOY_Candidate', 'Awards']].head()

In [None]:
# Persist the processed outputs to disk

# Destination files: the eligible-player subset and the full dataset with target columns
output_path = "../processed-data/smoy_eligible_players.csv"
combined_data_target_output_path = "../processed-data/combined_data_w_target.csv"

# Write both frames as CSV without the pandas index column
eligible_players.to_csv(output_path, index=False)
combined_data.to_csv(combined_data_target_output_path, index=False)

# Report where each file was written
print(f"SMOY Eligible Player data saved to {output_path}")
print(f"Combined Dataset with Target data saved to {combined_data_target_output_path}")