In [None]:
from BRScraper import nba
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Function to preprocess a single season for MIP prediction
def preprocess_mip_data(current_season_file, previous_season_file, mip_votings):
    """Build one season's year-over-year feature table for MIP prediction.

    Both season files are read with ``pd.read_csv``, reduced to the columns
    used below, and inner-joined on ``Player``, so only players present in
    both seasons appear in the result.

    Parameters
    ----------
    current_season_file, previous_season_file : str, path-like or file-like
        Anything ``pd.read_csv`` accepts. Each file must contain the columns
        ``Player, PTS, MP, FG%, 3P%, AST, TRB, Team``.
    mip_votings : iterable of str
        Names of the players who received MIP votes (set, list, Series, ...).

    Returns
    -------
    pandas.DataFrame
        One row per player with ``*_curr`` / ``*_prev`` columns, one
        ``<metric>_improvement`` column (current minus previous) per numeric
        metric, and an integer ``MIP_Nominated`` flag (1 or 0).

    Raises
    ------
    KeyError
        If either file lacks one of the required columns.
    """
    stat_columns = ['PTS', 'MP', 'FG%', '3P%', 'AST', 'TRB']
    columns = ['Player', *stat_columns, 'Team']

    def _load_season(source):
        # Read one season file and keep a single row per player.
        season = pd.read_csv(source)[columns]
        # A player listed more than once would otherwise be cross-joined with
        # every duplicate on the other side of the merge, fabricating rows.
        # NOTE(review): keeping the first row assumes a combined/season-total
        # row precedes any per-team rows -- confirm against the processed CSVs.
        return season.drop_duplicates(subset='Player', keep='first')

    df_curr = _load_season(current_season_file)
    df_prev = _load_season(previous_season_file)

    # Inner join: players missing from either season have no baseline to compare
    df_combined = pd.merge(df_curr, df_prev, on='Player', suffixes=('_curr', '_prev'))

    # Year-over-year change for every numeric metric
    for metric in stat_columns:
        df_combined[f'{metric}_improvement'] = (
            df_combined[f'{metric}_curr'] - df_combined[f'{metric}_prev']
        )

    # Materialise the nominees as a set so membership is tested against the
    # names themselves (``x in Series`` would test the index labels instead),
    # then flag all rows in one vectorised call.
    nominated = set(mip_votings)
    df_combined['MIP_Nominated'] = df_combined['Player'].isin(nominated).astype('int64')

    return df_combined


: 

In [None]:
# Define input folder paths
input_folder = "processed_data"
output_file = "mip_combined_data.csv"

# List to hold all season data
dataframes = []

# Loop through seasons; `year` is the calendar year in which the season starts
for year in range(2010, 2024):
    curr_season = f"{year}-{str(year+1)[-2:]}"
    prev_season = f"{year-1}-{str(year)[-2:]}"

    current_season_file = f"{input_folder}/nba_player_stats_{curr_season}_processed.csv"
    previous_season_file = f"{input_folder}/nba_player_stats_{prev_season}_processed.csv"

    # Both seasons are needed to compute year-over-year improvements
    if not (os.path.exists(current_season_file) and os.path.exists(previous_season_file)):
        print(f"Skipping {curr_season}: missing season file(s)")
        continue

    try:
        # Award voting pages are indexed by the year in which a season ends
        # (the 2010-11 voting is season 2011), so the lookup uses year + 1.
        # NOTE(review): this assumes the file label "2010-11" denotes the
        # season ending in 2011 -- confirm against the notebook that wrote
        # the processed CSVs.
        award_season = year + 1
        mip_data = nba.get_award_votings('mip', award_season)
        mip_votings = set(mip_data['Player'])  # Players who received MIP votes

        # Preprocess season data
        df_season = preprocess_mip_data(current_season_file, previous_season_file, mip_votings)
        dataframes.append(df_season)

        print(f"Processed data for {curr_season}")
    except Exception as e:
        # Best effort: one failing season must not abort the whole run
        print(f"Error processing {curr_season}: {e}")

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that points at the actual cause instead.
if not dataframes:
    raise ValueError(
        f"No season data could be processed from '{input_folder}'; "
        f"{output_file} was not written"
    )

# Combine all seasons into a single dataframe
mip_combined_data = pd.concat(dataframes, ignore_index=True)
mip_combined_data.to_csv(output_file, index=False)

print(f"Combined data saved to {output_file}")


In [None]:


# Load combined data
data = pd.read_csv("mip_combined_data.csv")

# Features and target: every year-over-year "<metric>_improvement" column
feature_columns = [col for col in data.columns if 'improvement' in col]
# scikit-learn releases before 1.4 reject NaN inputs to RandomForestClassifier.
# NOTE(review): filling with 0 treats a missing difference as "no change";
# presumably NaNs come from undefined percentages -- confirm this is the
# intended handling (dropping those rows is the alternative).
X = data[feature_columns].fillna(0)
y = data['MIP_Nominated']

# Train-test split. The positive class is rare, so stratify to keep the same
# nominee ratio in both partitions; stratification needs at least two rows
# per class, otherwise fall back to a plain random split.
can_stratify = y.value_counts().min() >= 2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y if can_stratify else None,
)

# Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42, n_estimators=100)
model.fit(X_train, y_train)

# Evaluate the model; zero_division=0 reports 0 (without warnings) for a
# class the model never predicts
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, zero_division=0))
