# Feature Engineering

## 1. Import Libraries and Load Data

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

# Load every cleaned table produced by the earlier cleaning step.
# Paths are relative to the notebook's working directory.
awards_players_cleaned = pd.read_csv('../data/basketballPlayoffs_cleaned/awards_players_cleaned.csv')
coaches_cleaned = pd.read_csv('../data/basketballPlayoffs_cleaned/coaches_cleaned.csv')
players_cleaned = pd.read_csv('../data/basketballPlayoffs_cleaned/players_cleaned.csv')
# players_teams_cleaned is the table the rating columns below are added to.
players_teams_cleaned = pd.read_csv('../data/basketballPlayoffs_cleaned/players_teams_cleaned.csv')
series_post_cleaned = pd.read_csv('../data/basketballPlayoffs_cleaned/series_post_cleaned.csv')
teams_cleaned = pd.read_csv('../data/basketballPlayoffs_cleaned/teams_cleaned.csv')
teams_post_cleaned = pd.read_csv('../data/basketballPlayoffs_cleaned/teams_post_cleaned.csv')

## 2. Overall Calculation
### 2.1 Stamina Overall Calculation

The `calculate_stamina` function computes each player's overall stamina rating from their games played and minutes played, combining the regular-season and `Post*` columns.

In [2]:
def calculate_stamina(df):
    """Return a 1-10 stamina rating for every row of ``df``.

    The rating compares each row's combined games (``GP + PostGP``) and
    combined minutes (``minutes + PostMinutes``) with the per-row mean of
    the whole frame, weighting games 30% and minutes 70%. A row exactly at
    the mean scores 5; the result is clipped to [1, 10] and rounded to one
    decimal.

    Args:
        df: DataFrame with numeric columns ``GP``, ``PostGP``, ``minutes``
            and ``PostMinutes``.

    Returns:
        A Series aligned with ``df.index`` holding the rounded ratings.
    """
    row_count = len(df)
    total_games = df['GP'].sum() + df['PostGP'].sum()
    total_minutes = df['minutes'].sum() + df['PostMinutes'].sum()

    # Fall back to a neutral divisor of 1 when the frame is empty or the
    # totals are zero; otherwise 0/0 would turn every rating into NaN.
    mean_games = total_games / row_count if row_count > 0 and total_games > 0 else 1
    mean_minutes = total_minutes / row_count if row_count > 0 and total_minutes > 0 else 1

    overall_stamina = (
        (0.3 * (df['GP'] + df['PostGP']) / mean_games) +
        (0.7 * (df['minutes'] + df['PostMinutes']) / mean_minutes)
    )

    # A relative score of 1.0 (exactly average) maps to 5 on the 1-10 scale.
    overall_stamina = np.clip(overall_stamina * 5, 1, 10)
    return overall_stamina.round(1)

# Store the per-row stamina rating (1-10 scale) as a new column.
players_teams_cleaned['overallSTAMINA'] = calculate_stamina(players_teams_cleaned)


### 2.2 Defense Overall Calculation

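The `calculate_overall_defense` function computes each player's overall defensive rating from their per-game defensive rebounds, steals, blocks, and offensive rebounds, relative to the league-wide per-game averages.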

In [3]:
def calculate_overall_defense(df):
    """Return a 1-10 defensive rating for every row of ``df``.

    For each statistic the row's per-game rate (regular season plus
    ``Post*`` columns, divided by ``GP + PostGP``) is divided by the
    frame-wide per-game rate. The ratios are weighted 40% defensive
    rebounds, 20% steals, 20% blocks and 20% offensive rebounds, scaled by
    10, clipped to [1, 10] and rounded to one decimal.

    Rows with zero combined games get a per-game rate of 0 instead of
    NaN/inf, and a statistic whose frame-wide total is zero contributes 0
    instead of turning every rating into NaN.

    Args:
        df: DataFrame with numeric columns ``GP``, ``PostGP``,
            ``dRebounds``, ``PostdRebounds``, ``steals``, ``PostSteals``,
            ``blocks``, ``PostBlocks``, ``oRebounds`` and ``PostoRebounds``.

    Returns:
        A Series aligned with ``df.index`` holding the rounded ratings.
    """
    games_played = df['GP'] + df['PostGP']
    total_games = df['GP'].sum() + df['PostGP'].sum()

    def relative_rate(regular_col, post_col):
        # Frame-wide per-game mean; use a neutral 1 when it would be 0 or
        # undefined so the division below stays finite.
        league_total = df[regular_col].sum() + df[post_col].sum()
        if total_games > 0 and league_total > 0:
            league_mean = league_total / total_games
        else:
            league_mean = 1

        per_game = (df[regular_col] + df[post_col]) / games_played
        # A row with no games has no meaningful rate: treat it as 0 rather
        # than letting 0/0 (NaN) or x/0 (inf) leak into the rating.
        per_game = per_game.mask(games_played == 0, 0.0)
        return per_game / league_mean

    overall_defense = (
        (0.4 * relative_rate('dRebounds', 'PostdRebounds')) +
        (0.2 * relative_rate('steals', 'PostSteals')) +
        (0.2 * relative_rate('blocks', 'PostBlocks')) +
        (0.2 * relative_rate('oRebounds', 'PostoRebounds'))
    )

    overall_defense = np.clip(overall_defense * 10, 1, 10)
    return overall_defense.round(1)

# Store the per-row defensive rating (1-10 scale) as a new column.
players_teams_cleaned['overallDEFENSE'] = calculate_overall_defense(players_teams_cleaned)

### 2.3 Offense Overall Calculation

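The `calculate_overall_offense` function computes each player's overall offensive rating from their per-game points, assists, field goals made, free throws made, and three-pointers made, relative to the league-wide per-game averages.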

In [4]:
def calculate_overall_offense(df):
    """Return a 1-10 offensive rating for every row of ``df``.

    For each statistic the row's per-game rate (regular season plus
    ``Post*`` columns, divided by ``GP + PostGP``) is divided by the
    frame-wide per-game rate. The ratios are weighted 40% points, 30%
    assists and 10% each for field goals, free throws and three-pointers
    made, then scaled by 10, clipped to [1, 10] and rounded to one decimal.

    Rows with zero combined games get a per-game rate of 0 instead of
    NaN/inf, and a statistic whose frame-wide total is zero contributes 0
    instead of turning every rating into NaN.

    Args:
        df: DataFrame with numeric columns ``GP``, ``PostGP``, ``points``,
            ``PostPoints``, ``assists``, ``PostAssists``, ``fgMade``,
            ``PostfgMade``, ``ftMade``, ``PostftMade``, ``threeMade`` and
            ``PostthreeMade``.

    Returns:
        A Series aligned with ``df.index`` holding the rounded ratings.
    """
    games_played = df['GP'] + df['PostGP']
    total_games = df['GP'].sum() + df['PostGP'].sum()

    def relative_rate(regular_col, post_col):
        # Frame-wide per-game mean; use a neutral 1 when it would be 0 or
        # undefined so the division below stays finite.
        league_total = df[regular_col].sum() + df[post_col].sum()
        if total_games > 0 and league_total > 0:
            league_mean = league_total / total_games
        else:
            league_mean = 1

        per_game = (df[regular_col] + df[post_col]) / games_played
        # A row with no games has no meaningful rate: treat it as 0 rather
        # than letting 0/0 (NaN) or x/0 (inf) leak into the rating.
        per_game = per_game.mask(games_played == 0, 0.0)
        return per_game / league_mean

    overall_offense = (
        (0.4 * relative_rate('points', 'PostPoints')) +
        (0.3 * relative_rate('assists', 'PostAssists')) +
        (0.1 * relative_rate('fgMade', 'PostfgMade')) +
        (0.1 * relative_rate('ftMade', 'PostftMade')) +
        (0.1 * relative_rate('threeMade', 'PostthreeMade'))
    )

    overall_offense = np.clip(overall_offense * 10, 1, 10)
    return overall_offense.round(1)

# Store the per-row offensive rating (1-10 scale) as a new column.
players_teams_cleaned['overallOFFENSE'] = calculate_overall_offense(players_teams_cleaned)

### 2.4 Overall Combined Calculation

The `calculate_overall_combined` function computes a player's overall rating by averaging their stamina, defense, and offense ratings.

In [5]:
def calculate_overall_combined(df):
    """Average the three component ratings into one overall rating.

    Args:
        df: DataFrame containing the columns ``overallSTAMINA``,
            ``overallDEFENSE`` and ``overallOFFENSE``.

    Returns:
        A Series with the unweighted mean of the three columns, clipped to
        [1, 10] and rounded to one decimal.
    """
    rating_columns = ('overallSTAMINA', 'overallDEFENSE', 'overallOFFENSE')

    # Accumulate left to right so the addition order stays fixed.
    rating_sum = df[rating_columns[0]]
    for column in rating_columns[1:]:
        rating_sum = rating_sum + df[column]

    mean_rating = rating_sum / 3
    return np.clip(mean_rating, 1, 10).round(1)

# Store the combined rating; requires the three component columns added above.
players_teams_cleaned['OVERALL'] = calculate_overall_combined(players_teams_cleaned)

## 3. Export Updated DataFrame with Overalls

In [6]:
# NOTE: this overwrites the same CSV that was loaded into players_teams_cleaned
# at the top of the notebook, now including the rating columns added above.
players_teams_cleaned.to_csv('../data/basketballPlayoffs_cleaned/players_teams_cleaned.csv', index=False)