In [None]:
!pip install -U kaleido

In [1]:
import sys
import os
import re 

# Add the project root directory to sys.path so the `src` package can be imported.
# The membership check keeps the cell idempotent: re-running it does not
# append the same directory to sys.path again.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # Adjust to your project's structure
if project_root not in sys.path:
    sys.path.append(project_root)

from src.analysis import *

In [2]:
# Names of the fifteen sentiment columns, listed in ascending numeric order.
sentiment_cols = [
    'sentiment_1', 'sentiment_2', 'sentiment_3', 'sentiment_4', 'sentiment_5',
    'sentiment_6', 'sentiment_7', 'sentiment_8', 'sentiment_9', 'sentiment_10',
    'sentiment_11', 'sentiment_12', 'sentiment_13', 'sentiment_14', 'sentiment_15',
]

# Integer suffix of each column name, e.g. 'sentiment_12' -> 12.
numbers = [
    int(re.search(r'\d+$', name)[0])
    for name in sentiment_cols
]

In [3]:
import pandas as pd

# Assume add_sentiment_avg() is already defined as:
def add_sentiment_avg(df, columns=None):
    """
    Adds a column `sentiment_avg` to the DataFrame, which is the row-wise
    mean of the selected sentiment columns.

    Parameters:
    df (pd.DataFrame): Input DataFrame. It is modified in place, so callers
        that ignore the return value still see the new column.
    columns (list of str, optional): Columns to average. When omitted, the
        fifteen columns `sentiment_1` ... `sentiment_15` are used.

    Returns:
    pd.DataFrame: The same DataFrame object with the `sentiment_avg` column
        added (or overwritten if it was already present).

    Raises:
    KeyError: If any of the selected columns is missing from `df`.
    """
    if columns is None:
        # Default: the full set of per-criterion sentiment columns.
        columns = [f"sentiment_{i}" for i in range(1, 16)]

    # list() lets callers pass any iterable of names (e.g. a tuple) without
    # pandas interpreting it as a single column key.
    df["sentiment_avg"] = df[list(columns)].mean(axis=1)
    return df

# Function to load CSV, process it, and overwrite the original file
def process_csv_and_add_sentiment_avg(filepath, columns_to_average):
    """
    Loads a CSV file into a DataFrame, adds a `sentiment_avg` column holding
    the row-wise mean of `columns_to_average`, and overwrites the original file.

    The mean is computed directly from `columns_to_average`, so the caller's
    column list is always honoured regardless of the default columns used by
    add_sentiment_avg().

    Parameters:
    filepath (str): Path to the CSV file; it is read and then overwritten
    columns_to_average (list): List of column names to calculate the average from

    Raises:
    KeyError: If a name in `columns_to_average` is not a column of the CSV
    """
    # Step 1: Load the CSV into a DataFrame
    df = pd.read_csv(filepath)

    # Step 2: Row-wise mean over exactly the requested columns
    df["sentiment_avg"] = df[list(columns_to_average)].mean(axis=1)

    # Step 3: Overwrite the original CSV file with the updated DataFrame
    # (index=False so no extra index column is written to the file)
    df.to_csv(filepath, index=False)

    print(f"File '{filepath}' successfully updated with 'sentiment_avg' column.")

In [4]:
# for year in list(range(1980,2024)):
#     path = f'/Users/cb/src/nba_mvp_ml/data/processed/by_season/mvp/sentiment/mvp_{year}-{str(year+1)[2:]}.csv'
    
#     process_csv_and_add_sentiment_avg(filepath=path, columns_to_average=sentiment_cols)

In [5]:
import json

# Step 1: Load JSON file into a Python dictionary of dictionaries
# (explicit UTF-8 so decoding does not depend on the platform's locale)
with open("/Users/cb/src/nba_mvp_ml/json/mvp-qualitative.json", "r", encoding="utf-8") as file:
    data = json.load(file)  # Assuming this loads a dict of dicts

# Step 2: Extract "title" fields from the nested dictionaries
# (kept available for reference; the labels built below use the numeric
# suffix of each column instead of these titles)
titles = [item.get("title") for item in data.values()]

# Step 3: Define the list of 15 keys (to map the titles)
keys_list = sentiment_cols

# Step 4: Create the final dictionary: column name -> display label.
# Each sentiment column is labelled with its number; to label by title
# instead, build it with dict(zip(keys_list, titles)).
final_dict = dict(zip(sentiment_cols, numbers))
final_dict['sentiment_avg'] = 'Sentiment Avg'
final_dict['Pts_Won'] = 'MVP Points Won'
# Output the final dictionary
# print(final_dict)

In [7]:
# Use the column -> label mapping built above as the plot dimensions.
dims = final_dict

# Load the three frames for the 2017 season.
player_df, team_df, mvp_df = load_year(2017)

# Print the first five rows of the MVP table as a numbered list (1-based).
for number, player in enumerate(mvp_df.head()['Player']):
    print(number + 1, player)

# Add the row-wise sentiment average before plotting.
mvp_df = add_sentiment_avg(df=mvp_df)

# Last expression of the cell, so its return value is what the notebook displays.
advanced_metrics_player_contribution_filtered(
    player_df,
    team_df,
    mvp_df.head(),
    dims=dims,
    include_non_mvp=False,
)


1 JAMES HARDEN
2 LEBRON JAMES
3 ANTHONY DAVIS
4 DAMIAN LILLARD
5 RUSSELL WESTBROOK


In [None]:
# Column name -> display label for the dimensions passed to the plot helper.
# Insertion order is preserved; commented-out entries are dimensions that
# are currently switched off.
dims = {
    'PLAYER_FULLNAME': 'Player',
    # 'eFG%_player': 'Eff. FG%',
    # 'TS%_player' : 'True Shooting%',
    'PER': 'PER',
    'WS': 'Win Shares',
    'BPM': '+/-',
    'SRS': 'SRS',
    # 'BLK': 'Blocks',
    # 'TRB': 'Rebounds',
    # 'WS/48': 'WS per 48 minutes (WS/48)',
    'PTS_opp_pg': 'Opp. PPG',
    # 'W/L%': 'Win-Loss %',
    'VORP': 'VORP',
    'Pace': 'Team Pace',
    'sentiment_8': 'Hist. Sig. (8)',
    'sentiment_2': 'Win Imp. (2)',
    'sentiment_14': "RP'ing (14)",
    'sentiment_avg': 'Sent. Avg.',
    'Pts_Won': 'MVP Pts Won',
}


# Render and save one figure per season from 1980 through 2023.
output_dir = "/Users/cb/src/nba_mvp_ml/output"
# Create the target folder up front so a missing directory does not abort
# the export loop when the first image is written.
os.makedirs(output_dir, exist_ok=True)

for year in range(1980, 2024):
    player_df, team_df, mvp_df = load_year(year)
    # Rebind the result so this cell reads the same as the single-season cell;
    # add_sentiment_avg returns the frame it was given.
    mvp_df = add_sentiment_avg(mvp_df)
    # Print the first five rows of the MVP table as a numbered list (1-based).
    for number, player in enumerate(mvp_df.head()['Player']):
        print(number+1, player)
    fig = advanced_metrics_player_contribution_filtered(player_df, team_df, mvp_df.head(), dims=dims, include_non_mvp=False)
    # NOTE(review): presumably a Plotly figure exported through kaleido - confirm.
    fig.write_image(f"{output_dir}/{year}.png", width=1400, height=1000, scale=2)
    # fig

1 JULIUS ERVING
2 LARRY BIRD
3 KAREEM ABDUL-JABBAR
4 MOSES MALONE
5 GEORGE GERVIN
1 MOSES MALONE
2 LARRY BIRD
3 JULIUS ERVING
4 ROBERT PARISH
5 GUS WILLIAMS
1 MOSES MALONE
2 LARRY BIRD
3 MAGIC JOHNSON
4 SIDNEY MONCRIEF
5 JULIUS ERVING
1 LARRY BIRD
2 BERNARD KING
3 MAGIC JOHNSON
4 KAREEM ABDUL-JABBAR
5 ISIAH THOMAS
1 LARRY BIRD
2 MAGIC JOHNSON
3 MOSES MALONE
4 KAREEM ABDUL-JABBAR
5 TERRY CUMMINGS
1 LARRY BIRD
2 DOMINIQUE WILKINS
3 MAGIC JOHNSON
4 HAKEEM OLAJUWON
5 KAREEM ABDUL-JABBAR
1 MAGIC JOHNSON
2 MICHAEL JORDAN
3 LARRY BIRD
4 KEVIN MCHALE
5 DOMINIQUE WILKINS
1 MICHAEL JORDAN
2 LARRY BIRD
3 MAGIC JOHNSON
4 CHARLES BARKLEY
5 CLYDE DREXLER
1 MAGIC JOHNSON
2 MICHAEL JORDAN
3 KARL MALONE
4 PATRICK EWING
5 HAKEEM OLAJUWON
1 MAGIC JOHNSON
2 CHARLES BARKLEY
3 MICHAEL JORDAN
4 KARL MALONE
5 PATRICK EWING
1 MICHAEL JORDAN
2 MAGIC JOHNSON
3 DAVID ROBINSON
4 CHARLES BARKLEY
5 KARL MALONE
1 MICHAEL JORDAN
2 CLYDE DREXLER
3 DAVID ROBINSON
4 KARL MALONE
5 PATRICK EWING
1 CHARLES BARKLEY
2 HAKEEM 

Index(['Rank', 'Player', 'Age', 'Tm', 'First', 'Pts Won', 'Pts Max', 'Share',
       'G', 'MP', 'PTS', 'TRB', 'AST', 'STL', 'BLK', 'FG%', '3P%', 'FT%', 'WS',
       'WS/48', 'sentiment_1', 'sentiment_2', 'sentiment_3', 'sentiment_4',
       'sentiment_5', 'sentiment_6', 'sentiment_7', 'sentiment_8',
       'sentiment_9', 'sentiment_10', 'sentiment_11', 'sentiment_12',
       'sentiment_13', 'sentiment_14', 'sentiment_15', 'sentiment_avg'],
      dtype='object')