In [1]:
import time
import json
import re
import os
import sys
# Add the project root directory to sys.path
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))  # Adjust to your project's structure
sys.path.append(project_root)

import pandas as pd
from openai import OpenAI

from src.analysis import *

In [2]:
def append_output_to_file(output):
    """
    Appends the given output to a file with a separator.

    Parameters:
    - output (str): The text to append to the file.
    - file_path (str): Path to the file where the output will be appended.

    Returns:
    - None
    """
    dump_file = '/Users/cb/src/nba_mvp_ml/data/gpt-4o-output-mvps-5-criteria.txt'
    
    separator = "\n" + "-" * 80 + "\n"  # Line of dashes for separation
    with open(dump_file, "a", encoding="utf-8") as file:
        file.write(output + separator)

In [3]:
def write_season(year):
    return f'{year}-{str(year+1)[2:]}'

def setup_openai_client(api_key_path, endpoint, model_name):
    """
    Sets up the OpenAI client using the provided API key and endpoint.

    Args:
    api_key_path (str): Path to the file containing the API key.
    endpoint (str): The base URL for the OpenAI API endpoint.
    model_name (str): The name of the model to use.

    Returns:
    tuple: A tuple containing the initialized OpenAI client and the model name.
    """
    try:
        # Read the API key from the file
        with open(api_key_path, "r") as file:
            api_key = file.read().strip()
        
        # Initialize the OpenAI client
        client = OpenAI(
            # base_url=endpoint,
            api_key=api_key,
        )
        
        return client, model_name

    except FileNotFoundError as e:
        raise FileNotFoundError(f"API key file not found: {e.filename}")
    except Exception as e:
        raise RuntimeError(f"Failed to set up OpenAI client: {str(e)}")

def load_json_files(prompts_path, role_path):
    """
    Reads two JSON files and returns their contents as dictionaries.
    
    Args:
    prompts_path (str): Path to the JSON file containing GPT prompts.
    role_path (str): Path to the JSON file containing GPT role configuration.
    
    Returns:
    tuple: A tuple containing two dictionaries, `gpt_prompts` and `gpt_role`.
    """
    try:
        # Read the prompts JSON file
        with open(prompts_path, 'r', encoding='utf-8') as file:
            gpt_prompts = json.load(file)
        
        # Read the role JSON file
        with open(role_path, 'r', encoding='utf-8') as file:
            gpt_role = json.load(file)
        
        return gpt_prompts, gpt_role
    
    except FileNotFoundError as e:
        raise FileNotFoundError(f"File not found: {e.filename}")
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON format in file: {e.msg}")

def extract_rating(input_string):
    """
    Extracts the first single digit in the format [[X / Y]] from the input string.
    
    Args:
    input_string (str): The input string containing the rating.
    
    Returns:
    int: The extracted single digit as an integer.
    """
    match = re.search(r'\[\s*\[\s*(\-*\d+\.*\d*)\s*/\s*\d+\s*\]\s*\]', input_string)
    if match:
        return float(match.group(1))
    else:
        print(input_string)
        print("Rating in the format [[X / Y]] not found in the input string.")
        return -1

def add_sentiment_avg(df):
    """
    Adds a column `sentiment_avg` to the DataFrame, which is the average
    of numeric data from the specified columns for each row.

    Parameters:
    df (pd.DataFrame): Input DataFrame

    Returns:
    pd.DataFrame: Updated DataFrame with the new `sentiment_avg` column
    """

    sentiment_cols = [
    'sentiment_1', 'sentiment_2', 'sentiment_3', 'sentiment_4',
    'sentiment_5'
]
    df["sentiment_avg"] = df[sentiment_cols].mean(axis=1)
    return df


def analyze_sentiment(client, model_name, role, prompt, year, player_name, temperature=1.0, top_p=1.0, max_tokens=1000):
    content = f'Consider the {year}-{str(year+1)[2:]} NBA season.  Give your response with respect to {player_name}. {prompt['prompt']}.'

    
    try:
        response = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": role,
                },
                {
                    "role": "user",
                    "content": content,
                }
            ],
            temperature=temperature,
            top_p=top_p,
            max_tokens=max_tokens,
            model=model_name
        )
    except Exception as e:
        print (f"Failed to receive response from OpenAI client with error: {str(e)}")    
        
    text = response.choices[0].message.content
    append_output_to_file(f'{write_season(year)} {player_name}\n{prompt['title']}: {text}')
    rating = extract_rating(text)

    print(f'{write_season(year)}: {player_name} - {prompt['title']}: {rating}')
        
    return rating
        
def tell_mvp_story(client, model_name, role, prompts, year, player_name, temperature=1.0, top_p=1.0, max_tokens=1000, sleep=7):
    ratings = {
        player_name:{}
    }
    
    for prompt in prompts.keys():
        rating = analyze_sentiment(
            client=client,                     
            model_name=model_name,                     
            role=role,                     
            prompt=prompts[prompt],   
            year=year,
            player_name=player_name,                     
            temperature=temperature,                     
            top_p=top_p,                     
            max_tokens=max_tokens
        )

        ratings[player_name][prompt] = rating

        time.sleep(sleep) # Accomodate request limit ~10/min

    return ratings

def process_mvp_stories_for_year(client, model_name, role, prompts, year, df, temperature=1.0, top_p=1.0, max_tokens=1000, sleep=7):
    """
    Processes the MVP story for the first five players in the DataFrame and
    writes the sentiment ratings into new columns for each prompt key.

    Parameters:
    - client (object): ChatGPT client instance.
    - model_name (str): The model name to use for predictions.
    - role (str): System role for ChatGPT.
    - prompts (dict): Dictionary of prompts.
    - year (int): NBA season year (e.g., 2023 for the 2023-24 season).
    - temperature (float): Temperature parameter for ChatGPT.
    - top_p (float): Top-p parameter for ChatGPT.
    - max_tokens (int): Maximum tokens for each ChatGPT response.
    - sleep (int): Sleep duration between requests to avoid rate limits.

    Saves:
    - Overwrites the CSV with new sentiment columns added.
    """

    # Initialize new columns for each prompt key
    for prompt_key in prompts.keys():
        df[f"sentiment_{prompt_key}"] = 0  # Default to 0 for all rows

    # Process only the top 5 players based on the 'Rank' column
    for index, row in df.iterrows():
        # if index < 7:  # Only process the first 7 rows
        if index < 10:  # Only process the first 10 rows
            player_name = row['Player']
            ratings = tell_mvp_story(
                client=client,
                model_name=model_name,
                role=role,
                prompts=prompts,
                year=year,
                player_name=player_name,
                temperature=temperature,
                top_p=top_p,
                max_tokens=max_tokens,
                sleep=sleep
            )

            # Add ratings for each prompt key to the corresponding column
            for prompt_key, rating in ratings[player_name].items():
                df.at[index, f"sentiment_{prompt_key}"] = rating

            df = add_sentiment_avg(df)
        else:
            # Skip processing for rows beyond the top 5
            break

    # Save the modified DataFrame back to the file
    # file_path = f'/Users/cb/src/nba_mvp_ml/data/processed/by_season/mvp/sentiment/mvp_{year}-{str(year+1)[2:]}.csv'
    file_path = f'/Users/cb/src/nba_mvp_ml/data/sentiment/{write_season(year)}.csv'

    df.to_csv(file_path, index=False)
    print(f"Updated DataFrame saved to {file_path}")

In [4]:
# JSON Prompts
prompts_path = '/Users/cb/src/nba_mvp_ml/json/mvp-qualitative_updated.json'
role_path = '/Users/cb/src/nba_mvp_ml/json/mvp-role.json'

gpt_prompts, gpt_role = load_json_files(prompts_path, role_path)

# OpenAI Client
# api_key_path = "/Users/cb/src/gpt-4o-api-key-expires-02-25.txt"
api_key_path = "/Users/cb/src/nba_mvp_ml-key-paid.txt"

endpoint = "https://models.inference.ai.azure.com"
model_name = "gpt-4o"

client, model_name = setup_openai_client(api_key_path, endpoint, model_name)    

In [7]:
for year in list(range(1986,2024)) :
    # player_df, team_df, mvp_df = load_year(year, mvp_path='/Users/cb/src/nba_mvp_ml/data/processed/by_season/mvp')
    mvp_df = pd.read_csv(f'/Users/cb/src/nba_mvp_ml/data/{write_season(year)}.csv')

    process_mvp_stories_for_year(
        client=client, 
        model_name=model_name, 
        role=gpt_role['role'], 
        prompts=gpt_prompts, 
        year=year, 
        df=mvp_df,
        temperature=1.0, 
        top_p=1.0, 
        max_tokens=1000,
        sleep=1
    )

1986-87: Magic Johnson - Impact on Winning and Team Dynamics: 9.0
1986-87: Magic Johnson - Defining Individual Season Performance: 9.0
1986-87: Magic Johnson - Narrative and Cultural Resonance: 8.0
1986-87: Magic Johnson - Two-Way Excellence: 8.0
1986-87: Magic Johnson - Clutch and High-Stakes Impact: 9.0
1986-87: Michael Jordan - Impact on Winning and Team Dynamics: 9.0
1986-87: Michael Jordan - Defining Individual Season Performance: 9.0
1986-87: Michael Jordan - Narrative and Cultural Resonance: 8.0
1986-87: Michael Jordan - Two-Way Excellence: 9.0
1986-87: Michael Jordan - Clutch and High-Stakes Impact: 9.0
1986-87: Larry Bird - Impact on Winning and Team Dynamics: 9.0
1986-87: Larry Bird - Defining Individual Season Performance: 8.0
1986-87: Larry Bird - Narrative and Cultural Resonance: 7.0
1986-87: Larry Bird - Two-Way Excellence: 8.0
1986-87: Larry Bird - Clutch and High-Stakes Impact: 9.0
1986-87: Kevin McHale - Impact on Winning and Team Dynamics: 7.0
1986-87: Kevin McHale - 


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '9.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



1987-88: Larry Bird - Impact on Winning and Team Dynamics: 9.0
1987-88: Larry Bird - Defining Individual Season Performance: 8.0
1987-88: Larry Bird - Narrative and Cultural Resonance: 7.0
1987-88: Larry Bird - Two-Way Excellence: 8.0
1987-88: Larry Bird - Clutch and High-Stakes Impact: 8.0
1987-88: Magic Johnson - Impact on Winning and Team Dynamics: 9.0
1987-88: Magic Johnson - Defining Individual Season Performance: 9.0
1987-88: Magic Johnson - Narrative and Cultural Resonance: 7.0
1987-88: Magic Johnson - Two-Way Excellence: 8.0
1987-88: Magic Johnson - Clutch and High-Stakes Impact: 9.0
1987-88: Charles Barkley - Impact on Winning and Team Dynamics: 7.0
1987-88: Charles Barkley - Defining Individual Season Performance: 8.0
1987-88: Charles Barkley - Narrative and Cultural Resonance: 5.0
1987-88: Charles Barkley - Two-Way Excellence: 7.0
1987-88: Charles Barkley - Clutch and High-Stakes Impact: 7.0
1987-88: Clyde Drexler - Impact on Winning and Team Dynamics: 7.0
1987-88: Clyde Dre


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '9.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



1990-91: Magic Johnson - Impact on Winning and Team Dynamics: 9.0
1990-91: Magic Johnson - Defining Individual Season Performance: 8.0
1990-91: Magic Johnson - Narrative and Cultural Resonance: 7.0
1990-91: Magic Johnson - Two-Way Excellence: 8.0
1990-91: Magic Johnson - Clutch and High-Stakes Impact: 9.0
1990-91: David Robinson - Impact on Winning and Team Dynamics: 8.0
1990-91: David Robinson - Defining Individual Season Performance: 7.0
1990-91: David Robinson - Narrative and Cultural Resonance: 3.0
1990-91: David Robinson - Two-Way Excellence: 9.0
1990-91: David Robinson - Clutch and High-Stakes Impact: 7.0
1990-91: Charles Barkley - Impact on Winning and Team Dynamics: 8.0
1990-91: Charles Barkley - Defining Individual Season Performance: 7.5
1990-91: Charles Barkley - Narrative and Cultural Resonance: 5.0
1990-91: Charles Barkley - Two-Way Excellence: 8.0
1990-91: Charles Barkley - Clutch and High-Stakes Impact: 8.0



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



1990-91: Karl Malone - Impact on Winning and Team Dynamics: 8.0
1990-91: Karl Malone - Defining Individual Season Performance: 8.0
1990-91: Karl Malone - Narrative and Cultural Resonance: 4.0
1990-91: Karl Malone - Two-Way Excellence: 8.0
1990-91: Karl Malone - Clutch and High-Stakes Impact: 7.0
1990-91: Clyde Drexler - Impact on Winning and Team Dynamics: 7.0
1990-91: Clyde Drexler - Defining Individual Season Performance: 7.0
1990-91: Clyde Drexler - Narrative and Cultural Resonance: 5.0
1990-91: Clyde Drexler - Two-Way Excellence: 7.0
1990-91: Clyde Drexler - Clutch and High-Stakes Impact: 6.0
1990-91: Kevin Johnson - Impact on Winning and Team Dynamics: 7.0
1990-91: Kevin Johnson - Defining Individual Season Performance: 6.0
1990-91: Kevin Johnson - Narrative and Cultural Resonance: 4.0
1990-91: Kevin Johnson - Two-Way Excellence: 7.0
1990-91: Kevin Johnson - Clutch and High-Stakes Impact: 7.0
Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/1990-91.csv
1991-92: M


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/1994-95.csv
1995-96: Michael Jordan - Impact on Winning and Team Dynamics: 10.0
1995-96: Michael Jordan - Defining Individual Season Performance: 10.0
1995-96: Michael Jordan - Narrative and Cultural Resonance: 9.0
1995-96: Michael Jordan - Two-Way Excellence: 10.0
1995-96: Michael Jordan - Clutch and High-Stakes Impact: 10.0
1995-96: David Robinson - Impact on Winning and Team Dynamics: 8.0
1995-96: David Robinson - Defining Individual Season Performance: 8.0
1995-96: David Robinson - Narrative and Cultural Resonance: 6.0
1995-96: David Robinson - Two-Way Excellence: 9.0
1995-96: David Robinson - Clutch and High-Stakes Impact: 7.0
1995-96: Anfernee Hardaway - Impact on Winning and Team Dynamics: 7.0
1995-96: Anfernee Hardaway - Defining Individual Season Performance: 7.0
1995-96: Anfernee Hardaway - Narrative and Cultural Resonance: 6.0
1995-96: Anfernee Hardaway - Two-Way Excellence: 7.0
1995-96: Anfernee Hardaway - C


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



1996-97: Grant Hill - Impact on Winning and Team Dynamics: 8.0
1996-97: Grant Hill - Defining Individual Season Performance: 7.0
1996-97: Grant Hill - Narrative and Cultural Resonance: 6.0
1996-97: Grant Hill - Two-Way Excellence: 8.0
1996-97: Grant Hill - Clutch and High-Stakes Impact: 7.0
1996-97: Tim Hardaway - Impact on Winning and Team Dynamics: 7.0
1996-97: Tim Hardaway - Defining Individual Season Performance: 6.0
1996-97: Tim Hardaway - Narrative and Cultural Resonance: 3.0
1996-97: Tim Hardaway - Two-Way Excellence: 6.0
1996-97: Tim Hardaway - Clutch and High-Stakes Impact: 7.0
1996-97: Glen Rice - Impact on Winning and Team Dynamics: 6.0
1996-97: Glen Rice - Defining Individual Season Performance: 7.0
1996-97: Glen Rice - Narrative and Cultural Resonance: 4.0
1996-97: Glen Rice - Two-Way Excellence: 6.0
1996-97: Glen Rice - Clutch and High-Stakes Impact: 7.0
1996-97: Gary Payton - Impact on Winning and Team Dynamics: 8.0
1996-97: Gary Payton - Defining Individual Season Perfo


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



1996-97: Hakeem Olajuwon - Impact on Winning and Team Dynamics: 7.0
1996-97: Hakeem Olajuwon - Defining Individual Season Performance: 6.0
1996-97: Hakeem Olajuwon - Narrative and Cultural Resonance: 3.0
1996-97: Hakeem Olajuwon - Two-Way Excellence: 9.0
1996-97: Hakeem Olajuwon - Clutch and High-Stakes Impact: 7.0
Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/1996-97.csv
1997-98: Michael Jordan - Impact on Winning and Team Dynamics: 9.0
1997-98: Michael Jordan - Defining Individual Season Performance: 8.5
1997-98: Michael Jordan - Narrative and Cultural Resonance: 9.0
1997-98: Michael Jordan - Two-Way Excellence: 9.0
1997-98: Michael Jordan - Clutch and High-Stakes Impact: 10.0



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



1997-98: Karl Malone - Impact on Winning and Team Dynamics: 8.0
1997-98: Karl Malone - Defining Individual Season Performance: 7.0
1997-98: Karl Malone - Narrative and Cultural Resonance: 5.0
1997-98: Karl Malone - Two-Way Excellence: 8.0
1997-98: Karl Malone - Clutch and High-Stakes Impact: 7.0
1997-98: Gary Payton - Impact on Winning and Team Dynamics: 8.0
1997-98: Gary Payton - Defining Individual Season Performance: 6.0
1997-98: Gary Payton - Narrative and Cultural Resonance: 4.0
1997-98: Gary Payton - Two-Way Excellence: 9.0
1997-98: Gary Payton - Clutch and High-Stakes Impact: 7.0
1997-98: Shaquille O'Neal - Impact on Winning and Team Dynamics: 8.0
1997-98: Shaquille O'Neal - Defining Individual Season Performance: 7.0
1997-98: Shaquille O'Neal - Narrative and Cultural Resonance: 5.0
1997-98: Shaquille O'Neal - Two-Way Excellence: 8.0
1997-98: Shaquille O'Neal - Clutch and High-Stakes Impact: 7.0
1997-98: Tim Duncan - Impact on Winning and Team Dynamics: 7.0
1997-98: Tim Duncan -


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



1999-00: Gary Payton - Impact on Winning and Team Dynamics: 8.0
1999-00: Gary Payton - Defining Individual Season Performance: 7.0
1999-00: Gary Payton - Narrative and Cultural Resonance: 4.0
1999-00: Gary Payton - Two-Way Excellence: 8.0
1999-00: Gary Payton - Clutch and High-Stakes Impact: 7.0
1999-00: Allen Iverson - Impact on Winning and Team Dynamics: 7.0
1999-00: Allen Iverson - Defining Individual Season Performance: 8.0
1999-00: Allen Iverson - Narrative and Cultural Resonance: 7.0
1999-00: Allen Iverson - Two-Way Excellence: 7.0
1999-00: Allen Iverson - Clutch and High-Stakes Impact: 6.0
Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/1999-00.csv
2000-01: Allen Iverson - Impact on Winning and Team Dynamics: 9.0
2000-01: Allen Iverson - Defining Individual Season Performance: 9.0
2000-01: Allen Iverson - Narrative and Cultural Resonance: 9.0
2000-01: Allen Iverson - Two-Way Excellence: 6.0
2000-01: Allen Iverson - Clutch and High-Stakes Impact: 9.0
2000-01: T


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2001-02: Tracy McGrady - Impact on Winning and Team Dynamics: 8.0
2001-02: Tracy McGrady - Defining Individual Season Performance: 8.0
2001-02: Tracy McGrady - Narrative and Cultural Resonance: 6.0
2001-02: Tracy McGrady - Two-Way Excellence: 8.0
2001-02: Tracy McGrady - Clutch and High-Stakes Impact: 8.0
2001-02: Kobe Bryant - Impact on Winning and Team Dynamics: 7.0
2001-02: Kobe Bryant - Defining Individual Season Performance: 7.0
2001-02: Kobe Bryant - Narrative and Cultural Resonance: 6.0
2001-02: Kobe Bryant - Two-Way Excellence: 8.0
2001-02: Kobe Bryant - Clutch and High-Stakes Impact: 8.0
2001-02: Gary Payton - Impact on Winning and Team Dynamics: 7.0
2001-02: Gary Payton - Defining Individual Season Performance: 6.0
2001-02: Gary Payton - Narrative and Cultural Resonance: 4.0
2001-02: Gary Payton - Two-Way Excellence: 8.0
2001-02: Gary Payton - Clutch and High-Stakes Impact: 7.0
2001-02: Chris Webber - Impact on Winning and Team Dynamics: 7.0
2001-02: Chris Webber - Defining I


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/2001-02.csv
2002-03: Tim Duncan - Impact on Winning and Team Dynamics: 9.0
2002-03: Tim Duncan - Defining Individual Season Performance: 9.0
2002-03: Tim Duncan - Narrative and Cultural Resonance: 5.0
2002-03: Tim Duncan - Two-Way Excellence: 9.0
2002-03: Tim Duncan - Clutch and High-Stakes Impact: 9.0
2002-03: Kevin Garnett - Impact on Winning and Team Dynamics: 8.0
2002-03: Kevin Garnett - Defining Individual Season Performance: 8.0
2002-03: Kevin Garnett - Narrative and Cultural Resonance: 6.0
2002-03: Kevin Garnett - Two-Way Excellence: 9.0
2002-03: Kevin Garnett - Clutch and High-Stakes Impact: 7.0
2002-03: Kobe Bryant - Impact on Winning and Team Dynamics: 8.0
2002-03: Kobe Bryant - Defining Individual Season Performance: 8.0
2002-03: Kobe Bryant - Narrative and Cultural Resonance: 6.0
2002-03: Kobe Bryant - Two-Way Excellence: 8.0
2002-03: Kobe Bryant - Clutch and High-Stakes Impact: 8.0
2002-03: Tracy McGrady - 


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '9.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2003-04: Tim Duncan - Impact on Winning and Team Dynamics: 8.5
2003-04: Tim Duncan - Defining Individual Season Performance: 7.0
2003-04: Tim Duncan - Narrative and Cultural Resonance: 5.0
2003-04: Tim Duncan - Two-Way Excellence: 9.0
2003-04: Tim Duncan - Clutch and High-Stakes Impact: 8.0



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2003-04: Jermaine O'Neal - Impact on Winning and Team Dynamics: 7.0
2003-04: Jermaine O'Neal - Defining Individual Season Performance: 6.0
2003-04: Jermaine O'Neal - Narrative and Cultural Resonance: 5.0
2003-04: Jermaine O'Neal - Two-Way Excellence: 7.0
2003-04: Jermaine O'Neal - Clutch and High-Stakes Impact: 6.0
2003-04: Peja Stojaković - Impact on Winning and Team Dynamics: 7.0
2003-04: Peja Stojaković - Defining Individual Season Performance: 7.0
2003-04: Peja Stojaković - Narrative and Cultural Resonance: 3.0
2003-04: Peja Stojaković - Two-Way Excellence: 6.0
2003-04: Peja Stojaković - Clutch and High-Stakes Impact: 7.0
2003-04: Kobe Bryant - Impact on Winning and Team Dynamics: 7.0
2003-04: Kobe Bryant - Defining Individual Season Performance: 6.0
2003-04: Kobe Bryant - Narrative and Cultural Resonance: 5.0
2003-04: Kobe Bryant - Two-Way Excellence: 7.0
2003-04: Kobe Bryant - Clutch and High-Stakes Impact: 7.0
2003-04: Shaquille O'Neal - Impact on Winning and Team Dynamics: 7.0



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2005-06: LeBron James - Impact on Winning and Team Dynamics: 8.0
2005-06: LeBron James - Defining Individual Season Performance: 7.0
2005-06: LeBron James - Narrative and Cultural Resonance: 7.0
2005-06: LeBron James - Two-Way Excellence: 8.0
2005-06: LeBron James - Clutch and High-Stakes Impact: 8.0
2005-06: Dirk Nowitzki - Impact on Winning and Team Dynamics: 8.0
2005-06: Dirk Nowitzki - Defining Individual Season Performance: 8.0
2005-06: Dirk Nowitzki - Narrative and Cultural Resonance: 5.0
2005-06: Dirk Nowitzki - Two-Way Excellence: 7.0
2005-06: Dirk Nowitzki - Clutch and High-Stakes Impact: 8.0
2005-06: Kobe Bryant - Impact on Winning and Team Dynamics: 9.0
2005-06: Kobe Bryant - Defining Individual Season Performance: 9.0
2005-06: Kobe Bryant - Narrative and Cultural Resonance: 8.0
2005-06: Kobe Bryant - Two-Way Excellence: 8.0
2005-06: Kobe Bryant - Clutch and High-Stakes Impact: 10.0
2005-06: Chauncey Billups - Impact on Winning and Team Dynamics: 7.0
2005-06: Chauncey Billup


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2007-08: Dwight Howard - Impact on Winning and Team Dynamics: 7.0
2007-08: Dwight Howard - Defining Individual Season Performance: 7.0
2007-08: Dwight Howard - Narrative and Cultural Resonance: 4.0
2007-08: Dwight Howard - Two-Way Excellence: 8.0
2007-08: Dwight Howard - Clutch and High-Stakes Impact: 6.0
2007-08: Amar'e Stoudemire - Impact on Winning and Team Dynamics: 7.0
2007-08: Amar'e Stoudemire - Defining Individual Season Performance: 7.0
2007-08: Amar'e Stoudemire - Narrative and Cultural Resonance: 3.0
2007-08: Amar'e Stoudemire - Two-Way Excellence: 6.0
2007-08: Amar'e Stoudemire - Clutch and High-Stakes Impact: 6.0
2007-08: Tim Duncan - Impact on Winning and Team Dynamics: 8.0
2007-08: Tim Duncan - Defining Individual Season Performance: 7.0
2007-08: Tim Duncan - Narrative and Cultural Resonance: 4.0
2007-08: Tim Duncan - Two-Way Excellence: 9.0
2007-08: Tim Duncan - Clutch and High-Stakes Impact: 8.0
Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/2007-08


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2010-11: Dwyane Wade - Impact on Winning and Team Dynamics: 7.0
2010-11: Dwyane Wade - Defining Individual Season Performance: 7.0
2010-11: Dwyane Wade - Narrative and Cultural Resonance: 3.0
2010-11: Dwyane Wade - Two-Way Excellence: 8.0
2010-11: Dwyane Wade - Clutch and High-Stakes Impact: 7.0
Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/2010-11.csv
2011-12: LeBron James - Impact on Winning and Team Dynamics: 9.0
2011-12: LeBron James - Defining Individual Season Performance: 9.0
2011-12: LeBron James - Narrative and Cultural Resonance: 8.0
2011-12: LeBron James - Two-Way Excellence: 9.5
2011-12: LeBron James - Clutch and High-Stakes Impact: 9.0



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '9.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2011-12: Kevin Durant - Impact on Winning and Team Dynamics: 8.0
2011-12: Kevin Durant - Defining Individual Season Performance: 8.0
2011-12: Kevin Durant - Narrative and Cultural Resonance: 6.0
2011-12: Kevin Durant - Two-Way Excellence: 7.0
2011-12: Kevin Durant - Clutch and High-Stakes Impact: 8.0
2011-12: Chris Paul - Impact on Winning and Team Dynamics: 8.0
2011-12: Chris Paul - Defining Individual Season Performance: 7.0
2011-12: Chris Paul - Narrative and Cultural Resonance: 6.0
2011-12: Chris Paul - Two-Way Excellence: 8.0
2011-12: Chris Paul - Clutch and High-Stakes Impact: 8.0
2011-12: Kobe Bryant - Impact on Winning and Team Dynamics: 8.0
2011-12: Kobe Bryant - Defining Individual Season Performance: 7.0
2011-12: Kobe Bryant - Narrative and Cultural Resonance: 7.0
2011-12: Kobe Bryant - Two-Way Excellence: 7.0
2011-12: Kobe Bryant - Clutch and High-Stakes Impact: 8.0
2011-12: Tony Parker - Impact on Winning and Team Dynamics: 7.0
2011-12: Tony Parker - Defining Individual Se


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '9.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2012-13: Kevin Durant - Impact on Winning and Team Dynamics: 8.5
2012-13: Kevin Durant - Defining Individual Season Performance: 8.0
2012-13: Kevin Durant - Narrative and Cultural Resonance: 6.0
2012-13: Kevin Durant - Two-Way Excellence: 7.0
2012-13: Kevin Durant - Clutch and High-Stakes Impact: 8.0



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2012-13: Carmelo Anthony - Impact on Winning and Team Dynamics: 7.0
2012-13: Carmelo Anthony - Defining Individual Season Performance: 7.0
2012-13: Carmelo Anthony - Narrative and Cultural Resonance: 6.0
2012-13: Carmelo Anthony - Two-Way Excellence: 6.0
2012-13: Carmelo Anthony - Clutch and High-Stakes Impact: 8.0
2012-13: Chris Paul - Impact on Winning and Team Dynamics: 8.0
2012-13: Chris Paul - Defining Individual Season Performance: 7.0
2012-13: Chris Paul - Narrative and Cultural Resonance: 4.0
2012-13: Chris Paul - Two-Way Excellence: 8.0
2012-13: Chris Paul - Clutch and High-Stakes Impact: 7.0
2012-13: Kobe Bryant - Impact on Winning and Team Dynamics: 8.0
2012-13: Kobe Bryant - Defining Individual Season Performance: 8.0
2012-13: Kobe Bryant - Narrative and Cultural Resonance: 7.0
2012-13: Kobe Bryant - Two-Way Excellence: 7.0
2012-13: Kobe Bryant - Clutch and High-Stakes Impact: 9.0
2012-13: Tony Parker - Impact on Winning and Team Dynamics: 8.0
2012-13: Tony Parker - Definin


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2019-20: LeBron James - Impact on Winning and Team Dynamics: 9.0
2019-20: LeBron James - Defining Individual Season Performance: 8.0
2019-20: LeBron James - Narrative and Cultural Resonance: 8.0
2019-20: LeBron James - Two-Way Excellence: 9.0
2019-20: LeBron James - Clutch and High-Stakes Impact: 9.0
2019-20: James Harden - Impact on Winning and Team Dynamics: 8.0
2019-20: James Harden - Defining Individual Season Performance: 8.0
2019-20: James Harden - Narrative and Cultural Resonance: 5.0
2019-20: James Harden - Two-Way Excellence: 6.0
2019-20: James Harden - Clutch and High-Stakes Impact: 7.0
2019-20: Luka Dončić - Impact on Winning and Team Dynamics: 8.0
2019-20: Luka Dončić - Defining Individual Season Performance: 7.5
2019-20: Luka Dončić - Narrative and Cultural Resonance: 6.0
2019-20: Luka Dončić - Two-Way Excellence: 7.0
2019-20: Luka Dončić - Clutch and High-Stakes Impact: 8.0



Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2019-20: Kawhi Leonard - Impact on Winning and Team Dynamics: 7.0
2019-20: Kawhi Leonard - Defining Individual Season Performance: 7.0
2019-20: Kawhi Leonard - Narrative and Cultural Resonance: 4.0
2019-20: Kawhi Leonard - Two-Way Excellence: 8.0
2019-20: Kawhi Leonard - Clutch and High-Stakes Impact: 7.0
2019-20: Anthony Davis - Impact on Winning and Team Dynamics: 8.0
2019-20: Anthony Davis - Defining Individual Season Performance: 7.0
2019-20: Anthony Davis - Narrative and Cultural Resonance: 6.0
2019-20: Anthony Davis - Two-Way Excellence: 9.0
2019-20: Anthony Davis - Clutch and High-Stakes Impact: 8.0
2019-20: Chris Paul - Impact on Winning and Team Dynamics: 8.0
2019-20: Chris Paul - Defining Individual Season Performance: 7.0
2019-20: Chris Paul - Narrative and Cultural Resonance: 7.0
2019-20: Chris Paul - Two-Way Excellence: 8.0
2019-20: Chris Paul - Clutch and High-Stakes Impact: 8.0
Updated DataFrame saved to /Users/cb/src/nba_mvp_ml/data/sentiment/2019-20.csv
2020-21: Nikola


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2020-21: Stephen Curry - Impact on Winning and Team Dynamics: 9.0
2020-21: Stephen Curry - Defining Individual Season Performance: 9.0
2020-21: Stephen Curry - Narrative and Cultural Resonance: 7.0
2020-21: Stephen Curry - Two-Way Excellence: 7.0
2020-21: Stephen Curry - Clutch and High-Stakes Impact: 9.0
2020-21: Giannis Antetokounmpo - Impact on Winning and Team Dynamics: 8.0
2020-21: Giannis Antetokounmpo - Defining Individual Season Performance: 7.0
2020-21: Giannis Antetokounmpo - Narrative and Cultural Resonance: 6.0
2020-21: Giannis Antetokounmpo - Two-Way Excellence: 9.0
2020-21: Giannis Antetokounmpo - Clutch and High-Stakes Impact: 9.0
2020-21: Chris Paul - Impact on Winning and Team Dynamics: 8.0
2020-21: Chris Paul - Defining Individual Season Performance: 7.0
2020-21: Chris Paul - Narrative and Cultural Resonance: 6.0
2020-21: Chris Paul - Two-Way Excellence: 8.0
2020-21: Chris Paul - Clutch and High-Stakes Impact: 8.0
2020-21: Luka Dončić - Impact on Winning and Team Dyna


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '8.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2022-23: Jayson Tatum - Impact on Winning and Team Dynamics: 7.0
2022-23: Jayson Tatum - Defining Individual Season Performance: 7.0
2022-23: Jayson Tatum - Narrative and Cultural Resonance: 5.0
2022-23: Jayson Tatum - Two-Way Excellence: 8.0
2022-23: Jayson Tatum - Clutch and High-Stakes Impact: 7.0
2022-23: Shai Gilgeous-Alexander - Impact on Winning and Team Dynamics: 8.0
2022-23: Shai Gilgeous-Alexander - Defining Individual Season Performance: 7.0
2022-23: Shai Gilgeous-Alexander - Narrative and Cultural Resonance: 5.0
2022-23: Shai Gilgeous-Alexander - Two-Way Excellence: 8.0
2022-23: Shai Gilgeous-Alexander - Clutch and High-Stakes Impact: 7.0
2022-23: Donovan Mitchell - Impact on Winning and Team Dynamics: 7.0
2022-23: Donovan Mitchell - Defining Individual Season Performance: 6.0
2022-23: Donovan Mitchell - Narrative and Cultural Resonance: 5.0
2022-23: Donovan Mitchell - Two-Way Excellence: 7.0
2022-23: Donovan Mitchell - Clutch and High-Stakes Impact: 7.0
2022-23: Domantas S


Setting an item of incompatible dtype is deprecated and will raise an error in a future version of pandas. Value '7.5' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.



2023-24: Shai Gilgeous-Alexander - Impact on Winning and Team Dynamics: 7.0
2023-24: Shai Gilgeous-Alexander - Defining Individual Season Performance: 8.0
2023-24: Shai Gilgeous-Alexander - Narrative and Cultural Resonance: 6.0
2023-24: Shai Gilgeous-Alexander - Two-Way Excellence: 8.0
2023-24: Shai Gilgeous-Alexander - Clutch and High-Stakes Impact: 7.0
2023-24: Luka Dončić - Impact on Winning and Team Dynamics: 8.0
2023-24: Luka Dončić - Defining Individual Season Performance: 8.0
2023-24: Luka Dončić - Narrative and Cultural Resonance: 7.0
2023-24: Luka Dončić - Two-Way Excellence: 7.0
2023-24: Luka Dončić - Clutch and High-Stakes Impact: 8.0
2023-24: Giannis Antetokounmpo - Impact on Winning and Team Dynamics: 9.0
2023-24: Giannis Antetokounmpo - Defining Individual Season Performance: 8.0
2023-24: Giannis Antetokounmpo - Narrative and Cultural Resonance: 6.0
2023-24: Giannis Antetokounmpo - Two-Way Excellence: 9.0
2023-24: Giannis Antetokounmpo - Clutch and High-Stakes Impact: 8.0

In [None]:
mvp_df.columns

In [None]:
player_df.columns

In [14]:
# def merge_dfs(player_df, team_df, mvp_df, include_non_mvp=False, debug=False):
#     # Merge player and team data
#     merged_df = pd.merge(player_df, team_df, on=['TEAM_ID', 'SEASON_ID'], suffixes=('_player', '_team'))

#     # Add a column to differentiate MVP candidates
#     merged_df['MVP_Candidate'] = merged_df['PLAYER_FULLNAME'].apply(
#         lambda x: 'MVP Candidate' if x in mvp_df['Player'].values else 'Other'
#     )

#     # Merge MVP voting data into the player/team dataset
#     merged_with_mvp = pd.merge(
#         merged_df,
#         mvp_df.drop(columns=['Age','Tm']),
#         # mvp_df[['Player', 'Pts Won', 'Pts Max', 'Share', 
#         #         'G', 'MP', 'PTS', 'TRB', 'AST', 'STL', 'BLK', 
#         #         'FG%', '3P%', 'FT%', 'WS', 'WS/48',
#         #         'sentiment_1', 'sentiment_2', 'sentiment_3', 'sentiment_4',
#         #         'sentiment_5', 'sentiment_6', 'sentiment_7', 'sentiment_8',
#         #         'sentiment_9', 'sentiment_10', 'sentiment_11', 'sentiment_12',
#         #         'sentiment_13', 'sentiment_14', 'sentiment_15']],  # Select key MVP metrics
#         how='left',
#         left_on='PLAYER_FULLNAME',
#         right_on='Player'
#     )

#     # Replace metrics with MVP data where available, with fallback logic
#     merged_with_mvp['WS'] = merged_with_mvp['WS_y'].fillna(merged_with_mvp['WS_x'])
#     merged_with_mvp['PTS'] = merged_with_mvp['PTS'].fillna(merged_with_mvp['PTS_player'])
#     merged_with_mvp['TRB'] = merged_with_mvp['TRB_y'].fillna(merged_with_mvp['REB'])
#     merged_with_mvp['AST'] = merged_with_mvp.get('AST_y', merged_with_mvp['AST_player'])  # Fallback if 'AST_y' is missing

#     # Optionally filter out non-MVP candidates
#     if not include_non_mvp:
#         merged_with_mvp = merged_with_mvp[merged_with_mvp['MVP_Candidate'] != 'Other']
    
#     if debug:
#         display(sorted(list(merged_with_mvp.columns)))
    
#     # Drop unnecessary columns and avoid confusion between suffixes
#     merged_with_mvp = merged_with_mvp.rename(columns={'Pts Won': 'Pts_Won'})

#     return merged_with_mvp

# list(merge_dfs(*load_year(2021)).columns)




['Unnamed: 0',
 'PLAYER_ID',
 'PLAYER_FULLNAME',
 'SEASON_ID',
 'LEAGUE_ID',
 'TEAM_ID',
 'FGM',
 'FGA_player',
 'FG_PCT',
 'FG3M',
 'FG3A',
 'FG3_PCT',
 'FTM',
 'FTA_player',
 'FT_PCT',
 'OREB',
 'DREB',
 'REB',
 'AST_player',
 'STL_player',
 'BLK_player',
 'TOV_player',
 'PF_player',
 'PTS_player',
 'MIN_PG',
 'FGM_PG',
 'FGA_PG',
 'FG_PCT_PG',
 'FG3M_PG',
 'FG3A_PG',
 'FG3_PCT_PG',
 'FTM_PG',
 'FTA_PG',
 'FT_PCT_PG',
 'OREB_PG',
 'DREB_PG',
 'REB_PG',
 'AST_PG',
 'STL_PG',
 'BLK_PG',
 'TOV_PG',
 'PF_PG',
 'PTS_PG',
 'TS%_player',
 'eFG%_player',
 'Rk',
 'GP',
 'MIN',
 '3PAr_player',
 'ASTPct',
 'BLKPct',
 'BPM',
 'DBPM',
 'DRBPct',
 'DWS',
 'FTr_player',
 'GS',
 'OBPM',
 'ORBPct',
 'OWS',
 'PER',
 'STLPct',
 'TOVPct',
 'TRBPct',
 'TSPct',
 'USGPct',
 'VORP',
 'WS/48_x',
 'Year',
 'PLAYER_AGE',
 'TEAM_ABBREVIATION_player',
 'WS_x',
 'Rk_trad',
 'Team',
 'G_x',
 'MP_x',
 'FG',
 'FGA_team',
 'FG%_x',
 '3P',
 '3PA',
 '3P%_x',
 '2P',
 '2PA',
 '2P%',
 'FT',
 'FTA_team',
 'FT%_x',
 'ORB',
