In [3]:
!pip install nba_api

Collecting nba_api
  Downloading nba_api-1.9.0-py3-none-any.whl.metadata (5.8 kB)
Downloading nba_api-1.9.0-py3-none-any.whl (284 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.9/284.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: nba_api
Successfully installed nba_api-1.9.0


In [4]:
import numpy as np
import pandas as pd
from nba_api.stats.endpoints import playercareerstats, playerawards
from bokeh.plotting import figure, show
from bokeh.models import HoverTool, ColumnDataSource


def calculate_career_score(player_id, award_weights, position_weights):
    career_stats = playercareerstats.PlayerCareerStats(player_id).get_data_frames()[0]
    awards_df = playerawards.PlayerAwards(player_id).get_data_frames()[0]

    stats_per_game = {stat: career_stats[stat].sum() / career_stats['GP'].sum() for stat in ['PTS', 'REB', 'AST', 'STL', 'BLK', 'TOV', 'FG3M', 'FG3A']}
    role = classify_player(stats_per_game)


    # Normalize stats based on player role
    norm_stats = normalize_stats(stats_per_game, position_weights.get(role, {}))  # Normalize based on the role's weights
    # Calculate weighted award score
    parsed_awards = parse_awards(awards_df)
    weighted_awards_score = 0
    for award, count in parsed_awards.items():
        if award in award_weights:
            weighted_awards_score += count * award_weights[award]

    # Combine stats and awards into a total career score
    total_score = sum(norm_stats.values()) + weighted_awards_score

    return total_score

def classify_player(stats):
    thresholds = {
        "Rim Protector": {"BLK": 2.0, "REB": 8.0},
        "3PT Specialist": {"FG3M": 2.5, "FG3A": 6.0},
        "Playmaker": {"AST": 6.0, "TOV": 2.5},
        "Scorer": {"PTS": 20.0, "FGA": 15.0},
    }
    #^^^^MORE STYLES OF PLAY CAN BE ADDED^^^^^
    for role, criteria in thresholds.items():
        if all(stats.get(stat, 0) >= value for stat, value in criteria.items()):
            return role
    return "Position-Based"

def normalize_stats(stats, weights):
    norm_stats = {}
    for stat, value in stats.items():
        norm_stats[stat] = value * weights.get(stat, 1.0)
    return norm_stats

def parse_awards(awards_df):
    important_awards = {
        "NBA Most Valuable Player": "MVP",
        "Rookie of the Year": "ROY",
        "Defensive Player of the Year": "DPOY",
        "Sixth Man of the Year": "6MOY",
        "NBA Finals Most Valuable Player": "Finals MVP",
        "All-Defensive Team": "All-Defensive",
        "NBA Defensive Player of the Year": "DPOY",
        "NBA Champion": "Champ",
        "NBA All-Star": "All-Star",
    }
    parsed_awards = {award: 0 for award in important_awards.values()}
    all_nba_teams = set()

    for _, row in awards_df.iterrows():
        award_name = row['DESCRIPTION']

        # Count important awards
        if award_name in important_awards:
            parsed_awards[important_awards[award_name]] += 1

        # count all nba
        if "All-NBA" in award_name and "ALL_NBA_TEAM_NUMBER" in row and pd.notna(row["ALL_NBA_TEAM_NUMBER"]):
            team_number = int(row["ALL_NBA_TEAM_NUMBER"])
            all_nba_teams.add(team_number)

    # Add unique All-NBA counts
    for team_number in sorted(all_nba_teams):
        suffix = "st" if team_number == 1 else "nd" if team_number == 2 else "rd"
        parsed_awards[f"All-NBA {team_number}{suffix} Team"] = list(all_nba_teams).count(team_number)

    return parsed_awards

award_weights = {
    "MVP": 10.0,
    "ROY": 5.0,
    "DPOY": 7.0,
    "6MOY": 4.0,
    "Finals MVP": 12.0,
    "All-Defensive": 6.0,
    "Champ": 8.0,
    "All-Star": 5.0,
    "All-NBA 1st Team": 9.0,
    "All-NBA 2nd Team": 7.0,
    "All-NBA 3rd Team": 6.0
}

position_weights = {
    "Rim Protector": {"BLK": 1.5, "REB": 1.2},
    "3PT Specialist": {"FG3M": 1.5, "FG3A": 1.3},
    "Playmaker": {"AST": 1.4, "TOV": -0.5},
    "Scorer": {"PTS": 1.5, "FGA": 1.2},
    "Position-Based": {stat: 1.0 for stat in ["PTS", "REB", "AST", "STL", "BLK", "TOV", "FG3M", "FG3A"]},
}


player_id = input("Enter Player ID: ")
career_score = calculate_career_score(player_id, award_weights, position_weights)
print(f"Career Score: {career_score}")


Enter Player ID: 201939
Career Score: 194.77456140350876


calculate_career_score is a function that gives a score based on a players career.


FIRST takes their per game averages and i they reach a threshold of a certain stat they will be catergorize in play style RIM PROTECTOR , 3PT SHOOTER, PLAYMAKER, SCORER. If none of the catergories fit it will go back to position based.

NEXT Normalized those stats to the catergories. Calculates in the awards whatever weight is given. THEN the score is calulated like total_score = sum(norm_stats.values()) + weighted_awards_score

I would LOVE to start Machine Learning with this function.

In [5]:
import pandas as pd
import time
from nba_api.stats.endpoints import playercareerstats, playerawards

# player list
players = [
    ("Michael Jordan", "893"), ("LeBron James", "2544"), ("Tim Duncan", "1495"),
    ("Isiah Thomas", "78318"), ("Stephen Curry", "201939"), ("Nikola Jokić", "203999"),
    ("Jayson Tatum", "1628369"), ("David Thompson", "78326"), ("Giannis Antetokounmpo", "203507"),
    ("James Harden", "201935"), ("Bernard King", "77264"), ("Kobe Bryant", "977"),
    ("Steve Nash", "959"), ("Luka Dončić", "1629029"), ("Dirk Nowitzki", "1717"),
    ("Kevin Garnett", "708"), ("Kawhi Leonard", "202695"),("Tracy McGrady","1503"),
    ("Shai Gilgeous-Alexander","1628983"),("Magic Johnson", "77142"), ("Larry Bird", "1449"),
    ("Kareem Abdul-Jabbar", "76003")
]

def get_player_data(player_id):
    """Fetch career stats & awards for a player."""
    try:
        time.sleep(2)
        # collect career stats
        career = playercareerstats.PlayerCareerStats(player_id).get_data_frames()[0]
        career["PTS/G"] = career["PTS"] / career["GP"]
        career["REB/G"] = career["REB"] / career["GP"]
        career["AST/G"] = career["AST"] / career["GP"]
        career["FG%"] = career["FGM"] / career["FGA"]
        career["Seasons_Played"] = len(career["SEASON_ID"].unique())

        # collect awards
        awards_df = playerawards.PlayerAwards(player_id).get_data_frames()[0]
        awards = {"MVPs": 0, "All-Stars": 0, "Championships": 0}

        for _, row in awards_df.iterrows():
            if "NBA Most Valuable Player" in row["DESCRIPTION"]:
                awards["MVPs"] += 1
            elif "NBA All-Star" in row["DESCRIPTION"]:
                awards["All-Stars"] += 1
            elif "NBA Champion" in row["DESCRIPTION"]:
                awards["Championships"] += 1

        #  assume hall of fame problability with all stars
        hof_status = 1 if awards["MVPs"] > 0 or awards["All-Stars"] > 5 else 0

        features = {
            "Player_ID": player_id, "PTS/G": career["PTS/G"].values[0], "REB/G": career["REB/G"].values[0],
            "AST/G": career["AST/G"].values[0], "FG%": career["FG%"].values[0], "Seasons_Played": career["Seasons_Played"].values[0],
            "MVPs": awards["MVPs"], "All-Stars": awards["All-Stars"], "Championships": awards["Championships"],
            "Hall_of_Fame": hof_status
        }

        return pd.DataFrame([features])

    except Exception as e:
        print(f"Error fetching data for Player {player_id}: {e}")
        return None

# dataset for selected players
dataset = pd.concat([get_player_data(pid) for _, pid in players if get_player_data(pid) is not None], ignore_index=True)

# Save dataset for reuse
dataset.to_csv("small_nba_dataset.csv", index=False)
print("✅ Small NBA dataset saved to 'small_nba_dataset.csv'")

# Display first rows
print(dataset.to_string())

✅ Small NBA dataset saved to 'small_nba_dataset.csv'
   Player_ID      PTS/G      REB/G     AST/G       FG%  Seasons_Played  MVPs  All-Stars  Championships  Hall_of_Fame
0        893  28.207317   6.512195  5.865854  0.515077              15     5         17              6             1
1       2544  20.936709   5.468354  5.886076  0.416890              22     4         23              4             1
2       1495  21.109756  11.914634  2.731707  0.548563              19     2         16              5             1
3      78318  17.013889   2.902778  7.847222  0.424157              13     0         14              2             1
4     201939  17.487500   4.450000  5.900000  0.461942              16     2         12              4             1
5     203999   9.950000   7.000000  2.362500  0.511667              10     3          6              1             1
6    1628369  13.900000   5.025000  1.600000  0.475449               8     0          6              1             1
7      7832

DATASET FOR ML MVP PREDICTOR , CAN EDIT SAMPLE PLAYERS (For example high perfroming non mvps, role players, etc)


Will take about **2 minutes** to execute

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

dataset = pd.read_csv("small_nba_dataset.csv")

X = dataset.drop(columns=["Player_ID", "MVPs"]) #drop the answer!!!
y = (dataset["MVPs"] > 0).astype(int)

# split data to train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

#predict
y_pred = model.predict(X_test)

# Evaluate
accuracy = accuracy_score(y_test, y_pred)
print(f"✅ Model Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))


✅ Model Accuracy: 0.80
              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      0.75      0.86         4

    accuracy                           0.80         5
   macro avg       0.75      0.88      0.76         5
weighted avg       0.90      0.80      0.82         5

