In [13]:
# Random Forest 
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog, commonplayerinfo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder

# Function to get player ID
def get_player_id(player_name):
    """Look up a player's id by exact full name.

    The comparison ignores letter case as well as leading, trailing and
    repeated whitespace, so a name typed at a prompt (for example
    "  lebron   james ") still matches.

    Args:
        player_name: Full name of the player, e.g. "LeBron James".

    Returns:
        The ``id`` of the first entry from ``players.get_players()`` whose
        ``full_name`` matches, or ``None`` when no entry matches.
    """
    # Collapse whitespace runs so stray spaces from input() do not break the match.
    wanted = ' '.join(player_name.split()).lower()
    # next() stops at the first hit instead of materialising every match.
    return next(
        (p['id'] for p in players.get_players() if p['full_name'].lower() == wanted),
        None,
    )

# Function to get player's career start year
def get_player_career_start_year(player_id):
    """Return the player's ``FROM_YEAR`` as an int.

    Queries the CommonPlayerInfo endpoint for ``player_id`` and reads the
    ``FROM_YEAR`` value at label 0 of the first data frame in the response.
    """
    info_frames = commonplayerinfo.CommonPlayerInfo(player_id=player_id).get_data_frames()
    from_year_column = info_frames[0]['FROM_YEAR']
    return int(from_year_column[0])

# Interactive player selection
player_name = input("Enter the full name of the NBA player: ")
player_id = get_player_id(player_name)

if player_id is None:
    print(f"No player found with the name '{player_name}'.")
else:
    start_year = get_player_career_start_year(player_id)

    # First calendar year of the most recent season requested ("2023-24").
    last_season_start_year = 2023

    # Collect each season's game log and concatenate once at the end;
    # concatenating inside the loop re-copies every earlier row each time.
    season_frames = []
    for year in range(start_year, last_season_start_year + 1):
        season_str = f"{year}-{str(year + 1)[-2:]}"  # e.g. 2019 -> "2019-20"
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season_str)
        season_data = gamelog.get_data_frames()[0]
        # Seasons without games contribute nothing, so they are left out.
        if not season_data.empty:
            season_frames.append(season_data)

    if not season_frames:
        raise ValueError(f"No game logs found for '{player_name}'.")
    all_seasons_data = pd.concat(season_frames, ignore_index=True)

    # Feature engineering for 'MATCHUP': the opponent is the last
    # space-separated token of the matchup string.
    all_seasons_data['OPPONENT_TEAM'] = all_seasons_data['MATCHUP'].str.split(' ').str[-1]

    # One-hot encoding 'OPPONENT_TEAM'. The encoder's default sparse result is
    # densified with .toarray() so the code does not depend on the
    # `sparse` / `sparse_output` keyword, which differs between
    # scikit-learn versions.
    encoder = OneHotEncoder()
    opponent_team_encoded = encoder.fit_transform(all_seasons_data[['OPPONENT_TEAM']]).toarray()
    opponent_team_df = pd.DataFrame(
        opponent_team_encoded,
        columns=encoder.get_feature_names_out(['OPPONENT_TEAM']),
        index=all_seasons_data.index,
    )

    # Merging the new features back into the original DataFrame
    df_encoded = all_seasons_data.join(opponent_team_df)

    # Define features and target
    feature_columns = list(opponent_team_df.columns)
    X = df_encoded[feature_columns]

    # Interactive inputs
    input_score = float(input("Enter the score threshold: "))
    direction = input("Above or Below the score? (Enter 'above' or 'below'): ").strip().lower()
    opponent_team = input("Enter the opponent team abbreviation (e.g., 'NYK' for New York Knicks): ").strip().upper()

    # An abbreviation that never appears in the game logs has no feature
    # column; assigning to it would add a column the model was not trained on.
    opponent_column = f'OPPONENT_TEAM_{opponent_team}'
    if opponent_column not in feature_columns:
        known_opponents = ', '.join(sorted(encoder.categories_[0]))
        raise ValueError(
            f"No games against '{opponent_team}' in the data. Known opponents: {known_opponents}"
        )

    # Creating binary target based on input_score and direction
    if direction == 'above':
        y = (df_encoded['PTS'] > input_score).astype(int)
    elif direction == 'below':
        y = (df_encoded['PTS'] < input_score).astype(int)
    else:
        raise ValueError("Direction must be 'above' or 'below'")

    # Splitting the data (the held-out part is kept for later inspection)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create and train Random Forest Classifier model
    random_forest_model = RandomForestClassifier(n_estimators=100, random_state=42)
    random_forest_model.fit(X_train, y_train)

    # Preparing the input for prediction: one numeric row, all zeros except
    # the selected opponent.
    input_data = pd.DataFrame(0.0, index=[0], columns=feature_columns)
    input_data.loc[0, opponent_column] = 1.0

    # Making a prediction
    prediction = random_forest_model.predict(input_data)
    prediction_probability = random_forest_model.predict_proba(input_data)

    # Interpreting the prediction. predict_proba columns follow
    # `classes_`, so the predicted label is mapped to its column position
    # rather than used as an index directly (they differ when only one
    # class was seen during training).
    prediction_result = "Yes" if prediction[0] == 1 else "No"
    predicted_class_index = list(random_forest_model.classes_).index(prediction[0])
    probability = prediction_probability[0][predicted_class_index]

    print(f"Prediction: {prediction_result}")
    print(f"Probability: {probability}")






Prediction: No
Probability: 0.5017537584992694


In [21]:
# LogisticRegression

from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog, commonplayerinfo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder

# Function to get player ID
def get_player_id(player_name):
    """Look up a player's id by exact full name.

    The comparison ignores letter case as well as leading, trailing and
    repeated whitespace, so a name typed at a prompt (for example
    "  lebron   james ") still matches.

    Args:
        player_name: Full name of the player, e.g. "LeBron James".

    Returns:
        The ``id`` of the first entry from ``players.get_players()`` whose
        ``full_name`` matches, or ``None`` when no entry matches.
    """
    # Collapse whitespace runs so stray spaces from input() do not break the match.
    wanted = ' '.join(player_name.split()).lower()
    # next() stops at the first hit instead of materialising every match.
    return next(
        (p['id'] for p in players.get_players() if p['full_name'].lower() == wanted),
        None,
    )

# Function to get player's career start year
def get_player_career_start_year(player_id):
    """Fetch CommonPlayerInfo for ``player_id`` and return ``FROM_YEAR`` as an int.

    The value is read at label 0 of the ``FROM_YEAR`` column in the first
    data frame of the endpoint response.
    """
    response = commonplayerinfo.CommonPlayerInfo(player_id=player_id)
    first_frame = response.get_data_frames()[0]
    raw_from_year = first_frame['FROM_YEAR'][0]
    return int(raw_from_year)

# Interactive player selection
player_name = input("Enter the full name of the NBA player: ")
player_id = get_player_id(player_name)

if player_id is None:
    print(f"No player found with the name '{player_name}'.")
else:
    start_year = get_player_career_start_year(player_id)

    # First calendar year of the most recent season requested ("2023-24").
    last_season_start_year = 2023

    # Collect each season's game log and concatenate once at the end;
    # concatenating inside the loop re-copies every earlier row each time.
    season_frames = []
    for year in range(start_year, last_season_start_year + 1):
        season_str = f"{year}-{str(year + 1)[-2:]}"  # e.g. 2019 -> "2019-20"
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season_str)
        season_data = gamelog.get_data_frames()[0]
        # Seasons without games contribute nothing, so they are left out.
        if not season_data.empty:
            season_frames.append(season_data)

    if not season_frames:
        raise ValueError(f"No game logs found for '{player_name}'.")
    all_seasons_data = pd.concat(season_frames, ignore_index=True)

    # Feature engineering for 'MATCHUP': the opponent is the last
    # space-separated token of the matchup string.
    all_seasons_data['OPPONENT_TEAM'] = all_seasons_data['MATCHUP'].str.split(' ').str[-1]

    # One-hot encoding 'OPPONENT_TEAM'. The encoder's default sparse result is
    # densified with .toarray() so the code does not depend on the
    # `sparse` / `sparse_output` keyword, which differs between
    # scikit-learn versions.
    encoder = OneHotEncoder()
    opponent_team_encoded = encoder.fit_transform(all_seasons_data[['OPPONENT_TEAM']]).toarray()
    opponent_team_df = pd.DataFrame(
        opponent_team_encoded,
        columns=encoder.get_feature_names_out(['OPPONENT_TEAM']),
        index=all_seasons_data.index,
    )

    # Merging the new features back into the original DataFrame
    df_encoded = all_seasons_data.join(opponent_team_df)

    # Define features and target
    feature_columns = list(opponent_team_df.columns)
    X = df_encoded[feature_columns]

    # Interactive inputs
    input_score = float(input("Enter the score threshold: "))
    direction = input("Above or Below the score? (Enter 'above' or 'below'): ").strip().lower()
    opponent_team = input("Enter the opponent team abbreviation (e.g., 'NYK' for New York Knicks): ").strip().upper()

    # An abbreviation that never appears in the game logs has no feature
    # column; assigning to it would add a column the model was not trained on.
    opponent_column = f'OPPONENT_TEAM_{opponent_team}'
    if opponent_column not in feature_columns:
        known_opponents = ', '.join(sorted(encoder.categories_[0]))
        raise ValueError(
            f"No games against '{opponent_team}' in the data. Known opponents: {known_opponents}"
        )

    # Creating binary target based on input_score and direction
    if direction == 'above':
        y = (df_encoded['PTS'] > input_score).astype(int)
    elif direction == 'below':
        y = (df_encoded['PTS'] < input_score).astype(int)
    else:
        raise ValueError("Direction must be 'above' or 'below'")

    # Splitting the data (the held-out part is kept for later inspection)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create and train Logistic Regression model
    logistic_model = LogisticRegression(max_iter=1000)
    logistic_model.fit(X_train, y_train)

    # Preparing the input for prediction: one numeric row, all zeros except
    # the selected opponent.
    input_data = pd.DataFrame(0.0, index=[0], columns=feature_columns)
    input_data.loc[0, opponent_column] = 1.0

    # Making a prediction
    prediction = logistic_model.predict(input_data)
    prediction_probability = logistic_model.predict_proba(input_data)

    # Interpreting the prediction. predict_proba columns follow
    # `classes_`, so the predicted label is mapped to its column position
    # rather than used as an index directly.
    prediction_result = "Yes" if prediction[0] == 1 else "No"
    predicted_class_index = list(logistic_model.classes_).index(prediction[0])
    probability = prediction_probability[0][predicted_class_index]

    print(f"Prediction: {prediction_result}")
    print(f"Probability: {probability}")




Prediction: No
Probability: 0.6000709582290991


In [2]:
# Linear Regression
from nba_api.stats.static import players
from nba_api.stats.endpoints import playergamelog, commonplayerinfo
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder

# Function to get player ID
def get_player_id(player_name):
    """Look up a player's id by exact full name.

    The comparison ignores letter case as well as leading, trailing and
    repeated whitespace, so a name typed at a prompt (for example
    "  lebron   james ") still matches.

    Args:
        player_name: Full name of the player, e.g. "LeBron James".

    Returns:
        The ``id`` of the first entry from ``players.get_players()`` whose
        ``full_name`` matches, or ``None`` when no entry matches.
    """
    # Collapse whitespace runs so stray spaces from input() do not break the match.
    wanted = ' '.join(player_name.split()).lower()
    # next() stops at the first hit instead of materialising every match.
    return next(
        (p['id'] for p in players.get_players() if p['full_name'].lower() == wanted),
        None,
    )

# Function to get player's career start year
def get_player_career_start_year(player_id):
    """Return, as an int, the ``FROM_YEAR`` reported by CommonPlayerInfo.

    Only the first data frame of the response is consulted, and the value
    is taken at label 0 of its ``FROM_YEAR`` column.
    """
    frames = commonplayerinfo.CommonPlayerInfo(player_id=player_id).get_data_frames()
    year_value = frames[0]['FROM_YEAR'][0]
    return int(year_value)

# Interactive player selection
player_name = input("Enter the full name of the NBA player: ")
player_id = get_player_id(player_name)

if player_id is None:
    print(f"No player found with the name '{player_name}'.")
else:
    start_year = get_player_career_start_year(player_id)

    # First calendar year of the most recent season requested ("2023-24").
    last_season_start_year = 2023

    # Collect each season's game log and concatenate once at the end;
    # concatenating inside the loop re-copies every earlier row each time.
    season_frames = []
    for year in range(start_year, last_season_start_year + 1):
        season_str = f"{year}-{str(year + 1)[-2:]}"  # e.g. 2019 -> "2019-20"
        gamelog = playergamelog.PlayerGameLog(player_id=player_id, season=season_str)
        season_data = gamelog.get_data_frames()[0]
        # Seasons without games contribute nothing, so they are left out.
        if not season_data.empty:
            season_frames.append(season_data)

    if not season_frames:
        raise ValueError(f"No game logs found for '{player_name}'.")
    all_seasons_data = pd.concat(season_frames, ignore_index=True)

    # Feature engineering for 'MATCHUP': the opponent is the last
    # space-separated token of the matchup string.
    all_seasons_data['OPPONENT_TEAM'] = all_seasons_data['MATCHUP'].str.split(' ').str[-1]

    # One-hot encoding 'OPPONENT_TEAM'. The encoder's default sparse result is
    # densified with .toarray() so the code does not depend on the
    # `sparse` / `sparse_output` keyword, which differs between
    # scikit-learn versions.
    encoder = OneHotEncoder()
    opponent_team_encoded = encoder.fit_transform(all_seasons_data[['OPPONENT_TEAM']]).toarray()
    opponent_team_df = pd.DataFrame(
        opponent_team_encoded,
        columns=encoder.get_feature_names_out(['OPPONENT_TEAM']),
        index=all_seasons_data.index,
    )

    # Merging the new features back into the original DataFrame
    df_encoded = all_seasons_data.join(opponent_team_df)

    # Define features
    feature_columns = list(opponent_team_df.columns)
    X = df_encoded[feature_columns]
    y = df_encoded['PTS']  # Target is the points

    # Splitting the data (the held-out part is kept for later inspection)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Create and train Linear Regression model
    linear_model = LinearRegression()
    linear_model.fit(X_train, y_train)

    # Interactive inputs
    input_score = float(input("Enter the score threshold: "))
    opponent_team = input("Enter the opponent team abbreviation (e.g., 'NYK' for New York Knicks): ").strip().upper()

    # An abbreviation that never appears in the game logs has no feature
    # column; assigning to it would add a column the model was not trained on.
    opponent_column = f'OPPONENT_TEAM_{opponent_team}'
    if opponent_column not in feature_columns:
        known_opponents = ', '.join(sorted(encoder.categories_[0]))
        raise ValueError(
            f"No games against '{opponent_team}' in the data. Known opponents: {known_opponents}"
        )

    # Preparing the input for prediction: one numeric row, all zeros except
    # the selected opponent.
    input_data = pd.DataFrame(0.0, index=[0], columns=feature_columns)
    input_data.loc[0, opponent_column] = 1.0

    # Making a prediction
    predicted_score = linear_model.predict(input_data)[0]

    # Interpreting the prediction (a predicted score equal to the threshold
    # is reported as "below").
    prediction_result = "above" if predicted_score > input_score else "below"

    print(f"Predicted Score: {predicted_score:.2f}")
    print(f"Prediction: {player_name} will score {prediction_result} {input_score} points against {opponent_team}")




KeyboardInterrupt: Interrupted by user