In [10]:
import pandas as pd
from scipy.stats import t
from math import sqrt

# Read the players file
players_df = pd.read_csv('nbadata/players.csv')

# Read the correlation file
metrics_df = pd.read_csv('nbaanalytics/player_correlations_spearman.csv', index_col=[0, 1])

# Read the relationships file
relationships_df = pd.read_json('nbaanalytics/player_relationships.json')

# Read the teams file
teams_df = pd.read_csv('nbadata/teams.csv')

In [11]:
# Create a dictionary to map player names to IDs
player_name_to_id = dict(zip(players_df.PLAYER_NAME, players_df.PLAYER_ID))

In [12]:
def get_metrics_for_players(player1_name, player2_name):
    # Convert player names to IDs
    player1_id = player_name_to_id.get(player1_name)
    player2_id = player_name_to_id.get(player2_name)


    if player1_id is None:
        return f"Player 1 ({player1_name}) not found.", None
    
    if player2_id is None:
        return f"Player 2 ({player2_name}) not found.", None

    query_string = f"({player1_id}, {player2_id})"
    metrics = metrics_df.columns

    # Fetch the metrics for the player pair
    try:
        # return the metrics for every metric 'REB', 'AST', 'STL', or 'PTS'
        player_pair_metrics = metrics_df.loc[query_string, metrics]
        games_together = relationships_df.loc[player1_id, player2_id]['num_games']
        return player_pair_metrics, games_together
    except KeyError:
        return "Metrics not found for the given player pair.", None

def spearman_pvalue(rho, n):
    if n < 3:
        return 1.0

    t_stat = rho * sqrt((n-2)/(1-rho**2))
    p_value = 2 * t.sf(abs(t_stat), n-2)
    return p_value

In [13]:
from scipy.stats import mvn
import numpy as np
def calculate_probability(corr):
    rho = abs(corr)
    mean = np.array([0, 0])
    cov = np.array([[1, rho], [rho, 1]])
        
    lower = np.array([0, 0])
    upper = np.array([np.inf, np.inf])

    prob, _ = mvn.mvnun(lower, upper, mean, cov)
    conditional_prob = prob / .5
    return conditional_prob

In [14]:


# Example usage
player1 = "Fred VanVleet"
player2 = "Max Strus"

player1_id = player_name_to_id.get(player1)
player2_id = player_name_to_id.get(player2)

result, num_games = get_metrics_for_players(player1, player2)
print(result)
print("Number of games played together:", num_games)


             REB       AST       STL       PTS
Metric                                        
REB    -0.223002 -0.098039 -0.648204 -0.883354
AST     0.171878  0.250046 -0.385046  0.006212
STL     0.183394  0.171626 -0.476442 -0.009593
PTS     0.114200 -0.329279  0.198868 -0.067485
Number of games played together: 12


In [15]:
teams = []
while True:
    players = input("Enter players: ").split(", ")
    if players == ['']:
        break
    teams.append(players)
paired_teams = [teams[i:i+2] for i in range(0, len(teams), 2)]

In [23]:
print(paired_teams)
metrics = ['REB', 'AST', 'PTS']
player_pair_metrics = {}
for team1, team2 in paired_teams:
# Get metrics for each player pair on opposing teams
    player_pairs = [sorted((player1, player2)) for player1 in team1 for player2 in team2]
    player_pairs = set(tuple(player_pair) for player_pair in player_pairs)
    player_pair_metric = {player_pair: get_metrics_for_players(player_pair[0], player_pair[1]) for player_pair in player_pairs}
    player_pair_metrics.update(player_pair_metric)

# Remove all player pairs with no metrics
for player_pair, metric in list(player_pair_metrics.items()):
    if type(metric[0]) == str:
        del player_pair_metrics[player_pair]

num_metrics = len(metrics)

# print in the following format: player1, player2, metric, value (if value > .66)
threshold = .7
num_comparisons = 0
comparable_pairs = {}
for player_pair, metric in player_pair_metrics.items():
    df = metric[0]
    num_games = metric[1]
    
    p1 = player_pair[0]
    p2 = player_pair[1]

    for index in df.index:
        for column in df.columns:
            val = round(df.at[index, column], 3)
            probability = round(calculate_probability(val), 3)
            ev = round(probability * 1.5, 3)
            p_value = spearman_pvalue(val, num_games)
            if abs(val) > threshold and p_value < .05 and num_games > 5:
                num_comparisons += 1
                comparable_pairs[(p1, p2, index, column)] = (val, probability, ev, num_games, p_value)
            #     if val > 0:
            #         print(f"\nMATCH | {p1}: {index}, {p2}: {column} \nGames: {num_games} | p-value: {p_value} | Correlation: {val} | Probability: {probability}\n")
            #     else:
            #         print(f"\nOPPOSITE | {p1}: {index}, {p2}: {column} \nGames: {num_games} | p-value: {p_value} | Correlation: {val} | Probability: {probability}\n")
                
# Now that we have a list of comparable pairs, we need to perform the Holm-Bonferroni correction
# Sort the pairs by correlation
sorted_pairs = sorted(comparable_pairs.items(), key=lambda x: x[1][0], reverse=True)
print(sorted_pairs)

[[['Jamal Murray', 'Nikola Jokic'], ['Derrick Jones Jr.', 'Grant Williams']], [['Collin Sexton'], ['Mikal Bridges']]]
Number of player pairs: 5
dict_keys([('Derrick Jones Jr.', 'Jamal Murray'), ('Derrick Jones Jr.', 'Nikola Jokic'), ('Grant Williams', 'Jamal Murray'), ('Grant Williams', 'Nikola Jokic'), ('Collin Sexton', 'Mikal Bridges')])

MATCH | Derrick Jones Jr.: REB, Jamal Murray: PTS 
Games: 10 | p-value: 0.004326271821339064 | Correlation: 0.812 | Probability: 0.802


OPPOSITE | Derrick Jones Jr.: AST, Jamal Murray: STL 
Games: 10 | p-value: 8.126898130186776e-05 | Correlation: -0.933 | Probability: 0.883


OPPOSITE | Derrick Jones Jr.: AST, Jamal Murray: PTS 
Games: 10 | p-value: 0.0007829830656000007 | Correlation: -0.88 | Probability: 0.842


MATCH | Derrick Jones Jr.: PTS, Jamal Murray: REB 
Games: 10 | p-value: 0.000503071955486779 | Correlation: 0.893 | Probability: 0.851


MATCH | Derrick Jones Jr.: REB, Nikola Jokic: AST 
Games: 18 | p-value: 0.00015344585802384397 | Cor

  prob, _ = mvn.mvnun(lower, upper, mean, cov)
  t_stat = rho * sqrt((n-2)/(1-rho**2))


Work to do: Holm-Bonferroni correction.

You need to implement batching so you can appropriately do the correction