In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor


In [17]:
import pandas as pd
import itertools
import numpy as np

# Assuming 'data' is your DataFrame and includes a column for minutes ('MIN')
# Create a simplified DataFrame for analysis
simplified_data = data[['Game_ID', 'Player_ID', 'Team', 'AST', 'PTS', 'MIN']]

# Calculate the average minutes per game for each player
average_minutes = simplified_data.groupby('Player_ID')['MIN'].mean()

# Filter players who average 10+ minutes per game
players_with_10_plus_minutes = average_minutes[average_minutes >= 10].index.tolist()
simplified_data = simplified_data[simplified_data['Player_ID'].isin(players_with_10_plus_minutes)]

# Initialize a dictionary to store assists and points for each player pair in each team
pair_stats = {}

# Get unique game IDs
game_ids = simplified_data['Game_ID'].unique()

# Looping through each game
for game_id in game_ids:
    game_subset = simplified_data[simplified_data['Game_ID'] == game_id]

    # Grouping players by team
    for team, team_subset in game_subset.groupby('Team'):
        # Create pairs of players within the team
        player_pairs = itertools.combinations(team_subset['Player_ID'], 2)

        # Loop through each pair and aggregate their assists and points
        for pair in player_pairs:
            player_a, player_b = pair
            assists_a = team_subset[team_subset['Player_ID'] == player_a]['AST'].sum()
            points_b = team_subset[team_subset['Player_ID'] == player_b]['PTS'].sum()

            pair_key = (team, player_a, player_b)

            if pair_key not in pair_stats:
                pair_stats[pair_key] = {'assists': [], 'points': []}

            pair_stats[pair_key]['assists'].append(assists_a)
            pair_stats[pair_key]['points'].append(points_b)

# Mapping player IDs to names
player_id_to_name = data[['Player_ID', 'PlayerName']].drop_duplicates().set_index('Player_ID')['PlayerName'].to_dict()

# Calculate correlation for each player pair within the same team
pairwise_correlations = []
for key, values in pair_stats.items():
    team, player_a, player_b = key
    if len(values['assists']) > 1 and len(values['points']) > 1:  # Ensure enough data for correlation
        correlation = np.corrcoef(values['assists'], values['points'])[0, 1]
        pairwise_correlations.append({
            'Team': team,
            'Player_A_ID': player_a,
            'Player_B_ID': player_b,
            'Correlation': correlation
        })

# Convert to DataFrame and sort by correlation
pairwise_correlations_df = pd.DataFrame(pairwise_correlations)
pairwise_correlations_df.sort_values(by='Correlation', ascending=False, inplace=True)

# Adding player names
pairwise_correlations_df['Player_A_Name'] = pairwise_correlations_df['Player_A_ID'].map(player_id_to_name)
pairwise_correlations_df['Player_B_Name'] = pairwise_correlations_df['Player_B_ID'].map(player_id_to_name)

# Displaying top correlations
top_correlations = pairwise_correlations_df[['Team', 'Player_A_Name', 'Player_B_Name', 'Correlation']].head()


  c /= stddev[:, None]
  c /= stddev[None, :]


In [246]:
pairwise_correlations_df[pairwise_correlations_df['Team'] == 'TOR'].head(25)

Unnamed: 0,Team,Player_A_ID,Player_B_ID,Correlation,Player_A_Name,Player_B_Name
1881,TOR,1630567,1630193,0.921765,Scottie Barnes,Immanuel Quickley
1887,TOR,1629628,1629018,0.705089,RJ Barrett,Gary Trent Jr.
276,TOR,1630173,1630201,0.567715,Precious Achiuwa,Malachi Flynn
310,TOR,1627751,1627783,0.555868,Jakob Poeltl,Pascal Siakam
299,TOR,1628449,203490,0.53267,Chris Boucher,Otto Porter Jr.
1892,TOR,1630193,1629018,0.523971,Immanuel Quickley,Gary Trent Jr.
1882,TOR,1629628,1628449,0.364797,RJ Barrett,Chris Boucher
311,TOR,1627751,1629018,0.362641,Jakob Poeltl,Gary Trent Jr.
288,TOR,1628384,1627783,0.288152,OG Anunoby,Pascal Siakam
309,TOR,1627751,203471,0.275493,Jakob Poeltl,Dennis Schroder


In [225]:
# injury_data = pd.read_csv('injury_data.csv')
# injury_data = injury_data.rename(columns = {'Player':'PlayerName'})
# injury_data.drop(['Team'], axis = 1, inplace = True)
# merged_data = data.merge(injury_data, on ='PlayerName', how = 'left')

In [137]:

data = pd.read_csv('all_data.csv')
test2_df = pd.read_csv('test2.csv')
test2_df = test2_df.rename(columns = {'Name':'PlayerName'})
test2_df.drop(['Unnamed: 0','Team'], axis =1, inplace = True)

# Merge the dataframes on the player's name
merged_data = pd.merge(data, test2_df, on='PlayerName', how = 'left')
relevant_props = ['Points', 'Assists', 'Rebounds', 'Pts+Rebs+Asts', 'Pts+Asts', 'Pts+Rebs', 'Rebs+Asts']

merged_data['Pts+Rebs+Asts'] = merged_data['PTS'] + merged_data['REB'] + merged_data['AST']
merged_data['Pts+Rebs'] = merged_data['PTS'] + merged_data['REB']
merged_data['Pts+Asts'] = merged_data['PTS'] + merged_data['AST']
merged_data['Rebs+Asts'] = merged_data['REB'] + merged_data['AST']
merged_data= merged_data.rename(columns = {'PTS':'Points', 'REB': 'Rebounds', 'AST': 'Assists'})
# merged_data = merged_data[merged_data['Prop'].isin(relevant_props)]
merged_data.to_csv('merged_data.csv', index = False)



In [220]:
un_d = merged_data['GAME_DATE'].unique()

In [1026]:
player_data = merged_data[(merged_data['PlayerName'] == 'OG Anunoby') & (merged_data['Prop'] == 'Points')]


# Filter for the specific opponent if selected
player_data = player_data[(player_data['Home'] == 'CHA') | (player_data['Away'] == 'CHA')]
player_data

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,Team,Home,Away,Value,Prop,Pts+Rebs+Asts,Pts+Rebs,Pts+Asts,Rebs+Asts,Prop_Result
1149,22023,1628384,22300354,"DEC 18, 2023",TOR vs. CHA,W,32,2,10,0.2,...,TOR,TOR,CHA,14.0,Points,9,7,7,4,-1.0
1173,22023,1628384,22301216,"DEC 08, 2023",TOR @ CHA,L,36,3,12,0.25,...,TOR,CHA,TOR,14.0,Points,18,14,15,7,-1.0


In [961]:
unique_player_props = merged_data[['PlayerName', 'Prop', 'Value']]

# List of relevant prop types - ensure these match exactly with your data
relevant_props = ["Points", "Assists", "Rebounds", "Pts+Rebs+Asts", 'Pts+Rebs', 'Pts+Asts', 'Rebs+Asts']

# Filter the DataFrame to include only relevant prop types
unique_player_props = unique_player_props[unique_player_props['Prop'].isin(relevant_props)]

# Check if the resulting DataFrame is empty
if unique_player_props.empty:
    print("No matching props found in the DataFrame.")
else:
    print("Filtered props:")
    print(unique_player_props)


Filtered props:
          PlayerName           Prop  Value
162    Grayson Allen         Points   12.5
163    Grayson Allen       Rebounds    4.0
164    Grayson Allen  Pts+Rebs+Asts   19.5
165    Grayson Allen       Pts+Rebs   16.5
166    Grayson Allen       Pts+Asts   15.5
...              ...            ...    ...
17946      Dean Wade       Rebounds    4.0
17947      Dean Wade       Rebounds    4.0
17948      Dean Wade       Rebounds    4.0
17949      Dean Wade       Rebounds    4.0
17950      Dean Wade       Rebounds    4.0

[6981 rows x 3 columns]


In [952]:
unique_player_props

Unnamed: 0,PlayerName,Prop,Value


In [70]:
import datetime as dt
merged_data['GAME_DATE'] = pd.to_datetime(merged_data['GAME_DATE'])

merged_data['GAME_DATE'].dt.strftime('%m-%d-%y')

0        01-11-24
1        01-09-24
2        01-06-24
3        01-05-24
4        01-03-24
           ...   
27517    10-25-23
27518    10-25-23
27519    10-25-23
27520    10-25-23
27521    10-25-23
Name: GAME_DATE, Length: 27522, dtype: object

In [51]:
merged_data[merged_data['Team'] == 'CHA'].sort_values(by ='GAME_DATE').tail(10)

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,Team,Home,Away,Value,Prop,Pts+Rebs+Asts,Pts+Rebs,Pts+Asts,Rebs+Asts,Prop_Result
15995,22023,1641706,22300518,2024-01-10,CHA vs. SAC,L,28,5,10,0.5,...,CHA,CHA,SAC,22.5,Pts+Rebs+Asts,18,16,17,3,-1.0
15996,22023,1641706,22300518,2024-01-10,CHA vs. SAC,L,28,5,10,0.5,...,CHA,CHA,SAC,19.5,Pts+Rebs,18,16,17,3,-1.0
15814,22023,1631121,22300518,2024-01-10,CHA vs. SAC,L,10,1,2,0.5,...,CHA,CHA,SAC,,,7,6,5,3,
21788,22023,1641733,22300518,2024-01-10,CHA vs. SAC,L,25,4,15,0.267,...,CHA,CHA,SAC,,,15,14,12,4,
2434,22023,1628970,22300518,2024-01-10,CHA vs. SAC,L,34,10,21,0.476,...,CHA,CHA,SAC,,,37,31,30,13,
15993,22023,1641706,22300518,2024-01-10,CHA vs. SAC,L,28,5,10,0.5,...,CHA,CHA,SAC,15.5,Points,18,16,17,3,-1.0
15997,22023,1641706,22300518,2024-01-10,CHA vs. SAC,L,28,5,10,0.5,...,CHA,CHA,SAC,17.5,Pts+Asts,18,16,17,3,-1.0
15994,22023,1641706,22300518,2024-01-10,CHA vs. SAC,L,28,5,10,0.5,...,CHA,CHA,SAC,4.0,Rebounds,18,16,17,3,-1.0
19420,22023,1630208,22300518,2024-01-10,CHA vs. SAC,L,26,3,5,0.6,...,CHA,CHA,SAC,,,20,17,11,12,
2315,22023,1630547,22300518,2024-01-10,CHA vs. SAC,L,20,3,8,0.375,...,CHA,CHA,SAC,,,9,8,9,1,


In [786]:
def get_player_absences(dataframe, player_name, team):
    # Filter games where the player's team played
    team_games = dataframe[dataframe['Team'] == team]['Game_ID'].unique()

    # Check if the player participated in each of these games
    player_absences = []
    for game in team_games:
        if not dataframe[(dataframe['Game_ID'] == game) & (dataframe['PlayerName'] == player_name)].empty:
            continue  # Player participated in this game
        player_absences.append(game)  # Player was absent for this game

    return player_absences

In [252]:

def analyze_prop_bet_enhanced(dataframe, player_name, team, opponent, injured_players, value, prop_type_adjusted):
    """
    Analyzes a player's prop bet considering various factors including home vs. away performance, 
    opponent's stats, and the impact of multiple teammates' absences.
    """
    # Filter data for the specified player and team
    player_data = dataframe[(dataframe['PlayerName'] == player_name)]

    # Check if there is enough data for analysis
    if player_data.empty:
        return f"No data available for player {player_name}."

    # Adjust prop_type to match column names in the dataframe

    all_injured_players_out_dates = set()
    for injured_player in injured_players:
        injured_player_out_dates = set(dataframe[(dataframe['PlayerName'] == injured_player)]['Game_ID'].unique())
        all_injured_players_out_dates.update(injured_player_out_dates)

    # Convert the set to a list for filtering
    unique_injured_players_out_dates = list(all_injured_players_out_dates)



    historical_performance_against_opponent = player_data[(player_data['Away'] == opponent) | (player_data['Home'] == opponent)][prop_type_adjusted]

    player_avg_minutes =  player_data['MIN'].mean()
    player_avg_minutes_with_teammates_out = player_data[player_data['Game_ID'].isin(unique_injured_players_out_dates)]['MIN'].mean()

    

    # Calculate player's performance with teammates out
    player_performance_with_teammates_out = player_data[player_data['Game_ID'].isin(unique_injured_players_out_dates)][prop_type_adjusted].mean()

    home_games = player_data[player_data['Home'] == team]
    away_games = player_data[player_data['Away'] == team]

    win_percentage_home = home_games['WL'].value_counts(normalize=True).get('W', 0) * 100
    win_percentage_away = away_games['WL'].value_counts(normalize=True).get('W', 0) * 100

    
    # Analysis based on the prop type
    if prop_type_adjusted in player_data.columns:
        # average_with_teammates_out = player_performance_with_teammates_out.mean()
        average_overall = player_data[prop_type_adjusted].mean()
        std_dev = player_data[prop_type_adjusted].std()
        average_home = player_data[player_data['Home'] == team][prop_type_adjusted].mean()
        average_away = player_data[player_data['Away'] == team][prop_type_adjusted].mean()
        average_against_opponent = historical_performance_against_opponent.mean()

        # Opponent stats analysis
        opponent_stat_given = None
        team_stat_given = None
        player_stat_given = None
        PER_given = None

        if prop_type_adjusted == 'Rebounds':
            team_data = pd.read_csv('team_stats/total-rebounds-per-game_data.csv')
            team_stat_given = team_data[team_data['Team'] == team]['Rank'].values[0]
            opponent_data = pd.read_csv('team_stats/opponent-total-rebounds-per-game_data.csv')
            opponent_stat_given = opponent_data[opponent_data['Team'] == opponent]['Rank'].values[0]
            player_stat = pd.read_csv('player_stats/rebounds_data.csv')
            if player_name in player_stat['Player'].values:
                player_stat_given = player_stat[player_stat['Player'] == player_name]['Rank'].values[0]
            else:
                print(f'{player_name} rank not available')
                player_stat_given = None
        elif prop_type_adjusted == 'Assists':
            opponent_data = pd.read_csv('team_stats/opponent-assists-per-game_data.csv')
            opponent_stat_given = opponent_data[opponent_data['Team'] == opponent]['Rank'].values[0]
            team_data = pd.read_csv('team_stats/assists-per-game_data.csv')
            team_stat_given = team_data[team_data['Team'] == team]['Rank'].values[0]
            player_stat = pd.read_csv('player_stats/assists_data.csv')
            if player_name in player_stat['Player'].values:
                player_stat_given = player_stat[player_stat['Player'] == player_name]['Rank'].values[0]
            else:
                print(f'{player_name} rank not available')
                player_stat_given = None

        elif prop_type_adjusted == 'Points':
            opponent_data = pd.read_csv('team_stats/opponent-points-per-game_data.csv')
            opponent_stat_given = opponent_data[opponent_data['Team'] == opponent]['Rank'].values[0]
            team_data = pd.read_csv('team_stats/points-per-game_data.csv')
            team_stat_given = team_data[team_data['Team'] == team]['Rank'].values[0]
            player_stat = pd.read_csv('player_stats/points_data.csv')
            PER= pd.read_csv('player_stats/nba-efficiency_data.csv')
            TSP = pd.read_csv('player_stats/ts-percentage_data.csv')

            if player_name in player_stat['Player'].values:
                player_stat_given = player_stat[player_stat['Player'] == player_name]['Rank'].values[0]
                PER_given = PER[PER['Player'] == player_name]['Value'].values[0]
            else:
                print(f'{player_name} rank not available')
                player_stat_given = None
                PER_given = None
                
        elif prop_type_adjusted == 'Pts+Rebs+Asts':
            opponent_data = pd.read_csv('team_stats/opponent-points-plus-rebounds-plus-assists-per-gam_data.csv')
            opponent_stat_given = opponent_data[opponent_data['Team'] == opponent]['Rank'].values[0]
            team_data = pd.read_csv('team_stats/points-plus-rebounds-plus-assists-per-game_data.csv')
            team_stat_given = team_data[team_data['Team'] == team]['Rank'].values[0]
            player_stat = pd.read_csv('player_stats/points-plus-rebounds-plus-assists_data.csv')

            if player_name in player_stat['Player'].values:
                player_stat_given = player_stat[player_stat['Player'] == player_name]['Rank'].values[0]
            else:
                print(f'{player_name} rank not available')
                player_stat_given = None
                
        elif prop_type_adjusted == 'Pts+Rebs':
            opponent_data = pd.read_csv('team_stats/opponent-points-plus-rebounds-per-game_data.csv')
            opponent_stat_given = opponent_data[opponent_data['Team'] == opponent]['Rank'].values[0]
            team_data = pd.read_csv('team_stats/points-plus-rebounds-per-game_data.csv')
            team_stat_given = team_data[team_data['Team'] == team]['Rank'].values[0]
            player_stat = pd.read_csv('player_stats/points-plus-rebounds_data.csv')

            if player_name in player_stat['Player'].values:
                player_stat_given = player_stat[player_stat['Player'] == player_name]['Rank'].values[0]
            else:
                print(f'{player_name} rank not available')
                player_stat_given = None
        elif prop_type_adjusted == 'Pts+Asts':
            opponent_data = pd.read_csv('team_stats/opponent-points-plus-assists-per-game_data.csv')
            opponent_stat_given = opponent_data[opponent_data['Team'] == opponent]['Rank'].values[0]
            team_data = pd.read_csv('team_stats/points-plus-assists-per-game_data.csv')
            team_stat_given = team_data[team_data['Team'] == team]['Rank'].values[0]
            player_stat = pd.read_csv('player_stats/points-plus-assists_data.csv')

            if player_name in player_stat['Player'].values:
                player_stat_given = player_stat[player_stat['Player'] == player_name]['Rank'].values[0]
            else:
                print(f'{player_name} rank not available')
                player_stat_given = None

        elif prop_type_adjusted == 'Rebs+Asts':
            opponent_data = pd.read_csv('team_stats/opponent-rebounds-plus-assists-per-game_data.csv')
            opponent_stat_given = opponent_data[opponent_data['Team'] == opponent]['Rank'].values[0]
            team_data = pd.read_csv('team_stats/rebounds-plus-assists-per-game_data.csv')
            team_stat_given = team_data[team_data['Team'] == team]['Rank'].values[0]
            player_stat = pd.read_csv('player_stats/rebounds-plus-assist_data.csv')

            if player_name in player_stat['Player'].values:
                player_stat_given = player_stat[player_stat['Player'] == player_name]['Rank'].values[0]
            else:
                print(f'{player_name} rank not available')
                player_stat_given = None
        

        # Final results including all factors
        results = {
            'Minutes Per Game':player_avg_minutes.round(1),
            'Field Goal %:': (player_data['FG_PCT'].mean()*100).round(0),
            '3PT Field Goal %:': (player_data['FG3_PCT'].mean()*100).round(0),
            'Free Throw %:': (player_data['FT_PCT'].mean()*100).round(0),
            f'average_minutes_with{injured_players}_out': player_avg_minutes_with_teammates_out,
            f'average_with_{injured_players}_out': player_performance_with_teammates_out,
            f'average_{prop_type_adjusted}_overall': average_overall.round(0),
            'std_dev': std_dev,
            f'average_{prop_type_adjusted}_home': average_home.round(0),
            f'average_{prop_type_adjusted}_away': average_away.round(0),
            f'average_{prop_type_adjusted}_against_opponent': average_against_opponent,
            f'average_{prop_type_adjusted}_with_teammates_out' : player_performance_with_teammates_out,
            'impact_on_performance': (player_performance_with_teammates_out - average_overall).round(1),
            f'above_{prop_type_adjusted}_with_teammates_out': player_performance_with_teammates_out > value,
            f'above_{prop_type_adjusted}_overall': average_overall > value,
            'win_percentage_home': f"{win_percentage_home:.2f}%",
            'win_percentage_away': f"{win_percentage_away:.2f}%"
        }

        if opponent_stat_given is not None:
            results[f'{opponent}_{prop_type_adjusted.lower()}_defense_rank (Out of 30)'] = opponent_stat_given
        if team_stat_given is not None:
            results[f'{team}_{prop_type_adjusted.lower()}_rank (Out of 30)'] = team_stat_given
        if player_stat_given is not None:
            results[f'{player_name}_{prop_type_adjusted.lower()}_rank (Out of 100)'] = player_stat_given
        if player_stat_given is not None:
            results[f'{player_name} Efficiency Rating %'] = PER_given
            

        return results
    else:
        return f"Prop type '{prop_type_adjusted}' not found in data."


In [253]:
enhanced_analysis_results = analyze_prop_bet_enhanced(merged_data, 'Derrick Jones Jr.', 'DAL', 'NYK', ['Rui Hachimura'], 9.5, 'Points')
enhanced_analysis_results

Derrick Jones Jr. rank not available


{'Minutes Per Game': 26.0,
 'Field Goal %:': 47.0,
 '3PT Field Goal %:': 30.0,
 'Free Throw %:': 48.0,
 "average_minutes_with['Rui Hachimura']_out": 19.0,
 "average_with_['Rui Hachimura']_out": 6.0,
 'average_Points_overall': 11.0,
 'std_dev': 6.800214333368993,
 'average_Points_home': 12.0,
 'average_Points_away': 9.0,
 'average_Points_against_opponent': 9.0,
 'average_Points_with_teammates_out': 6.0,
 'impact_on_performance': -4.6,
 'above_Points_with_teammates_out': False,
 'above_Points_overall': True,
 'win_percentage_home': '55.00%',
 'win_percentage_away': '57.89%',
 'NYK_points_defense_rank (Out of 30)': 7,
 'DAL_points_rank (Out of 30)': 7}

In [1032]:
player_data = merged_data[(merged_data['PlayerName'] == 'Josh Giddey') ]['Team']


In [1034]:
.sort_values('GAME_DATE', ascending=False).tail(10)


'OKC'

In [42]:
merged_data = pd.read_csv('merged_data.csv')

In [138]:


def prop_result(row):
    if row['Prop'] == 'Points':
        performance = row['Points']
    elif row['Prop'] == 'Assists':
        performance = row['Assists']
    elif row['Prop'] == 'Rebounds':
        performance = row['Rebounds']
    elif row['Prop'] == 'Pts+Rebs+Asts':
        performance = row['Points'] + row['Rebounds'] + row['Assists']
    elif row['Prop'] == 'Pts+Asts':
        performance = row['Points'] + row['Assists']
    elif row['Prop'] == 'Pts+Rebs':
        performance = row['Points'] + row['Rebounds']
    elif row['Prop'] == 'Rebs+Asts':
        performance = row['Rebounds'] + row['Assists']
    else:
        return None  # Undefined prop type

    if performance > row['Value']:
        return 1  # Over
    elif performance < row['Value']:
        return -1  # Under
    else:
        return 0  # Exact

# Apply the function to each row
merged_data['Prop_Result'] = merged_data.apply(prop_result, axis=1)

grouped_stats = merged_data.groupby(['PlayerName', 'Team', 'Value', 'Prop'])['Prop_Result'].value_counts(normalize=True).unstack(fill_value=0) * 100


# Rename columns for clarity
grouped_stats = grouped_stats.rename(columns={-1: 'Under %', 0: 'Exact %', 1: 'Over %'})

# Reset the index to turn the group by columns into regular columns
grouped_stats = grouped_stats.reset_index()

In [139]:
grouped_stats.sort_values(by='Over %', ascending=False).head(35)

Prop_Result,PlayerName,Team,Value,Prop,Under %,Exact %,Over %
88,Dennis Schroder,TOR,13.5,Pts+Asts,13.953488,0.0,86.046512
85,Dennis Schroder,TOR,4.0,Assists,9.302326,13.953488,76.744186
89,Dennis Schroder,TOR,17.0,Pts+Rebs+Asts,20.930233,2.325581,76.744186
87,Dennis Schroder,TOR,9.5,Points,27.906977,0.0,72.093023
237,Kevin Durant,PHX,31.5,Pts+Asts,29.411765,0.0,70.588235
86,Dennis Schroder,TOR,12.5,Pts+Rebs,32.55814,0.0,67.44186
176,Jalen Duren,DET,12.5,Rebs+Asts,33.333333,0.0,66.666667
175,Jalen Duren,DET,10.5,Rebounds,33.333333,0.0,66.666667
234,Kevin Durant,PHX,32.5,Pts+Rebs,35.294118,0.0,64.705882
236,Kevin Durant,PHX,4.5,Assists,35.294118,0.0,64.705882


## Player Game Logs

In [263]:
injury_data = pd.read_csv('injury_data.csv')

# Parse injury data to find out players who are 'Out' and 'Day to Day'
out_players = injury_data[injury_data['Details'].str.contains('Out')]['Player'].tolist()
day_to_day_players = injury_data[injury_data['Details'].str.contains('Day To Day')]['Player'].tolist()


In [266]:
day_to_day_players

['Brandon Miller',
 'Frank Ntilikina',
 'Ayo Dosunmu',
 'Dalen Terry',
 'Patrick Williams',
 'Max Strus',
 'Bruce Brown',
 'Aaron Nesmith',
 'Ivica Zubac',
 'Anthony Davis',
 'LeBron James',
 "D'Angelo Russell",
 'Santi Aldama',
 'Jaren Jackson Jr.',
 'Jimmy Butler',
 'Kyle Lowry',
 'Chris Livingston',
 'Shai Gilgeous-Alexander',
 'Mo Bamba',
 'Robert Covington',
 'Kenneth Lofton Jr.',
 'Jaden Springer',
 'Bol Bol',
 'Deandre Ayton',
 'Ibou Badji',
 'Malcolm Brogdon',
 'Jerami Grant',
 'Shaedon Sharpe',
 'Matisse Thybulle',
 'Keegan Murray',
 'Gradey Dick']

In [11]:
team_scores = data.groupby(['Game_ID', 'Team'])['PTS'].sum().reset_index()


In [14]:
t = pd.merge(data, team_scores, on=['Game_ID', 'Team'], suffixes=('', '_Team_Total'))
t

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,TOV,PF,PTS,PLUS_MINUS,VIDEO_AVAILABLE,PlayerName,Team,Home,Away,PTS_Team_Total
0,22023,1630173,22300573,"JAN 17, 2024",NYK vs. HOU,W,6,0,2,0.000,...,0,3,0,2,1,Precious Achiuwa,NYK,NYK,HOU,109
1,22023,1628384,22300573,"JAN 17, 2024",NYK vs. HOU,W,43,5,10,0.500,...,1,2,15,24,1,OG Anunoby,NYK,NYK,HOU,109
2,22023,1627853,22300573,"JAN 17, 2024",NYK vs. HOU,W,1,0,0,0.000,...,0,0,0,-5,1,Ryan Arcidiacono,NYK,NYK,HOU,109
3,22023,1628973,22300573,"JAN 17, 2024",NYK vs. HOU,W,37,11,21,0.524,...,2,2,30,12,1,Jalen Brunson,NYK,NYK,HOU,109
4,22023,1628978,22300573,"JAN 17, 2024",NYK vs. HOU,W,19,2,7,0.286,...,2,1,5,7,1,Donte DiVincenzo,NYK,NYK,HOU,109
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12995,22023,1630171,22300120,"NOV 01, 2023",CLE @ NYK,W,31,1,3,0.333,...,1,6,2,-1,1,Isaac Okoro,CLE,NYK,CLE,95
12996,22023,1641854,22300120,"NOV 01, 2023",CLE @ NYK,W,11,1,1,1.000,...,0,3,4,3,1,Craig Porter,CLE,NYK,CLE,95
12997,22023,1629622,22300120,"NOV 01, 2023",CLE @ NYK,W,35,3,9,0.333,...,5,3,9,3,1,Max Strus,CLE,NYK,CLE,95
12998,22023,202684,22300120,"NOV 01, 2023",CLE @ NYK,W,13,3,4,0.750,...,2,1,6,-2,1,Tristan Thompson,CLE,NYK,CLE,95


In [19]:
data = pd.read_csv('all_data.csv')
team_scores = data.groupby(['Game_ID', 'Team'])['PTS'].sum().reset_index()
data= pd.merge(data, team_scores, on=['Game_ID', 'Team'], suffixes=('', '_Team_Total'))
test2_df = pd.read_csv('test2.csv')
test2_df = test2_df.rename(columns = {'Name':'PlayerName'})
test2_df.drop(['Unnamed: 0','Team'], axis =1, inplace = True)

# Merge the dataframes on the player's name
merged_data = pd.merge(data, test2_df, on='PlayerName', how = 'left')

merged_data['Pts+Rebs+Asts'] = merged_data['PTS'] + merged_data['REB'] + merged_data['AST']
merged_data['Pts+Rebs'] = merged_data['PTS'] + merged_data['REB']
merged_data['Pts+Asts'] = merged_data['PTS'] + merged_data['AST']
merged_data['Rebs+Asts'] = merged_data['REB'] + merged_data['AST']
merged_data= merged_data.rename(columns = {'PTS':'Points', 'REB': 'Rebounds', 'AST': 'Assists'})
merged_data.to_csv('merged_data.csv', index = False)

dataframe = pd.read_csv('merged_data.csv')

In [84]:
dataframe

Unnamed: 0,SEASON_ID,Player_ID,Game_ID,GAME_DATE,MATCHUP,WL,MIN,FGM,FGA,FG_PCT,...,Team,Home,Away,PTS_Team_Total,Value,Prop,Pts+Rebs+Asts,Pts+Rebs,Pts+Asts,Rebs+Asts
0,22023,1630173,22300573,"JAN 17, 2024",NYK vs. HOU,W,6,0,2,0.000,...,NYK,NYK,HOU,109,,,4,4,0,4
1,22023,1628384,22300573,"JAN 17, 2024",NYK vs. HOU,W,43,5,10,0.500,...,NYK,NYK,HOU,109,14.5,Points,20,20,15,5
2,22023,1628384,22300573,"JAN 17, 2024",NYK vs. HOU,W,43,5,10,0.500,...,NYK,NYK,HOU,109,4.5,Rebounds,20,20,15,5
3,22023,1628384,22300573,"JAN 17, 2024",NYK vs. HOU,W,43,5,10,0.500,...,NYK,NYK,HOU,109,22.5,Pts+Rebs+Asts,20,20,15,5
4,22023,1628384,22300573,"JAN 17, 2024",NYK vs. HOU,W,43,5,10,0.500,...,NYK,NYK,HOU,109,26.5,Fantasy Score,20,20,15,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27368,22023,1630171,22300120,"NOV 01, 2023",CLE @ NYK,W,31,1,3,0.333,...,CLE,NYK,CLE,95,,,7,5,4,5
27369,22023,1641854,22300120,"NOV 01, 2023",CLE @ NYK,W,11,1,1,1.000,...,CLE,NYK,CLE,95,,,5,5,4,1
27370,22023,1629622,22300120,"NOV 01, 2023",CLE @ NYK,W,35,3,9,0.333,...,CLE,NYK,CLE,95,,,17,13,13,8
27371,22023,202684,22300120,"NOV 01, 2023",CLE @ NYK,W,13,3,4,0.750,...,CLE,NYK,CLE,95,,,11,11,6,5


In [118]:
opra = pd.read_csv('team_stats/opponent-points-plus-rebounds-plus-assists-per-gam_data.csv')
opra.rename(columns={'Rank': 'PRA_Defense_Rank'}, inplace=True)
opra = opra[['Team','PRA_Defense_Rank']]
pra = pd.read_csv('team_stats/points-plus-rebounds-plus-assists-per-game_data.csv')
pra.rename(columns={'Rank': 'PRA_Rank'}, inplace=True)
pra = pra[['Team','PRA_Rank']]

opr = pd.read_csv('team_stats/opponent-points-plus-rebounds-per-game_data.csv')
opr.rename(columns={'Rank': 'PR_Defense_Rank'}, inplace=True)
opr = opr[['Team','PR_Defense_Rank']]
pr = pd.read_csv('team_stats/points-plus-rebounds-per-game_data.csv')
pr.rename(columns={'Rank': 'PR_Rank'}, inplace=True)
pr = pr[['Team','PR_Rank']]


opa = pd.read_csv('team_stats/opponent-points-plus-assists-per-game_data.csv')
opa.rename(columns={'Rank': 'PA_Defense_Rank'}, inplace=True)
opa = opa[['Team','PA_Defense_Rank']]
pa = pd.read_csv('team_stats/points-plus-assists-per-game_data.csv')
pa.rename(columns={'Rank': 'PA_Rank'}, inplace=True)
pa = pa[['Team','PA_Rank']]


ora = pd.read_csv('team_stats/opponent-rebounds-plus-assists-per-game_data.csv')
ora.rename(columns={'Rank': 'RA_Defense_Rank'}, inplace=True)
ora = ora[['Team','RA_Defense_Rank']]
ra = pd.read_csv('team_stats/rebounds-plus-assists-per-game_data.csv')
ra.rename(columns={'Rank': 'RA_Rank'}, inplace=True)
ra = ra[['Team','RA_Rank']]


op = pd.read_csv('team_stats/opponent-points-per-game_data.csv')
op.rename(columns={'Rank': 'Points_Defense_Rank'}, inplace=True)
op = op[['Team','Points_Defense_Rank']]
pts = pd.read_csv('team_stats/points-per-game_data.csv')
pts.rename(columns={'Rank': 'Points_Rank'}, inplace=True)
pts = pts[['Team','Points_Rank']]


oreb = pd.read_csv('team_stats/opponent-total-rebounds-per-game_data.csv')
oreb.rename(columns={'Rank': 'Rebounds_Defense_Rank'}, inplace=True)
oreb = oreb[['Team','Rebounds_Defense_Rank']]
reb = pd.read_csv('team_stats/total-rebounds-per-game_data.csv')
reb.rename(columns={'Rank': 'Rebounds_Rank'}, inplace=True)
reb = reb[['Team','Rebounds_Rank']]


oa = pd.read_csv('team_stats/opponent-assists-per-game_data.csv')
oa.rename(columns={'Rank': 'Assists_Defense_Rank'}, inplace=True)
oa = oa[['Team','Assists_Defense_Rank']]
ast = pd.read_csv('team_stats/assists-per-game_data.csv')
ast.rename(columns={'Rank': 'Assists_Rank'}, inplace=True)
ast = ast[['Team','Assists_Rank']]


In [123]:
result = merged_data.merge(opra, on='Team').merge(pra, on='Team').merge(pa, on='Team').merge(opr, on='Team').merge(pr, on='Team').merge(opa, on='Team').merge(ora, on='Team').merge(ra, on='Team').merge(op, on='Team').merge(pts, on='Team').merge(oreb, on='Team').merge(reb, on='Team').merge(oa, on='Team').merge(ast, on='Team')



In [125]:
result.columns

Index(['SEASON_ID', 'Player_ID', 'Game_ID', 'GAME_DATE', 'MATCHUP', 'WL',
       'MIN', 'FGM', 'FGA', 'FG_PCT', 'FG3M', 'FG3A', 'FG3_PCT', 'FTM', 'FTA',
       'FT_PCT', 'OREB', 'DREB', 'Rebounds', 'Assists', 'STL', 'BLK', 'TOV',
       'PF', 'Points', 'PLUS_MINUS', 'VIDEO_AVAILABLE', 'PlayerName', 'Team',
       'Home', 'Away', 'Value', 'Prop', 'Pts+Rebs+Asts', 'Pts+Rebs',
       'Pts+Asts', 'Rebs+Asts', 'Prop_Result', 'PRA_Defense_Rank', 'PRA_Rank',
       'PA_Rank', 'PR_Defense_Rank', 'PR_Rank', 'PA_Defense_Rank',
       'RA_Defense_Rank', 'RA_Rank', 'Points_Defense_Rank', 'Points_Rank',
       'Rebounds_Defense_Rank', 'Rebounds_Rank', 'Assists_Defense_Rank',
       'Assists_Rank'],
      dtype='object')

In [128]:
matchup_opp = dataframe['MATCHUP'].str[-3:]


In [129]:
matchup_opp

0        HOU
1        HOU
2        HOU
3        HOU
4        HOU
        ... 
27368    NYK
27369    NYK
27370    NYK
27371    NYK
27372    NYK
Name: MATCHUP, Length: 27373, dtype: object