# Functions used for the NBA Model

In [6]:
# Normalize data function
def normalize(col):
    """Min-max scale ``col`` so its values fall in the range [0, 1].

    Args:
        col: Object exposing ``min()`` / ``max()`` and element-wise
            arithmetic (e.g. a pandas Series or a numpy array).

    Returns:
        ``(col - min) / (max - min)``, same shape as ``col``. When every
        value is identical (max == min) a result of zeros is returned
        instead of the NaNs that 0/0 would produce.
    """
    lowest = col.min()
    spread = col.max() - lowest
    # Only guard the scalar case; a non-scalar spread (e.g. a DataFrame
    # reduced per column) keeps the plain element-wise division.
    if getattr(spread, 'ndim', 0) == 0 and spread == 0:
        # Multiplying by 0.0 gives float zeros and keeps any NaN in place.
        return (col - lowest) * 0.0
    return (col - lowest) / spread

In [7]:
# Adds a normalized '<col>_norm' column to the dataframe for each column in cols_to_norm
def replace(df):
    """Attach a normalized companion column for each name in ``cols_to_norm``.

    For every column name listed in the module-level ``cols_to_norm``, the
    result of ``normalize`` on that column is stored under
    ``'<name>_norm'``. The dataframe is modified in place and also returned.
    """
    for stat_name in cols_to_norm:
        source_column = df[stat_name]
        target_name = '{}_norm'.format(stat_name)
        df[target_name] = normalize(source_column)
    return df

In [8]:
# Function to calculate distance between two points
def calc_distance(plyr1, plyr2):
    """Return the Euclidean distance between two players' stat vectors.

    The difference is squared element-wise, summed, and square-rooted,
    i.e. the length of the segment joining the two points.
    """
    delta = plyr1 - plyr2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

# Find the row for a given player and season (used to look up a player's next season)
def find_player(df, player_id, player_season):
    """Return the first row of ``df`` matching a player id and season id.

    Rows are scanned in dataframe order via ``itertuples``; the matching
    named tuple is returned, or ``None`` when no row matches.
    """
    candidates = (
        record
        for record in df.itertuples()
        if player_season == record.season_id and player_id == record.player_id
    )
    return next(candidates, None)

In [9]:
def player_comparison_tool(df, current_player_season, current_player_id):
    """Project a player's next-season stats from the most similar player-seasons.

    Every row of ``df`` is scored against the requested player-season using
    the weighted mean absolute difference of the normalized stat columns.
    Rows 1..10 of the resulting ranking (row 0 is expected to be the player
    himself) are the comparables; each comparable's *following* season is
    looked up and the stats of those seasons are averaged, weighted by
    ``1 / distance``.

    Args:
        df: DataFrame with ``player_id``, ``season_id``, the raw stat
            columns listed in ``stats`` below and their ``<stat>_norm``
            counterparts.
        current_player_season: Season id of the player-season to project from.
        current_player_id: Player id of the player-season to project from.

    Returns:
        dict with ``player_id``, ``proj_season_id`` and one ``proj_<stat>``
        entry per stat, or ``None`` (after printing a message) when the
        player-season is not in ``df``. A ``proj_<stat>`` value is NaN when
        no comparable has a following season in ``df``.

    Raises:
        ValueError: if several rows match the requested player-season, or a
            season id is missing from the module-level ``season_list``.
        IndexError: if ``current_player_season`` is the last entry of
            ``season_list`` (there is no season to project).

    Side effects:
        Adds / overwrites the ``distance`` column of ``df`` and prints a
        progress message. Reads the module-level ``season_list``.
    """
    # Stats to focus on; order matches weighted_numbers below
    stats = [
        'pts',
        'min',
        'fgm',
        'fga',
        'fg3m',
        'fg3a',
        'ftm',
        'fta',
        'oreb',
        'dreb',
        'ast',
        'stl',
        'tov',
        'blk',
    ]
    norm_cols = ['{}_norm'.format(stat) for stat in stats]

    # To measure the effectiveness of the model
    # Number in list corresponds to each stat respectively,
    # Ex. If you want pts to weigh more than the other stats increase
    # the value from one
    weighted_numbers = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

    is_current = (
        (df['season_id'] == current_player_season)
        & (df['player_id'] == current_player_id)
    )
    match_count = int(is_current.sum())

    # If player doesn't exist
    if match_count == 0:
        print('Can\'t find player with id {} and season {}'.format(current_player_id, current_player_season))
        return
    if match_count > 1:
        # The projection needs exactly one reference row.
        raise ValueError(
            'Expected one row for player id {} and season {}, found {}'.format(
                current_player_id, current_player_season, match_count
            )
        )

    current_player_vector = df.loc[is_current, norm_cols].to_numpy(dtype=float)[0]

    proj_season_id = season_list[season_list.index(current_player_season) + 1]
    print('Projecting player_id {0} for season {1}'.format(current_player_id, proj_season_id))

    # Weighted mean absolute difference per row, computed for the whole
    # frame at once (the per-stat distance between two scalars is simply
    # the absolute difference).
    abs_diff = np.abs(df[norm_cols].to_numpy(dtype=float) - current_player_vector)
    weighted_distance = np.abs(abs_diff * weighted_numbers)

    # New column in dataframe, distance
    # Sort the values of distance and place in ranked_df
    df['distance'] = np.sum(weighted_distance, axis=1) / len(stats)
    ranked_df = df.sort_values('distance')

    # Collect (weight, next-season row) once for the 10 most similar rows,
    # skipping the first (that will be the current player season).
    last_season_index = len(season_list) - 1
    # Floor so an exact statistical twin (distance 0) dominates the average
    # instead of producing an infinite weight and NaN projections.
    min_distance = np.finfo(float).eps
    comparables = []
    for neighbor in ranked_df.iloc[1:11].itertuples(index=False):
        # Dealing with list till 2017-18 for now
        if neighbor.season_id == '2017-18':
            continue
        season_index = season_list.index(neighbor.season_id)
        if season_index >= last_season_index:
            # No following season exists in season_list
            continue
        next_season = season_list[season_index + 1]
        player_next_season = find_player(ranked_df, neighbor.player_id, next_season)
        # If player_next_season doesn't exist skip
        if player_next_season is None:
            continue
        weight = 1 / max(neighbor.distance, min_distance)
        comparables.append((weight, player_next_season))

    projected_stats = {
        'player_id': current_player_id,
        'proj_season_id': proj_season_id,
    }

    for col in stats:
        if not comparables:
            # Nothing to average: report NaN rather than dividing by zero
            projected_stats['proj_' + col] = np.nan
            continue
        sum_stat = 0
        sum_weight = 0
        for weight, player_next_season in comparables:
            sum_stat += getattr(player_next_season, col) * weight
            sum_weight += weight
        projected_stats['proj_' + col] = (sum_stat / sum_weight)

    return projected_stats

    