# Basketball Question

### Importing Data / Packages

In [1]:
import pandas as pd
import numpy as np

In [2]:
#importing text documents
df_Event_Codes = pd.read_csv("Event_Codes.txt", sep = '\t')
df_Game_Lineup = pd.read_csv("Game_Lineup.txt", sep = '\t')

#sorting play by play as suggested in pdf
df_Play_by_Play = pd.read_csv("Play_by_Play.txt", sep = '\t').sort_values(['Game_id',
                                                                           'Period', 
                                                                           'PC_Time',
                                                                           'WC_Time',
                                                                           'Event_Num'],
                                                                          ascending = [0, 1, 0, 1, 1])

### Trying to Get Indicator variable for if player A is in

In [3]:
#creating merge of two dataframes

#gets correct team_id by merging play by play with game lineup. looking only at period 0 to extract info regardless if starter
df_merged = pd.merge(df_Play_by_Play.drop('Team_id', axis = 1),
                     df_Game_Lineup.loc[df_Game_Lineup.Period == 0][['Game_id', 'Person_id', 'Period', 'Team_id']],
                     left_on = ['Game_id', 'Person1'],
                     right_on = ['Game_id', 'Person_id']).drop('Period_y', axis = 1)

#merging to get information of whether player started specific period
df_merged = pd.merge(df_merged,
                     df_Game_Lineup[['Game_id', 'Person_id', 'Period']], how = 'left', left_on = ['Game_id', 'Person1', 'Period_x'],
                     right_on = ['Game_id', 'Person_id', 'Period'])

#resorting and resetting index
df_merged.sort_values(['Game_id','Period_x', 'PC_Time', 'WC_Time', 'Event_Num'],
                                                                          ascending = [0, 1, 0, 1, 1], inplace = True)

df_merged.reset_index(inplace = True, drop = True)

In [4]:
def findNonFreethrow(i, player_index):
    num = 0
    while (df_merged.iloc[player_index[i + num]]['Event_Msg_Type'] == 3 or 
           df_merged.iloc[player_index[i + num]]['Event_Msg_Type'] == 8):
        num += 1
        
    return num

def checkIfNotFreeThrow(i, player_index):
    num = 0
    while (df_merged.iloc[player_index[i - num]]['Event_Msg_Type'] == 8):
        num += 1
        
    if df_merged.iloc[player_index[i - num]]['Event_Msg_Type'] == 3:
        return False
    else:
        return True

In [5]:
#iterating through all players that were listed as starters or were subbed in
for player in df_Game_Lineup.Person_id.append(df_Play_by_Play.Person2).unique():
    
    #setting indicator value to 0 for all entries to start
    df_merged[player] = 0
    
    #defining boolean statement to select games of interest (games in which player played in)
    games = df_merged.Game_id.isin(df_Game_Lineup.loc[df_Game_Lineup.Person_id == player].Game_id.unique())

    #setting indicator value to 1 for those who start periods
    period_start_index = df_merged.loc[games].drop_duplicates(['Game_id', 'Period_x']).index

    #groups by period finds players that started their period and puts a 1 in the first entry for those who started their period
    start_of_period = ((df_merged.loc[games].groupby(['Game_id', 'Period'])
                    .apply(lambda x: (x['Person1'] == player).any())).astype(int))
    
    start_of_period = start_of_period.reset_index().sort_values(['Game_id', 'Period'], ascending = [0, 1]).reset_index().set_index(period_start_index)
    
    #if player doesnt play, no need to set anything
    if len(start_of_period) > 0:
        df_merged.loc[period_start_index, player] = start_of_period[0]
    
    #setting continual indicatos of being in game
    player_index = df_merged.loc[games].index
    freethrow_indicator = np.ones(len(player_index))
    
    for i in range(len(player_index) - 1):
    
        df_merged[player].iloc[player_index[i+1]] = (#current value (if already 1, should stay same)
                                    df_merged[player].iloc[player_index[i+1]] +
                        
                                    #no change in value if game or quarter ends:
                                    int((df_merged['Period_x'].iloc[player_index[i+1]] == 
                                         df_merged['Period_x'].iloc[player_index[i]])
                                    & (df_merged['Game_id'].iloc[player_index[i+1]] ==
                                       df_merged['Game_id'].iloc[player_index[i]])) *
                                    
                                    #checking if freethrow was shot:
                                    freethrow_indicator[i] *
                                    
                                    #previous value
                                     (df_merged[player].iloc[player_index[i]]
                        
                                      #checking if play before or after was freethrow
                                      + int(checkIfNotFreeThrow(i+1, player_index)) * 
                                      
                                    #subtract 1 if player has been subbed out ()
                                      +((- int((df_merged['Event_Msg_Type'].iloc[player_index[i+1]] == 8) & 
                                        (df_merged['Person1'].iloc[player_index[i+1]] == player))
                          
                                    #add 1 if player has been subbed in
                                       + (int((df_merged['Event_Msg_Type'].iloc[player_index[i]] == 8) & 
                                        (df_merged['Person2'].iloc[player_index[i]] == player)))))))
        
        
        #adjusting for freethrows
        if ((df_merged['Event_Msg_Type'].iloc[player_index[i+1]] == 8) &
                (not checkIfNotFreeThrow(i+1, player_index)) &
                (df_merged['Person1'].iloc[player_index[i+1]] == player) & 
                (df_merged['Game_id'].iloc[player_index[findNonFreethrow(i, player_index) + i]] == df_merged['Game_id'].iloc[i])):
                                     
                                    #setting 0 at correct spot
                                     freethrow_indicator[findNonFreethrow(i, player_index) + i] = 0
                            
        if ((df_merged['Event_Msg_Type'].iloc[player_index[i+1]] == 8) &
                (not checkIfNotFreeThrow(i+1, player_index)) &
                (df_merged['Person2'].iloc[player_index[i+1]] == player) & 
                (df_merged['Game_id'].iloc[player_index[findNonFreethrow(i, player_index) + i]] == df_merged['Game_id'].iloc[i])):
                
                                    #setting 1 at correct spot
                                     df_merged[player].iloc[player_index[findNonFreethrow(i, player_index) + i]] = 1

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self._setitem_with_indexer(indexer, value)
