In [14]:
import pandas as pd

#Load the data. Create new columns for defensive rebounds and the score differential
data = pd.read_csv("nbagamestats.csv")

# Defensive rebounds are derived here as total rebounds minus offensive rebounds,
# for the row's team and for its opponent.
data["DefensiveRebounds"] = data["TotalRebounds"] - data["OffRebounds"]
data["Opp.DefensiveRebounds"] = data["Opp.TotalRebounds"] - data["Opp.OffRebounds"]

# Score differential from the point of view of the row's "Team" column
# (positive when TeamPoints exceeds OpponentPoints).
data["ScoreDiff"] = data["TeamPoints"] - data["OpponentPoints"]

# DataFrame.shape is (rows, columns); the two names were previously swapped.
nrow, ncol = data.shape


#Function to get the features for a specific team
def get_team_dataframe(teamname, allteamdata):
    """Return the rows of ``allteamdata`` whose ``Team`` column equals ``teamname``.

    Parameters
    ----------
    teamname : value compared against the ``Team`` column (e.g. ``"ATL"``).
    allteamdata : pandas.DataFrame containing a ``Team`` column.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index labels and column order;
        empty when no row matches.
    """
    is_requested_team = allteamdata['Team'].eq(teamname)
    return allteamdata.loc[is_requested_team]

#Index of team names
# Distinct values of the Team column, in order of first appearance.
team_index = data["Team"].unique()



In [15]:
#Dataframes for each team, for reference.
# One frame per team abbreviation, bound to a module-level name equal to the
# abbreviation. The target names and the abbreviation strings below must stay
# in the same order.
(ATL, BOS, BRK, CHO, CHI, CLE, DAL, DEN, DET, GSW,
 HOU, IND, LAC, LAL, MEM, MIA, MIL, MIN, NOP, NYK,
 OKC, ORL, PHI, PHO, POR, SAC, SAS, TOR, UTA, WAS) = (
    get_team_dataframe(abbrev, data)
    for abbrev in (
        "ATL", "BOS", "BRK", "CHO", "CHI", "CLE", "DAL", "DEN", "DET", "GSW",
        "HOU", "IND", "LAC", "LAL", "MEM", "MIA", "MIL", "MIN", "NOP", "NYK",
        "OKC", "ORL", "PHI", "PHO", "POR", "SAC", "SAS", "TOR", "UTA", "WAS",
    )
)


In [27]:
#Dictionary of the teams and their dataframes
# Maps each entry of team_index to the rows of `data` for that team.
d = {team: get_team_dataframe(team, data) for team in team_index}


#Old moving average function
def team_moving_average(teamdata, window=5):
    """Return trailing averages of a team's box-score stats entering each game.

    Parameters
    ----------
    teamdata : pandas.DataFrame
        Rows for a single team containing the stat columns listed below.
        Rows are averaged in the order given (assumed chronological -
        TODO confirm against the source file).
    window : int, default 5
        Number of preceding rows averaged for each game.

    Returns
    -------
    pandas.DataFrame
        The stat columns only, keeping ``teamdata``'s index labels. Rows that
        contain a NaN after averaging are dropped, which removes the first
        ``window`` rows because they lack a full history.
    """
    stat_columns = [
        "TeamPoints", "FieldGoals", "FieldGoalsAttempted", "X3PointShots",
        "X3PointShotsAttempted", "FreeThrows", "FreeThrowsAttempted",
        "OffRebounds", "DefensiveRebounds", "Assists", "Steals",
        "Blocks", "Turnovers", "TotalFouls",
    ]
    # shift(1) moves each average down one row, so the value attached to a game
    # is built from the games before it and excludes that game's own stats.
    trailing_means = teamdata[stat_columns].rolling(window=window).mean().shift(1)
    return trailing_means.dropna()
    
    
#New moving average function
def team_moving_average2(teamname, data, window=10):
    """Return one team's trailing stat averages alongside game identifiers.

    Parameters
    ----------
    teamname : value matched against the ``Team`` column of ``data``.
    data : pandas.DataFrame
        Game rows for all teams; must contain ``Team``, ``Opponent``,
        ``Date``, ``ScoreDiff`` and the stat columns listed below.
    window : int, default 10
        Number of preceding rows averaged for each game.

    Returns
    -------
    pandas.DataFrame
        One row per row of the team's data, in this column order: the averaged
        stat columns, then ``Opponent``, ``Date`` and ``ScoreDiff`` copied
        unchanged from the same row. Rows without a full ``window`` of history
        are kept and hold NaN in the stat columns.
    """
    teamdata = get_team_dataframe(teamname, data)
    stat_columns = [
        "FieldGoals", "FieldGoalsAttempted", "X3PointShots",
        "X3PointShotsAttempted", "FreeThrows", "FreeThrowsAttempted",
        "OffRebounds", "DefensiveRebounds", "Assists", "Steals",
        "Blocks", "Turnovers", "TotalFouls",
    ]
    # shift(1) so that a game's averages are built from earlier rows only and
    # never include that game's own box score.
    trailing_means = teamdata[stat_columns].rolling(window=window).mean().shift(1)
    # Both frames share teamdata's index, so the join lines rows up one-to-one.
    game_info = teamdata[["Opponent", "Date", "ScoreDiff"]]
    return trailing_means.join(game_info)

In [None]:
#Combined frame
# Gathers the thirty per-team `<TEAM>_R` objects into a single list.
# NOTE(review): none of the `*_R` names is assigned in the cells visible here, and
# this cell has no execution count; on a fresh kernel it presumably raises
# NameError unless another cell defines them - confirm.
data_frames = [ATL_R, BOS_R, BRK_R, CHO_R, CHI_R, CLE_R, DAL_R, DEN_R, DET_R, GSW_R, 
               HOU_R, IND_R, LAC_R, LAL_R, MEM_R, MIA_R, MIL_R, MIN_R, NOP_R, NYK_R, 
               OKC_R, ORL_R, PHI_R, PHO_R, POR_R, SAC_R, SAS_R, TOR_R, UTA_R, WAS_R]
    




In [None]:
#Opponent feature set
# Selects the "Opp."-prefixed counterparts of the team stat columns.
# NOTE(review): `teamdata` is only a parameter/local of the moving-average
# functions in the cells visible here, not a notebook-level variable, so this
# cell presumably raises NameError on a fresh kernel - confirm, or bind
# `teamdata` to a team frame first.
quant_data_opp = teamdata[["Opp.FieldGoals", "Opp.FieldGoalsAttempted", "Opp.3PointShots", "Opp.3PointShotsAttempted", 
              "Opp.FreeThrows", "Opp.FreeThrowsAttempted", "Opp.OffRebounds", "Opp.DefensiveRebounds", "Opp.Assists", "Opp.Steals", 
             "Opp.Blocks", "Opp.Turnovers", "Opp.TotalFouls"]]

In [190]:
#Create final master dataframe
def _build_master_dataframe(games):
    """Build one feature row per record of ``games`` from home/away rolling stats.

    For every row whose ``Home`` value is ``"Home"`` or ``"Away"``, the rolling
    frames of the home and away teams (from ``team_moving_average2``) are
    filtered to the game's date and merged on ``Date``. Home-team columns get
    the ``_x`` suffix and away-team columns ``_y``. ``HomeMinusAway`` is the
    row's ``ScoreDiff``, negated when the row's team is the away side. Rows
    with any other ``Home`` value are skipped.

    Parameters
    ----------
    games : pandas.DataFrame
        Game rows with ``Home``, ``Team``, ``Opponent``, ``Date`` and
        ``ScoreDiff`` columns, plus whatever ``team_moving_average2`` needs.

    Returns
    -------
    pandas.DataFrame
        The merged rows stacked in input order; an empty frame when no row
        qualifies.
    """
    rolling_by_team = {}

    def rolling_for(team):
        # The rolling frame depends only on the team and the full table, so it
        # is computed once per team instead of twice per game row.
        if team not in rolling_by_team:
            rolling_by_team[team] = team_moving_average2(team, games)
        return rolling_by_team[team]

    # NOTE(review): both the "Home" and the "Away" record produce a row. If the
    # CSV stores every game once per team (assumption - confirm), each matchup
    # ends up in the result twice.
    matchup_rows = []
    for _, row in games.iterrows():
        venue = row["Home"]
        if venue == "Home":
            hometeam, awayteam, sign = row["Team"], row["Opponent"], 1
        elif venue == "Away":
            hometeam, awayteam, sign = row["Opponent"], row["Team"], -1
        else:
            continue

        gamedate = row["Date"]
        homedf = rolling_for(hometeam)
        awaydf = rolling_for(awayteam)
        homestats = homedf.loc[homedf["Date"] == gamedate]
        awaystats = awaydf.loc[awaydf["Date"] == gamedate]

        finalrow = pd.merge(homestats, awaystats, on="Date", how="left")
        # ScoreDiff is Team minus Opponent; flip the sign for away rows so the
        # target is always home minus away.
        finalrow["HomeMinusAway"] = sign * int(row["ScoreDiff"])
        matchup_rows.append(finalrow)

    if not matchup_rows:
        return pd.DataFrame()
    # Concatenate once at the end: DataFrame.append inside the loop copied the
    # accumulated frame on every iteration and no longer exists in pandas 2.x.
    return pd.concat(matchup_rows)


complete_df = _build_master_dataframe(data)



In [195]:
#Correct number of rows
# This expression is evaluated but not echoed, since it is not the cell's last
# statement.
complete_df.shape

#Drop the N/A values
# Keep only the rows that have no missing value in any column.
curated_df = complete_df.dropna(axis=0, how="any")

In [197]:
#Save data to computer
# Writes curated_df with a header row and without the index column.
# NOTE(review): the destination is an absolute, user-specific Windows path, so
# this cell will not run on another machine - consider a configurable output
# directory. Per the pandas docs to_csv returns None when given a path, so
# export_csv is presumably None - confirm before relying on it.
export_csv = curated_df.to_csv(
    path_or_buf=r'C:\Users\zacharydiamandis\Desktop\export_dataframe3.csv',
    header=True,
    index=None,
)
        
        

In [196]:
# Display the (rows, columns) shape of the frame left after dropping N/A rows.
curated_df.shape

(9524, 32)

In [199]:
# Display the column labels of the master frame as a plain Python list.
complete_df.columns.tolist()

['FieldGoals_x',
 'FieldGoalsAttempted_x',
 'X3PointShots_x',
 'X3PointShotsAttempted_x',
 'FreeThrows_x',
 'FreeThrowsAttempted_x',
 'OffRebounds_x',
 'DefensiveRebounds_x',
 'Assists_x',
 'Steals_x',
 'Blocks_x',
 'Turnovers_x',
 'TotalFouls_x',
 'Opponent_x',
 'Date',
 'ScoreDiff_x',
 'FieldGoals_y',
 'FieldGoalsAttempted_y',
 'X3PointShots_y',
 'X3PointShotsAttempted_y',
 'FreeThrows_y',
 'FreeThrowsAttempted_y',
 'OffRebounds_y',
 'DefensiveRebounds_y',
 'Assists_y',
 'Steals_y',
 'Blocks_y',
 'Turnovers_y',
 'TotalFouls_y',
 'Opponent_y',
 'ScoreDiff_y',
 'HomeMinusAway']