# NBA Preparation

In [32]:
import os.path
import datetime
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

In [33]:
if not os.path.exists( "../../data/raw/NBA07-17.csv" ):
    print("Missing dataset file")

In [34]:
nba = pd.read_csv("../../data/raw/NBA07-17.csv", index_col="Date", parse_dates=True)
nba.shape

(12726, 9)

In [35]:
nba.head()

Unnamed: 0_level_0,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Unnamed: 6,Unnamed: 7,Attend.,Notes
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2007-10-30,7:30 pm,Utah Jazz,117,Golden State Warriors,96,Box Score,,19832.0,
2007-10-30,7:30 pm,Houston Rockets,95,Los Angeles Lakers,93,Box Score,,18997.0,
2007-10-30,7:00 pm,Portland Trail Blazers,97,San Antonio Spurs,106,Box Score,,18797.0,
2007-10-31,8:00 pm,Dallas Mavericks,92,Cleveland Cavaliers,74,Box Score,,20562.0,
2007-10-31,8:30 pm,Seattle SuperSonics,103,Denver Nuggets,120,Box Score,,19380.0,


In [36]:
nba = nba.drop(nba.columns[[5, 6, 8]], axis=1)


In [37]:
nba.tail()

Unnamed: 0_level_0,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Attend.
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-06-01,9:00 pm,Cleveland Cavaliers,91,Golden State Warriors,113,19596.0
2017-06-04,8:00 pm,Cleveland Cavaliers,113,Golden State Warriors,132,19596.0
2017-06-07,9:00 pm,Golden State Warriors,118,Cleveland Cavaliers,113,20562.0
2017-06-09,9:00 pm,Golden State Warriors,116,Cleveland Cavaliers,137,20562.0
2017-06-12,9:00 pm,Cleveland Cavaliers,120,Golden State Warriors,129,19596.0


In [38]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 12726 entries, 2007-10-30 to 2017-06-12
Data columns (total 6 columns):
Start (ET)         12726 non-null object
Visitor/Neutral    12726 non-null object
PTS                12726 non-null int64
Home/Neutral       12726 non-null object
PTS.1              12726 non-null int64
Attend.            12725 non-null float64
dtypes: float64(1), int64(2), object(3)
memory usage: 696.0+ KB


In [39]:
nba.isnull().sum()

Start (ET)         0
Visitor/Neutral    0
PTS                0
Home/Neutral       0
PTS.1              0
Attend.            1
dtype: int64

In [40]:
print(nba[nba["Attend."].isnull()])

           Start (ET) Visitor/Neutral  PTS     Home/Neutral  PTS.1  Attend.
Date                                                                       
2015-04-15    8:00 pm  Boston Celtics  105  Milwaukee Bucks    100      NaN


According to ESPN.com, the attendance of this game was 17,316.

In [41]:
nba["Attend."] = nba["Attend."].fillna('17316.0')

In [42]:
nba.isnull().sum()

Start (ET)         0
Visitor/Neutral    0
PTS                0
Home/Neutral       0
PTS.1              0
Attend.            0
dtype: int64

In [43]:
nba.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 12726 entries, 2007-10-30 to 2017-06-12
Data columns (total 6 columns):
Start (ET)         12726 non-null object
Visitor/Neutral    12726 non-null object
PTS                12726 non-null int64
Home/Neutral       12726 non-null object
PTS.1              12726 non-null int64
Attend.            12726 non-null object
dtypes: int64(2), object(4)
memory usage: 696.0+ KB


### Identifying Wind and Losses

In [44]:
conditions = [
    (nba['PTS.1'] > nba['PTS']),
    (nba['PTS'] == nba['PTS.1'])]
choices = ['Home Win', 'Draw']
nba['Result'] = np.select(conditions, choices, default='Away Win')

In [45]:
nba.head()

Unnamed: 0_level_0,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Attend.,Result
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2007-10-30,7:30 pm,Utah Jazz,117,Golden State Warriors,96,19832,Away Win
2007-10-30,7:30 pm,Houston Rockets,95,Los Angeles Lakers,93,18997,Away Win
2007-10-30,7:00 pm,Portland Trail Blazers,97,San Antonio Spurs,106,18797,Home Win
2007-10-31,8:00 pm,Dallas Mavericks,92,Cleveland Cavaliers,74,20562,Away Win
2007-10-31,8:30 pm,Seattle SuperSonics,103,Denver Nuggets,120,19380,Home Win


In [46]:
pt = nba.groupby('Visitor/Neutral')[('PTS')].count()

In [47]:
pt

Visitor/Neutral
Atlanta Hawks             445
Boston Celtics            457
Brooklyn Nets             213
Charlotte Bobcats         283
Charlotte Hornets         121
Chicago Bulls             430
Cleveland Cavaliers       447
Dallas Mavericks          433
Denver Nuggets            417
Detroit Pistons           408
Golden State Warriors     432
Houston Rockets           427
Indiana Pacers            428
Los Angeles Clippers      425
Los Angeles Lakers        440
Memphis Grizzlies         427
Miami Heat                450
Milwaukee Bucks           409
Minnesota Timberwolves    396
New Jersey Nets           197
New Orleans Hornets       249
New Orleans Pelicans      160
New York Knicks           409
Oklahoma City Thunder     403
Orlando Magic             427
Philadelphia 76ers        412
Phoenix Suns              412
Portland Trail Blazers    424
Sacramento Kings          397
San Antonio Spurs         457
Seattle SuperSonics        41
Toronto Raptors           416
Utah Jazz               

In [48]:
nba['Difference'] = nba['PTS'] - nba['PTS.1']

In [49]:
nba.head()

Unnamed: 0_level_0,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Attend.,Result,Difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2007-10-30,7:30 pm,Utah Jazz,117,Golden State Warriors,96,19832,Away Win,21
2007-10-30,7:30 pm,Houston Rockets,95,Los Angeles Lakers,93,18997,Away Win,2
2007-10-30,7:00 pm,Portland Trail Blazers,97,San Antonio Spurs,106,18797,Home Win,-9
2007-10-31,8:00 pm,Dallas Mavericks,92,Cleveland Cavaliers,74,20562,Away Win,18
2007-10-31,8:30 pm,Seattle SuperSonics,103,Denver Nuggets,120,19380,Home Win,-17


### Filtering out teams

In [50]:
nba = nba[(nba.values  == "Los Angeles Lakers" )|(nba.values  == "Chicago Bulls" )
    |(nba.values  == "Los Angeles Clippers" )|(nba.values  == "Minnesota Timberwolves" )
          |(nba.values  == "Denver Nuggets" ) ]

In [51]:
nba.head(50)

Unnamed: 0_level_0,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Attend.,Result,Difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2007-10-30,7:30 pm,Houston Rockets,95,Los Angeles Lakers,93,18997,Away Win,2
2007-10-31,8:30 pm,Seattle SuperSonics,103,Denver Nuggets,120,19380,Home Win,-17
2007-10-31,7:30 pm,Chicago Bulls,103,New Jersey Nets,112,17342,Home Win,-9
2007-10-31,7:00 pm,Philadelphia 76ers,97,Toronto Raptors,106,19800,Home Win,-9
2007-11-02,8:00 pm,Washington Wizards,83,Boston Celtics,103,18624,Home Win,-20
2007-11-02,7:30 pm,Philadelphia 76ers,96,Chicago Bulls,85,22034,Away Win,11
2007-11-02,7:30 pm,Philadelphia 76ers,96,Chicago Bulls,85,22034,Away Win,11
2007-11-02,7:30 pm,Golden State Warriors,114,Los Angeles Clippers,120,19060,Home Win,-6
2007-11-02,7:00 pm,Denver Nuggets,99,Minnesota Timberwolves,91,19443,Away Win,8
2007-11-02,7:00 pm,Denver Nuggets,99,Minnesota Timberwolves,91,19443,Away Win,8


In [52]:
p1 = nba.groupby('Visitor/Neutral')[('PTS')].count()
p1.sort_values(ascending = False)

Visitor/Neutral
Boston Celtics            549
Los Angeles Lakers        538
Chicago Bulls             514
Denver Nuggets            510
Los Angeles Clippers      507
Philadelphia 76ers        493
Minnesota Timberwolves    478
Utah Jazz                 119
Golden State Warriors     111
Cleveland Cavaliers       109
Houston Rockets           108
Miami Heat                108
San Antonio Spurs         108
Dallas Mavericks          107
Phoenix Suns              106
Memphis Grizzlies         105
Portland Trail Blazers    105
Oklahoma City Thunder     104
Atlanta Hawks             103
Orlando Magic             103
Detroit Pistons           100
Sacramento Kings          100
New York Knicks            99
Washington Wizards         97
Milwaukee Bucks            96
Indiana Pacers             96
Toronto Raptors            93
New Orleans Hornets        65
Charlotte Bobcats          62
Brooklyn Nets              50
New Jersey Nets            48
New Orleans Pelicans       39
Charlotte Hornets       

In [53]:
nba.duplicated().sum()

613

In [54]:
nba.duplicated()

Date
2007-10-30    False
2007-10-31    False
2007-10-31    False
2007-10-31    False
2007-11-02    False
2007-11-02    False
2007-11-02     True
2007-11-02    False
2007-11-02    False
2007-11-02     True
2007-11-02    False
2007-11-03    False
2007-11-03    False
2007-11-04    False
2007-11-04    False
2007-11-04    False
2007-11-04    False
2007-11-04    False
2007-11-06    False
2007-11-06     True
2007-11-06    False
2007-11-06    False
2007-11-06    False
2007-11-07    False
2007-11-07     True
2007-11-07    False
2007-11-07    False
2007-11-08    False
2007-11-09    False
2007-11-09    False
              ...  
2017-04-16    False
2017-04-16     True
2017-04-18    False
2017-04-18     True
2017-04-18    False
2017-04-21    False
2017-04-21     True
2017-04-21    False
2017-04-23    False
2017-04-23     True
2017-04-23    False
2017-04-25    False
2017-04-26    False
2017-04-26     True
2017-04-28    False
2017-04-28     True
2017-04-28    False
2017-04-30    False
2017-04-30    F

In [55]:
nba.head(50)

Unnamed: 0_level_0,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Attend.,Result,Difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2007-10-30,7:30 pm,Houston Rockets,95,Los Angeles Lakers,93,18997,Away Win,2
2007-10-31,8:30 pm,Seattle SuperSonics,103,Denver Nuggets,120,19380,Home Win,-17
2007-10-31,7:30 pm,Chicago Bulls,103,New Jersey Nets,112,17342,Home Win,-9
2007-10-31,7:00 pm,Philadelphia 76ers,97,Toronto Raptors,106,19800,Home Win,-9
2007-11-02,8:00 pm,Washington Wizards,83,Boston Celtics,103,18624,Home Win,-20
2007-11-02,7:30 pm,Philadelphia 76ers,96,Chicago Bulls,85,22034,Away Win,11
2007-11-02,7:30 pm,Philadelphia 76ers,96,Chicago Bulls,85,22034,Away Win,11
2007-11-02,7:30 pm,Golden State Warriors,114,Los Angeles Clippers,120,19060,Home Win,-6
2007-11-02,7:00 pm,Denver Nuggets,99,Minnesota Timberwolves,91,19443,Away Win,8
2007-11-02,7:00 pm,Denver Nuggets,99,Minnesota Timberwolves,91,19443,Away Win,8


In [56]:
nba = nba.drop_duplicates(keep = 'first')

In [57]:
nba.head(50)

Unnamed: 0_level_0,Start (ET),Visitor/Neutral,PTS,Home/Neutral,PTS.1,Attend.,Result,Difference
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2007-10-30,7:30 pm,Houston Rockets,95,Los Angeles Lakers,93,18997,Away Win,2
2007-10-31,8:30 pm,Seattle SuperSonics,103,Denver Nuggets,120,19380,Home Win,-17
2007-10-31,7:30 pm,Chicago Bulls,103,New Jersey Nets,112,17342,Home Win,-9
2007-10-31,7:00 pm,Philadelphia 76ers,97,Toronto Raptors,106,19800,Home Win,-9
2007-11-02,8:00 pm,Washington Wizards,83,Boston Celtics,103,18624,Home Win,-20
2007-11-02,7:30 pm,Philadelphia 76ers,96,Chicago Bulls,85,22034,Away Win,11
2007-11-02,7:30 pm,Golden State Warriors,114,Los Angeles Clippers,120,19060,Home Win,-6
2007-11-02,7:00 pm,Denver Nuggets,99,Minnesota Timberwolves,91,19443,Away Win,8
2007-11-02,7:30 pm,Los Angeles Lakers,119,Phoenix Suns,98,18422,Away Win,21
2007-11-03,7:30 pm,Chicago Bulls,72,Milwaukee Bucks,78,18717,Home Win,-6


In [58]:
nba.duplicated().sum()

0

In [59]:
nba.to_csv('600_NBA.csv')