In [1]:
import requests
import pandas as pd
import numpy as np

In [2]:
download_url = "https://raw.githubusercontent.com/fivethirtyeight/data/master/nba-elo/nbaallelo.csv"
target_csv_path = "nba_all_elo.csv"

response = requests.get(download_url)
with open(target_csv_path, "wb") as fout:
    fout.write(response.content)

In [3]:
nba = pd.read_csv(target_csv_path)
type(nba)

pandas.core.frame.DataFrame

In [4]:
nba.shape  # rows, columns

(126314, 23)

In [5]:
pd.set_option("display.precision", 2)  # leave 2 decimal places for all floats
nba.head()

Unnamed: 0,gameorder,game_id,lg_id,_iscopy,year_id,date_game,seasongame,is_playoffs,team_id,fran_id,...,win_equiv,opp_id,opp_fran,opp_pts,opp_elo_i,opp_elo_n,game_location,game_result,forecast,notes
0,1,194611010TRH,NBA,0,1947,11/1/1946,1,0,TRH,Huskies,...,40.29,NYK,Knicks,68,1300.0,1306.72,H,L,0.64,
1,1,194611010TRH,NBA,1,1947,11/1/1946,1,0,NYK,Knicks,...,41.71,TRH,Huskies,66,1300.0,1293.28,A,W,0.36,
2,2,194611020CHS,NBA,0,1947,11/2/1946,1,0,CHS,Stags,...,42.01,NYK,Knicks,47,1306.72,1297.07,H,W,0.63,
3,2,194611020CHS,NBA,1,1947,11/2/1946,2,0,NYK,Knicks,...,40.69,CHS,Stags,63,1300.0,1309.65,A,L,0.37,
4,3,194611020DTF,NBA,0,1947,11/2/1946,1,0,DTF,Falcons,...,38.86,WSC,Capitols,50,1300.0,1320.38,H,L,0.64,


In [6]:
nba.info()  # examine column labels, dtypes and other dataset metadata

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126314 entries, 0 to 126313
Data columns (total 23 columns):
gameorder        126314 non-null int64
game_id          126314 non-null object
lg_id            126314 non-null object
_iscopy          126314 non-null int64
year_id          126314 non-null int64
date_game        126314 non-null object
seasongame       126314 non-null int64
is_playoffs      126314 non-null int64
team_id          126314 non-null object
fran_id          126314 non-null object
pts              126314 non-null int64
elo_i            126314 non-null float64
elo_n            126314 non-null float64
win_equiv        126314 non-null float64
opp_id           126314 non-null object
opp_fran         126314 non-null object
opp_pts          126314 non-null int64
opp_elo_i        126314 non-null float64
opp_elo_n        126314 non-null float64
game_location    126314 non-null object
game_result      126314 non-null object
forecast         126314 non-null float64
notes     

In [7]:
nba.describe()

Unnamed: 0,gameorder,_iscopy,year_id,seasongame,is_playoffs,pts,elo_i,elo_n,win_equiv,opp_pts,opp_elo_i,opp_elo_n,forecast
count,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0,126314.0
mean,31579.0,0.5,1988.2,43.53,0.06,102.73,1495.24,1495.24,41.71,102.73,1495.24,1495.24,0.5
std,18231.93,0.5,17.58,25.38,0.24,14.81,112.14,112.46,10.63,14.81,112.14,112.46,0.22
min,1.0,0.0,1947.0,1.0,0.0,0.0,1091.64,1085.77,10.15,0.0,1091.64,1085.77,0.02
25%,15790.0,0.0,1975.0,22.0,0.0,93.0,1417.24,1416.99,34.1,93.0,1417.24,1416.99,0.33
50%,31579.0,0.5,1990.0,43.0,0.0,103.0,1500.95,1500.95,42.11,103.0,1500.95,1500.95,0.5
75%,47368.0,1.0,2003.0,65.0,0.0,112.0,1576.06,1576.29,49.64,112.0,1576.06,1576.29,0.67
max,63157.0,1.0,2015.0,108.0,1.0,186.0,1853.1,1853.1,71.11,186.0,1853.1,1853.1,0.98


In [8]:
nba.describe(include=np.object)

Unnamed: 0,game_id,lg_id,date_game,team_id,fran_id,opp_id,opp_fran,game_location,game_result,notes
count,126314,126314,126314,126314,126314,126314,126314,126314,126314,5424
unique,63157,2,12426,104,53,104,53,3,2,231
top,200101230SEA,NBA,4/17/2013,BOS,Lakers,BOS,Lakers,H,W,at New York NY
freq,2,118016,30,5997,6024,5997,6024,63138,63157,440


In [9]:
nba["team_id"].value_counts()[:3]

BOS    5997
NYK    5769
LAL    5078
Name: team_id, dtype: int64

In [10]:
nba["fran_id"].value_counts()[:3]

Lakers     6024
Celtics    5997
Knicks     5769
Name: fran_id, dtype: int64

In [11]:
nba.loc[nba["fran_id"] == "Lakers", "team_id"].value_counts()

LAL    5078
MNL     946
Name: team_id, dtype: int64

In [12]:
nba.loc[nba["team_id"] == "MNL", "date_game"].min()

'1/1/1949'

In [13]:
nba.loc[nba["team_id"] == "MNL", "date_game"].max()

'4/9/1959'

In [14]:
nba.loc[nba["team_id"] == "MNL", "date_game"].agg(("min", "max"))

min    1/1/1949
max    4/9/1959
Name: date_game, dtype: object

In [15]:
nba.loc[nba["team_id"] == "BOS", "pts"].sum()  # total points of Boston Celtics

626484

In [16]:
revenues = pd.Series([5555, 7000, 1980])
revenues

0    5555
1    7000
2    1980
dtype: int64

In [17]:
revenues.values

array([5555, 7000, 1980], dtype=int64)

In [18]:
revenues.index

RangeIndex(start=0, stop=3, step=1)

In [19]:
city_revenues = pd.Series(revenues.values, index=["Amsterdam", "Toronto", "Tokyo"])
city_revenues

Amsterdam    5555
Toronto      7000
Tokyo        1980
dtype: int64

In [20]:
city_employee_count = pd.Series({"Amsterdam": 5, "Tokyo": 8})
city_employee_count

Amsterdam    5
Tokyo        8
dtype: int64

In [21]:
city_employee_count.keys()

Index(['Amsterdam', 'Tokyo'], dtype='object')

In [22]:
"Tokyo" in city_employee_count

True

In [23]:
"New York" in city_employee_count

False

In [24]:
city_data = pd.DataFrame({
    "revenue": city_revenues,
    "employee_count": city_employee_count
})
city_data

Unnamed: 0,revenue,employee_count
Amsterdam,5555,5.0
Tokyo,1980,8.0
Toronto,7000,


In [25]:
city_data.index

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [26]:
city_data.values

array([[5.555e+03, 5.000e+00],
       [1.980e+03, 8.000e+00],
       [7.000e+03,       nan]])

In [27]:
city_data.axes[0]

Index(['Amsterdam', 'Tokyo', 'Toronto'], dtype='object')

In [28]:
city_data.axes[1]

Index(['revenue', 'employee_count'], dtype='object')

In [29]:
city_data.keys()

Index(['revenue', 'employee_count'], dtype='object')

In [30]:
"revenue" in city_data.keys()

True

In [31]:
"points" in nba.keys()

False

In [32]:
"pts" in nba.keys()

True

In [33]:
city_revenues["Toronto"]

7000

In [34]:
city_revenues[1]

7000

In [35]:
city_revenues[-1]

1980

In [36]:
city_revenues[1:]

Toronto    7000
Tokyo      1980
dtype: int64

In [37]:
city_revenues["Toronto":]

Toronto    7000
Tokyo      1980
dtype: int64

+ *.loc* refers to the **label index** and **include** closing element of the slice
+ *.iloc* refers to the **positional index** and **exclude** closing element of the slice