### Index objects and labeled data

#### pandas Data Structures
- Key building blocks
 - Indexes: Sequence of labels
 - Series: 1D array with Index
 - DataFrames: 2D array with Series as columns
- Indexes
 - Immutable - Like Dictionary Keys
 - Homogeneous in data type (like NumPy arrays)

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build a Series from a plain list of prices. No index is supplied, so
# pandas falls back to the default integer labels 0..n-1.
prices = [10.70, 10.86, 10.74, 10.71, 10.79]
shares = pd.Series(data=prices)
shares

0    10.70
1    10.86
2    10.74
3    10.71
4    10.79
dtype: float64

In [4]:
# Same prices, but labelled by weekday through an explicit index.
days = ['Mon', 'Tue', 'Wed', 'Thur', 'Fri']
shares = pd.Series(data=prices, index=days)
shares

Mon     10.70
Tue     10.86
Wed     10.74
Thur    10.71
Fri     10.79
dtype: float64

In [5]:
# Inspect the Index object that holds the Series' labels.
shares.index

Index(['Mon', 'Tue', 'Wed', 'Thur', 'Fri'], dtype='object')

In [6]:
# Positional access on the Index returns a single label.
shares.index[2]

'Wed'

In [7]:
# Slicing the Index returns another Index (here: the first two labels).
shares.index[:2]

Index(['Mon', 'Tue'], dtype='object')

In [8]:
# Negative slices work as with lists (here: the last two labels).
shares.index[-2:]

Index(['Thur', 'Fri'], dtype='object')

In [9]:
# The index has not been given a name yet, so this evaluates to None
# and the cell produces no output.
shares.index.name

In [11]:
# Give the index a name; it is shown as a header above the labels
# whenever the Series is displayed.
shares.index.name = 'weekday'

In [12]:
# Display the Series to see the index name printed above the labels.
shares

weekday
Mon     10.70
Tue     10.86
Wed     10.74
Thur    10.71
Fri     10.79
dtype: float64

In [13]:
# Index objects are immutable, so individual labels cannot be reassigned;
# the entire index, however, can be replaced at once.
# Assigning a plain list creates a new, unnamed index, so the 'weekday'
# name no longer appears in the output.
shares.index = ['Monday','Tuesday', 'Wednesday', 'Thursday', 'Friday']
shares 

Monday       10.70
Tuesday      10.86
Wednesday    10.74
Thursday     10.71
Friday       10.79
dtype: float64

In [25]:
# Load the sales table and use its 'month' column as the row index.
sales = pd.read_csv('sales/sales.csv', index_col='month')
sales

Unnamed: 0_level_0,eggs,salt,spam
month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


In [26]:
# Upper-case every month label, then swap the whole index in one assignment
# (single labels cannot be edited in place). The list-based index has no
# name, so the 'month' header disappears from the display.
new_idx = list(map(str.upper, sales.index))
sales.index = new_idx
sales

Unnamed: 0,eggs,salt,spam
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52
JUN,205,60.0,55


In [27]:
# Name the row index; the name is displayed in the header above the labels.
sales.index.name = 'MONTHS'
sales

Unnamed: 0_level_0,eggs,salt,spam
MONTHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52
JUN,205,60.0,55


In [30]:
# The columns are an Index too, so they can be named the same way.
sales.columns.name = 'PRODUCTS'
sales

PRODUCTS,eggs,salt,spam
MONTHS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
JAN,47,12.0,17
FEB,110,50.0,31
MAR,221,89.0,72
APR,77,87.0,20
MAY,132,,52
JUN,205,60.0,55


In [31]:
# Replace the row index with a plain list of labels. The new index has no
# name, so 'MONTHS' disappears from the display while the columns name
# 'PRODUCTS' is kept.
months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun']
sales.index = months
sales

PRODUCTS,eggs,salt,spam
Jan,47,12.0,17
Feb,110,50.0,31
Mar,221,89.0,72
Apr,77,87.0,20
May,132,,52
Jun,205,60.0,55


### Hierarchical Indexing

In [35]:
# Reload the raw table and prepare it for hierarchical indexing:
#   * 'state' is added as a new column (appended on the right),
#   * 'month' is overwritten in place with a numeric month per state.
# No explicit drop of 'month' is needed: assigning to an existing column
# name replaces its values. (DataFrame.drop returns a new frame, so calling
# it without keeping the result has no effect on `sales`.)
sales = pd.read_csv('sales/sales.csv').assign(
    state=['CA', 'CA', 'NY', 'NY', 'TX', 'TX'],
    month=[1, 2, 1, 2, 1, 2],
)
sales

Unnamed: 0,month,eggs,salt,spam,state
0,1,47,12.0,17,CA
1,2,110,50.0,31,CA
2,1,221,89.0,72,NY
3,2,77,87.0,20,NY
4,1,132,,52,TX
5,2,205,60.0,55,TX


In [36]:
# Promote ('state', 'month') to a two-level row index and sort it so the
# rows are ordered by state, then by month.
sales = sales.set_index(['state', 'month']).sort_index()
sales

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


In [38]:
# Reload the raw table, attach a 'state' column, overwrite 'month' with a
# numeric month per state, and index the rows by state only.
# No explicit drop of 'month' is needed: assigning to an existing column
# name replaces its values. (DataFrame.drop returns a new frame, so calling
# it without keeping the result has no effect on `sales`.)
sales = (
    pd.read_csv('sales/sales.csv')
    .assign(
        state=['CA', 'CA', 'NY', 'NY', 'TX', 'TX'],
        month=[1, 2, 1, 2, 1, 2],
    )
    .set_index('state')
)
sales

Unnamed: 0_level_0,month,eggs,salt,spam
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,1,47,12.0,17
CA,2,110,50.0,31
NY,1,221,89.0,72
NY,2,77,87.0,20
TX,1,132,,52
TX,2,205,60.0,55


In [39]:
# With 'state' as the (non-unique) index, .loc returns every row labelled 'NY'.
sales.loc['NY']

Unnamed: 0_level_0,month,eggs,salt,spam
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
NY,1,221,89.0,72
NY,2,77,87.0,20


In [42]:
# Reload the raw table and index it by (state, month).
# No explicit drop of 'month' is needed: assigning to an existing column
# name replaces its values. (DataFrame.drop returns a new frame, so calling
# it without keeping the result has no effect on `sales`.)
# sort_index() keeps the MultiIndex lexsorted regardless of the row order in
# the CSV; pandas requires a lexsorted index for slice-based selection on a
# MultiIndex.
sales = (
    pd.read_csv('sales/sales.csv')
    .assign(
        state=['CA', 'CA', 'NY', 'NY', 'TX', 'TX'],
        month=[1, 2, 1, 2, 1, 2],
    )
    .set_index(['state', 'month'])
    .sort_index()
)

# A full (state, month) tuple identifies a single row, returned as a Series.
NY_month1 = sales.loc[('NY', 1)]
NY_month1

eggs    221.0
salt     89.0
spam     72.0
Name: (NY, 1), dtype: float64

In [43]:
# Select month 2 for a list of states: the first slicer element picks the
# outer-level labels, the second the inner-level label; ':' keeps all columns.
CA_TX_month2 = sales.loc[pd.IndexSlice[['CA', 'TX'], 2], :]
CA_TX_month2

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,110,50.0,31
TX,2,205,60.0,55


In [45]:
# Select month 2 across every state: ':' on the outer level means
# "all states" (equivalent to slice(None)); the trailing ':' keeps all columns.
all_month2 = sales.loc[pd.IndexSlice[:, 2], :]
all_month2

Unnamed: 0_level_0,Unnamed: 1_level_0,eggs,salt,spam
state,month,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
CA,2,110,50.0,31
NY,2,77,87.0,20
TX,2,205,60.0,55


### Pivoting DataFrames