<h1 style="color:cadetblue; font-size:2em;">pandas Data Structures</h1>

In [37]:
import pandas as pd


In [38]:
# Sample price values used to build the Series objects in the cells below.
prices = [
    10.70,
    10.86,
    10.74,
    10.71,
    10.79,
]

In [39]:
# Creating Series
# No index is supplied, so the recorded output shows integer labels 0..4.
shares = pd.Series(data=prices)
print(shares)

0    10.70
1    10.86
2    10.74
3    10.71
4    10.79
dtype: float64


In [40]:
# Creating an Index
# One label per price; the labels replace the default integer positions.
days = [
    'Mon',
    'Tue',
    'Wed',
    'Thur',
    'Fri',
]
shares = pd.Series(data=prices, index=days)
print(shares)

Mon     10.70
Tue     10.86
Wed     10.74
Thur    10.71
Fri     10.79
dtype: float64


In [41]:
# Examining an index
# Shown in order, one per line: the whole Index, the label at position 2,
# the first two labels, and the last two labels. Slices are Index objects.
print(
    shares.index,
    shares.index[2],
    shares.index[:2],
    shares.index[-2:],
    sep='\n',
)

Index(['Mon', 'Tue', 'Wed', 'Thur', 'Fri'], dtype='object')
Wed
Index(['Mon', 'Tue'], dtype='object')
Index(['Thur', 'Fri'], dtype='object')


In [42]:
# Modifying index name
# The index has not been given a name yet, so this first print shows None.
print(shares.index.name)

# Assigning to .name labels the index; the label is printed above the rows.
shares.index.name = 'weekday'
print(shares)

None
weekday
Mon     10.70
Tue     10.86
Wed     10.74
Thur    10.71
Fri     10.79
dtype: float64


In [46]:
# Modifying index entries
# An Index is immutable: assigning to a single position raises TypeError.
# The error is the point of this cell, so catch it and print the message
# instead of letting it abort a top-to-bottom run of the notebook.
try:
    shares.index[2] = 'Wednesday'
except TypeError as err:
    print('TypeError:', err)

TypeError: Index does not support mutable operations

In [47]:
# Modifying index entries
# Assigning to a slice of the Index fails the same way as assigning to a
# single position: the Index is immutable. Catch the TypeError and print the
# message so the rest of the notebook keeps running.
try:
    shares.index[:4] = ['Monday', 'Tuesday', 'Wednesday', 'Thursday']
except TypeError as err:
    print('TypeError:', err)

TypeError: Index does not support mutable operations

In [48]:
# Modifying all index entries
# The index can be swapped out wholesale by assigning a new sequence with one
# label per row. The recorded output no longer shows the 'weekday' name,
# because the replacement index is a new, unnamed one.
shares.index = [
    'Monday',
    'Tuesday',
    'Wednesday',
    'Thursday',
    'Friday',
]
print(shares)

Monday       10.70
Tuesday      10.86
Wednesday    10.74
Thursday     10.71
Friday       10.79
dtype: float64


<h1 style="color:cadetblue; font-size:2em;">Hierarchical Indexing</h1>

In [54]:
# Re-importing pandas is harmless if it is already loaded.
import pandas as pd

# Load the stock quotes; the path is relative to the notebook's working directory.
stocks = pd.read_csv(filepath_or_buffer='datasets/stocks.csv')
print(stocks)

         Date   Close    Volume Symbol
0  2016-10-03   31.50  14070500   CSCO
1  2016-10-03  112.52  21701800   AAPL
2  2016-10-03   57.42  19189500   MSFT
3  2016-10-04  113.00  29736800   AAPL
4  2016-10-04   57.24  20085900   MSFT
5  2016-10-04   31.35  18460400   CSCO
6  2016-10-05   57.64  16726400   MSFT
7  2016-10-05   31.59  11808600   CSCO
8  2016-10-05  113.05  21453100   AAPL


In [55]:
# Setting Index
# Move the Symbol and Date columns into a two-level (hierarchical) row index.
# NOTE: this cell is not re-runnable on its own, because after the first run
# 'Symbol' and 'Date' are no longer columns of `stocks`.
stocks = stocks.set_index(keys=['Symbol', 'Date'])
print(stocks)

                    Close    Volume
Symbol Date                        
CSCO   2016-10-03   31.50  14070500
AAPL   2016-10-03  112.52  21701800
MSFT   2016-10-03   57.42  19189500
AAPL   2016-10-04  113.00  29736800
MSFT   2016-10-04   57.24  20085900
CSCO   2016-10-04   31.35  18460400
MSFT   2016-10-05   57.64  16726400
CSCO   2016-10-05   31.59  11808600
AAPL   2016-10-05  113.05  21453100


In [56]:
# MultiIndex on DataFrame
# Setting two columns as the index produces a MultiIndex; printing it shows
# the distinct values of each level plus the level names.
print(stocks.index)


MultiIndex(levels=[['AAPL', 'CSCO', 'MSFT'], ['2016-10-03', '2016-10-04', '2016-10-05']],
           labels=[[1, 0, 2, 0, 2, 1, 2, 1, 0], [0, 0, 0, 1, 1, 1, 2, 2, 2]],
           names=['Symbol', 'Date'])


In [57]:
# The singular .name attribute of this MultiIndex is None; the per-level
# names are held in .names instead.
print(stocks.index.name)

None


In [58]:
# .names lists one name per index level, outermost first.
print(stocks.index.names)

['Symbol', 'Date']


In [59]:
# Sorting index
# Orders the rows by Symbol and then by Date, which groups each symbol's rows
# together. Label-based slicing on a MultiIndex (used in later cells) expects
# the index to be sorted.
stocks = stocks.sort_index()
print(stocks)

                    Close    Volume
Symbol Date                        
AAPL   2016-10-03  112.52  21701800
       2016-10-04  113.00  29736800
       2016-10-05  113.05  21453100
CSCO   2016-10-03   31.50  14070500
       2016-10-04   31.35  18460400
       2016-10-05   31.59  11808600
MSFT   2016-10-03   57.42  19189500
       2016-10-04   57.24  20085900
       2016-10-05   57.64  16726400


In [60]:
# Indexing (individual row)
# A full (Symbol, Date) tuple identifies exactly one row. The row comes back
# as a Series with a single dtype, which is why the integer Volume is shown
# as a float in the recorded output.
stocks.loc[('CSCO', '2016-10-04')]

Close           31.35
Volume    18460400.00
Name: (CSCO, 2016-10-04), dtype: float64

In [61]:
# Indexing (individual cell): a row tuple followed by a column label
# returns one scalar value.
stocks.loc[('CSCO', '2016-10-04'), 'Volume']

18460400.0

In [62]:
# Slicing (outermost index)
# A single outer-level label selects all rows for that Symbol; the Symbol
# level is dropped from the result, leaving Date as the index.
stocks.loc['AAPL']

             Close    Volume
Date                        
2016-10-03  112.52  21701800
2016-10-04  113.00  29736800
2016-10-05  113.05  21453100


In [63]:
# Slicing (outermost index)
# A label slice on the outer level includes both endpoints, so the MSFT rows
# are part of the result. Both index levels are kept.
stocks.loc['CSCO':'MSFT']

                   Close    Volume
Symbol Date                       
CSCO   2016-10-03  31.50  14070500
       2016-10-04  31.35  18460400
       2016-10-05  31.59  11808600
MSFT   2016-10-03  57.42  19189500
       2016-10-04  57.24  20085900
       2016-10-05  57.64  16726400


In [64]:
# Fancy indexing (outermost index)
# The row selector is a tuple with one entry per index level: a list of
# Symbols for the outer level and a single Date for the inner level.
# The trailing ':' keeps every column.
stocks.loc[(['AAPL', 'MSFT'], '2016-10-05'), :]

                    Close    Volume
Symbol Date                        
AAPL   2016-10-05  113.05  21453100
MSFT   2016-10-05   57.64  16726400


In [65]:
# Fancy indexing (outermost index) restricted to one column: naming a single
# column after the row tuple returns a Series instead of a DataFrame.
stocks.loc[(['AAPL', 'MSFT'], '2016-10-05'), 'Close']

Symbol  Date      
AAPL    2016-10-05    113.05
MSFT    2016-10-05     57.64
Name: Close, dtype: float64

In [66]:
# Fancy indexing (innermost index)
# One Symbol on the outer level and a list of Dates on the inner level.
# The recorded output lists 2016-10-03 before 2016-10-05, i.e. index order
# rather than the order requested here (this may differ across pandas versions).
stocks.loc[('CSCO', ['2016-10-05', '2016-10-03']), :]

                   Close    Volume
Symbol Date                       
CSCO   2016-10-03  31.50  14070500
       2016-10-05  31.59  11808600


In [67]:
# Slicing (both indexes)
# pd.IndexSlice[:, a:b] builds the tuple (slice(None), slice(a, b)):
# every Symbol on the outer level, and the inclusive Date range on the inner.
stocks.loc[pd.IndexSlice[:, '2016-10-03':'2016-10-04'], :]

                    Close    Volume
Symbol Date                        
AAPL   2016-10-03  112.52  21701800
       2016-10-04  113.00  29736800
CSCO   2016-10-03   31.50  14070500
       2016-10-04   31.35  18460400
MSFT   2016-10-03   57.42  19189500
       2016-10-04   57.24  20085900
