In [3]:
import numpy as np
import pandas as pd
import datetime

from datetime import datetime, date

pd.set_option('display.max_columns',7)
pd.set_option('display.max_rows',10)
pd.set_option('display.width',60)

import matplotlib.pyplot as plt

sp500 = pd.read_csv('data/sp500.csv',
                      index_col='Symbol',
                      usecols=[0,2,3,7])

In [5]:
np.random.seed(123456)
df = pd.DataFrame({'foo':np.random.random(10000),'key':range(100,10100)})
df[:5]

Unnamed: 0,foo,key
0,0.12697,100
1,0.966718,101
2,0.260476,102
3,0.897237,103
4,0.37675,104


In [7]:
df[df.key==10099]

Unnamed: 0,foo,key
9999,0.272283,10099


In [9]:
%timeit df[df.key==10099]

223 µs ± 4.23 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [13]:
df_with_index = df.set_index(['key'])
df_with_index[:5]

Unnamed: 0_level_0,foo
key,Unnamed: 1_level_1
100,0.12697
101,0.966718
102,0.260476
103,0.897237
104,0.37675


In [15]:
df_with_index.loc[10099]

foo    0.272283
Name: 10099, dtype: float64

In [33]:
%timeit df_with_index.loc[10099]

29.2 µs ± 586 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [35]:
temps = pd.DataFrame({'City':['Missoula','Philadelphia'],'Temperature':[70,80] })
temps

Unnamed: 0,City,Temperature
0,Missoula,70
1,Philadelphia,80


In [37]:
temps.columns

Index(['City', 'Temperature'], dtype='object')

In [40]:
df_i64 = pd.DataFrame(np.arange(10,20), index=np.arange(0,10))
df_i64[:5]

Unnamed: 0,0
0,10
1,11
2,12
3,13
4,14


In [42]:
df_i64.index

Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int32')

In [47]:
df_range = pd.DataFrame(np.arange(10,15))
df_range[:5]

Unnamed: 0,0
0,10
1,11
2,12
3,13
4,14


In [48]:
df_range.index

RangeIndex(start=0, stop=5, step=1)

In [54]:
df_f64 = pd.DataFrame(np.arange(0,1000,5),
                      np.arange(0.0,100.0,0.5))
df_f64.iloc[:5]

Unnamed: 0,0
0.0,0
0.5,5
1.0,10
1.5,15
2.0,20


In [56]:
df_f64.index

Index([ 0.0,  0.5,  1.0,  1.5,  2.0,  2.5,  3.0,  3.5,
        4.0,  4.5,
       ...
       95.0, 95.5, 96.0, 96.5, 97.0, 97.5, 98.0, 98.5,
       99.0, 99.5],
      dtype='float64', length=200)

In [61]:
df_interval = pd.DataFrame({'A':[1,2,3,4]},
                           index = pd.IntervalIndex.from_breaks(
                               [0,0.5,1.0,1.5,2.0]))
df_interval

Unnamed: 0,A
"(0.0, 0.5]",1
"(0.5, 1.0]",2
"(1.0, 1.5]",3
"(1.5, 2.0]",4


In [60]:
df_interval.index

IntervalIndex([(0.0, 0.5], (0.5, 1.0], (1.0, 1.5], (1.5, 2.0]], dtype='interval[float64, right]')

In [84]:
df_categorical = pd.DataFrame({'A':np.arange(6),
                               'B':list('aabbca')})
df_categorical['B'] = df_categorical['B'].astype('category',
                                                    list('cab'))
df_categorical

Unnamed: 0,A,B
0,0,a
1,1,a
2,2,b
3,3,b
4,4,c
5,5,a


In [85]:
df_categorical = df_categorical.set_index('B')
df_categorical.index

CategoricalIndex(['a', 'a', 'b', 'b', 'c', 'a'], categories=['a', 'b', 'c'], ordered=False, dtype='category', name='B')

In [88]:
df_categorical.loc[['a','b']]

Unnamed: 0_level_0,A
B,Unnamed: 1_level_1
a,0
a,1
a,5
b,2
b,3


In [90]:
df = pd.DataFrame([np.arange(10,12),
                   np.arange(12,14)],
                 columns=list('ab'),
                 index = list('vw'))
df

Unnamed: 0,a,b
v,10,11
w,12,13


In [92]:
df['a']

v    10
w    12
Name: a, dtype: int32

In [94]:
#행은 loc
df.loc['w']

a    12
b    13
Name: w, dtype: int32

In [96]:
s = pd.Series(np.arange(0,5), index = list('abcde'))
s

a    0
b    1
c    2
d    3
e    4
dtype: int32

In [98]:
s['b':'d']

b    1
c    2
d    3
dtype: int32

In [101]:
s.loc[['a','b','e']]

a    0
b    1
e    4
dtype: int32

In [103]:
sp500[:5]

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABT,Health Care,39.6,15.573
ABBV,Health Care,53.95,2.954
ACN,Information Technology,79.79,8.326
ACE,Financials,102.91,86.897


In [105]:
index_moved_to_col = sp500.reset_index()
index_moved_to_col[:5]

Unnamed: 0,Symbol,Sector,Price,Book Value
0,MMM,Industrials,141.14,26.668
1,ABT,Health Care,39.6,15.573
2,ABBV,Health Care,53.95,2.954
3,ACN,Information Technology,79.79,8.326
4,ACE,Financials,102.91,86.897


In [107]:
index_moved_to_col.set_index('Sector')[:5]

Unnamed: 0_level_0,Symbol,Price,Book Value
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Industrials,MMM,141.14,26.668
Health Care,ABT,39.6,15.573
Health Care,ABBV,53.95,2.954
Information Technology,ACN,79.79,8.326
Financials,ACE,102.91,86.897


In [114]:
reindexed = sp500.reindex(index=['MMM','ABBV','FOO'])
reindexed

Unnamed: 0_level_0,Sector,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,Industrials,141.14,26.668
ABBV,Health Care,53.95,2.954
FOO,,,


In [112]:
sp500.reindex(columns=['Price',
                       'Book Value',
                       'NewCol'])[:5]

Unnamed: 0_level_0,Price,Book Value,NewCol
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MMM,141.14,26.668,
ABT,39.6,15.573,
ABBV,53.95,2.954,
ACN,79.79,8.326,
ACE,102.91,86.897,


In [117]:
reindexed = sp500.reset_index()
multi_fi = reindexed.set_index(['Sector','Symbol'])
multi_fi[:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price,Book Value
Sector,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1
Industrials,MMM,141.14,26.668
Health Care,ABT,39.6,15.573
Health Care,ABBV,53.95,2.954
Information Technology,ACN,79.79,8.326
Financials,ACE,102.91,86.897


In [119]:
type(multi_fi.index)

pandas.core.indexes.multi.MultiIndex

In [121]:
multi_fi.index.get_level_values(0)


Index(['Industrials', 'Health Care', 'Health Care',
       'Information Technology', 'Financials',
       'Health Care', 'Information Technology',
       'Utilities', 'Health Care', 'Financials',
       ...
       'Utilities', 'Information Technology',
       'Information Technology', 'Financials',
       'Industrials', 'Information Technology',
       'Consumer Discretionary', 'Health Care',
       'Financials', 'Health Care'],
      dtype='object', name='Sector', length=500)

In [123]:
multi_fi.xs('Industrials')[:5]

Unnamed: 0_level_0,Price,Book Value
Symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
MMM,141.14,26.668
ALLE,52.46,0.0
APH,95.71,18.315
AVY,48.2,15.616
BA,132.41,19.87


In [125]:
multi_fi.xs('ALLE',level=1)

Unnamed: 0_level_0,Price,Book Value
Sector,Unnamed: 1_level_1,Unnamed: 2_level_1
Industrials,52.46,0.0


In [128]:
multi_fi.xs('Industrials',drop_level=False)[:5]

Unnamed: 0_level_0,Unnamed: 1_level_0,Price,Book Value
Sector,Symbol,Unnamed: 2_level_1,Unnamed: 3_level_1
Industrials,MMM,141.14,26.668
Industrials,ALLE,52.46,0.0
Industrials,APH,95.71,18.315
Industrials,AVY,48.2,15.616
Industrials,BA,132.41,19.87


In [129]:
multi_fi.xs(('Industrials','UPS'))

Price         102.73
Book Value      6.79
Name: (Industrials, UPS), dtype: float64