##### homepage: http://pandas.pydata.org/index.html

# Indexing and Selecting Data

In [None]:
# https://pandas.pydata.org/pandas-docs/stable/indexing.html

# Basic

### 读入数据

In [4]:
import pandas as pd
import warnings
import numpy as np
import matplotlib.pyplot as plt
warnings.filterwarnings("ignore")

# df1 = pd.read_csv("./trainset/pcdata_0611.csv") # read_sql read_html

### 生成DataFrame

In [5]:
pd.DataFrame({'A': [1, 2, 3],
              'B': [4, 5, 6]},  
                    index=list('abc'))

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


In [6]:
pd.DataFrame([[1, 2, 3],
              [4, 5, 6]],
                    index=list('ab'))

Unnamed: 0,0,1,2
a,1,2,3
b,4,5,6


In [7]:
dfd = pd.DataFrame({'A': [1, 2, 3],
                    'B': [4, 5, 6]},  
                    index=list('abc'))
dfd

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


### .T
Transpose 

In [8]:
dfd.T

Unnamed: 0,a,b,c
A,1,2,3
B,4,5,6


### Index

In [11]:
dfd[["A","B"]]

Unnamed: 0,A,B
a,1,4
b,2,5
c,3,6


In [32]:
dfd[1:2]

Unnamed: 0,A,B,C
b,2,5,7


In [59]:
dfd[::2]

Unnamed: 0,A,B,C
a,1,4,5
c,3,6,9


### new column

In [13]:
tdf = dfd
tdf["C"] = tdf["A"] + tdf["B"]
tdf

Unnamed: 0,A,B,C
a,1,4,5
b,2,5,7
c,3,6,9


### new row

In [24]:
tdf.append(pd.DataFrame([[2,4,6]], columns=["A","B","C"]))

Unnamed: 0,A,B,C
a,1,4,5
b,2,5,7
c,3,6,9
0,2,4,6


In [23]:
# tdf2.reset_index()

Unnamed: 0,index,A,B,C
0,a,1,4,5
1,b,2,5,7
2,c,3,6,9
3,0,2,4,6


In [16]:
tdf.append(tdf)

Unnamed: 0,A,B,C
a,1,4,5
b,2,5,7
c,3,6,9
a,1,4,5
b,2,5,7
c,3,6,9


### .concat
more efficient than append
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.concat.html?highlight=concat#pandas.concat

In [17]:
pd.concat([tdf, tdf])

Unnamed: 0,A,B,C
a,1,4,5
b,2,5,7
c,3,6,9
a,1,4,5
b,2,5,7
c,3,6,9


In [28]:
pd.concat([tdf, tdf], ignore_index=True)

Unnamed: 0,A,B,C
0,1,4,5
1,2,5,7
2,3,6,9
3,1,4,5
4,2,5,7
5,3,6,9


##### axis : {0/’index’, 1/’columns’}, default 0

    The axis to concatenate along


In [29]:
pd.concat([tdf, tdf], axis=1)

Unnamed: 0,A,B,C,A.1,B.1,C.1
a,1,4,5,1,4,5
b,2,5,7,2,5,7
c,3,6,9,3,6,9


### seclect

In [25]:
dfd

Unnamed: 0,A,B,C
a,1,4,5
b,2,5,7
c,3,6,9


In [14]:
dfd[dfd.A>1]

Unnamed: 0,A,B
b,2,5
c,3,6


In [15]:
dfd.A>1

a    False
b     True
c     True
Name: A, dtype: bool

In [16]:
filter = dfd.A>1
type(filter)

pandas.core.series.Series

In [27]:
# dfd[dfd.C > dfd.A]

Unnamed: 0,A,B,C
a,1,4,5
b,2,5,7
c,3,6,9


### .loc
df.loc[row_indexer,column_indexer]

In [61]:
dates = pd.date_range('1/1/2000', periods=8)

df = pd.DataFrame(np.random.randn(8, 4), index=dates, columns=['A', 'B', 'C', 'D'])
df

Unnamed: 0,A,B,C,D
2000-01-01,-1.170045,1.282054,1.156002,0.116517
2000-01-02,-0.211171,0.687866,-0.335056,0.931112
2000-01-03,-0.092936,-0.326598,0.198455,0.9509
2000-01-04,-0.372172,0.356238,0.46402,-1.085201
2000-01-05,-0.483648,1.749823,0.48737,-0.409341
2000-01-06,0.498361,0.127934,1.54269,-0.103222
2000-01-07,-1.15364,0.877413,-0.447568,0.493444
2000-01-08,1.614527,-0.151379,0.140525,-1.280825


### column index

In [67]:
# df.loc[:]

In [37]:
# note the difference between with and without the brackets
df1 = df.loc[:, ['A']]
print type(df1)
df2 = df.loc[:, 'A']
print type(df2)

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


### row index

In [38]:
filters = df.index>'2000-01-05'
df.loc[filters, :]

Unnamed: 0,A,B,C,D
2000-01-06,0.944588,-0.683064,0.313358,-0.432203
2000-01-07,-1.448672,-0.564808,-1.025435,0.09575
2000-01-08,-0.09316,-0.98016,0.242726,0.110485


##### String likes in slicing can be convertible to the type of the index and lead to natural slicing.

In [39]:
df.loc['20000101':'20000105']

Unnamed: 0,A,B,C,D
2000-01-01,1.519277,1.582809,0.64331,0.9697
2000-01-02,1.141639,2.073245,0.238348,0.504121
2000-01-03,0.29449,-0.345567,0.793849,1.585689
2000-01-04,1.747291,-0.07514,-0.122804,1.125318
2000-01-05,0.649874,0.415495,-1.801074,-0.385598


In [183]:
# df.loc[:3]

##### .index

In [42]:
df.index[[0, 2]]

DatetimeIndex(['2000-01-01', '2000-01-03'], dtype='datetime64[ns]', freq=None)

In [190]:
df.loc[df.index[[0, 2]],['A']]

Unnamed: 0,A
2000-01-01,-0.001379
2000-01-03,-0.295811


### .iloc

In [47]:
df.iloc[:3]

Unnamed: 0,A,B,C,D
2000-01-01,0.490313,-1.826382,-0.847263,0.347326
2000-01-02,0.160241,0.510906,-0.686736,0.43597
2000-01-03,-0.274126,-1.398118,-1.205558,1.423482


In [49]:
df.iloc[:3, 2:4]

Unnamed: 0,C,D
2000-01-01,-0.847263,0.347326
2000-01-02,-0.686736,0.43597
2000-01-03,-1.205558,1.423482


##### .get_loc

In [192]:
df.columns.get_loc('A')

0

In [188]:
df.iloc[[0, 2], [df.columns.get_loc('A')]]

Unnamed: 0,A
2000-01-01,-0.001379
2000-01-03,-0.295811


##### .get_indexer
For getting multiple indexers

In [194]:
df.columns.get_indexer(['A', 'B'])

array([0, 1], dtype=int64)

In [195]:
df.iloc[[0, 2], df.columns.get_indexer(['A', 'B'])]

Unnamed: 0,A,B
2000-01-01,-0.001379,1.274753
2000-01-03,-0.295811,-0.191541


### .loc vs .iloc
https://stackoverflow.com/questions/31593201/pandas-iloc-vs-ix-vs-loc-explanation

In [171]:
df2 = df.reset_index(drop=True)
df2 = df2.sort_values("A")
df2

Unnamed: 0,A,B,C,D
5,-1.029617,-0.518464,0.497126,0.234901
1,-0.779598,0.664122,0.422424,-2.376949
4,-0.323552,0.545701,0.724448,-1.213677
2,-0.295811,-0.191541,-1.380574,0.596739
3,-0.261907,0.07179,-1.758331,-0.666785
0,-0.001379,1.274753,2.213789,-0.87754
6,0.627716,1.396828,-0.105753,-0.279644
7,1.465338,0.419214,0.231632,1.753904


In [172]:
df2.loc[:3]

Unnamed: 0,A,B,C,D
5,-1.029617,-0.518464,0.497126,0.234901
1,-0.779598,0.664122,0.422424,-2.376949
4,-0.323552,0.545701,0.724448,-1.213677
2,-0.295811,-0.191541,-1.380574,0.596739
3,-0.261907,0.07179,-1.758331,-0.666785


In [43]:
# df2.loc[:1, ['A','C']]

In [173]:
df2.iloc[:3]

Unnamed: 0,A,B,C,D
5,-1.029617,-0.518464,0.497126,0.234901
1,-0.779598,0.664122,0.422424,-2.376949
4,-0.323552,0.545701,0.724448,-1.213677


In [44]:
# df2.iloc[:1, :3]

### .ix 
starting from Pandas 0.20.1 the .ix indexer is deprecated, in favor of the more strict .iloc and .loc indexers.

### .ix vs .iloc vs .loc
https://stackoverflow.com/questions/31593201/pandas-iloc-vs-ix-vs-loc-explanation-how-are-they-different

### Different Choices for Indexing
http://pandas.pydata.org/pandas-docs/stable/indexing.html#different-choices-for-indexing

### .reset_index
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.reset_index.html?highlight=reset_index

In [47]:
# df

In [48]:
df.reset_index()

Unnamed: 0,index,A,B,C,D
0,2000-01-01,1.519277,1.582809,0.64331,0.9697
1,2000-01-02,1.141639,2.073245,0.238348,0.504121
2,2000-01-03,0.29449,-0.345567,0.793849,1.585689
3,2000-01-04,1.747291,-0.07514,-0.122804,1.125318
4,2000-01-05,0.649874,0.415495,-1.801074,-0.385598
5,2000-01-06,0.944588,-0.683064,0.313358,-0.432203
6,2000-01-07,-1.448672,-0.564808,-1.025435,0.09575
7,2000-01-08,-0.09316,-0.98016,0.242726,0.110485


##### Q: how to aviod old index being added as a column?

In [49]:
# df.reset_index()

In [50]:
df2 = df.reset_index(drop=True)
df2

Unnamed: 0,A,B,C,D
0,1.519277,1.582809,0.64331,0.9697
1,1.141639,2.073245,0.238348,0.504121
2,0.29449,-0.345567,0.793849,1.585689
3,1.747291,-0.07514,-0.122804,1.125318
4,0.649874,0.415495,-1.801074,-0.385598
5,0.944588,-0.683064,0.313358,-0.432203
6,-1.448672,-0.564808,-1.025435,0.09575
7,-0.09316,-0.98016,0.242726,0.110485


In [180]:
df2.loc[:3]

Unnamed: 0,A,B,C,D
0,-1.029617,-0.518464,0.497126,0.234901
1,-0.779598,0.664122,0.422424,-2.376949
2,-0.323552,0.545701,0.724448,-1.213677
3,-0.295811,-0.191541,-1.380574,0.596739


In [181]:
df2.iloc[:3]

Unnamed: 0,A,B,C,D
0,-1.029617,-0.518464,0.497126,0.234901
1,-0.779598,0.664122,0.422424,-2.376949
2,-0.323552,0.545701,0.724448,-1.213677


### .set_index
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.set_index.html?highlight=set_index

In [51]:
tdf = df.reset_index()
tdf.set_index("index")

Unnamed: 0_level_0,A,B,C,D
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-01,1.519277,1.582809,0.64331,0.9697
2000-01-02,1.141639,2.073245,0.238348,0.504121
2000-01-03,0.29449,-0.345567,0.793849,1.585689
2000-01-04,1.747291,-0.07514,-0.122804,1.125318
2000-01-05,0.649874,0.415495,-1.801074,-0.385598
2000-01-06,0.944588,-0.683064,0.313358,-0.432203
2000-01-07,-1.448672,-0.564808,-1.025435,0.09575
2000-01-08,-0.09316,-0.98016,0.242726,0.110485


In [52]:
tdf = df.reset_index()
tdf.set_index("A")

Unnamed: 0_level_0,index,B,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.519277,2000-01-01,1.582809,0.64331,0.9697
1.141639,2000-01-02,2.073245,0.238348,0.504121
0.29449,2000-01-03,-0.345567,0.793849,1.585689
1.747291,2000-01-04,-0.07514,-0.122804,1.125318
0.649874,2000-01-05,0.415495,-1.801074,-0.385598
0.944588,2000-01-06,-0.683064,0.313358,-0.432203
-1.448672,2000-01-07,-0.564808,-1.025435,0.09575
-0.09316,2000-01-08,-0.98016,0.242726,0.110485


### .merge

In [54]:
dfa = pd.DataFrame({'key': ["foo","bar","baz"],
                    'v1': [4, 5, 6]})
dfb = pd.DataFrame({'key': ["foo","bar","qux"],
                    'v2': [1, 2, 3]})

In [39]:
dfa

Unnamed: 0,key,v1
0,foo,4
1,bar,5
2,baz,6


In [40]:
dfb

Unnamed: 0,key,v2
0,foo,1
1,bar,2
2,qux,3


In [41]:
dfa.merge(dfb, on="key")

Unnamed: 0,key,v1,v2
0,foo,4,1
1,bar,5,2


### trick

##### _merge
indicator

##### Q: How to get rows that appear in dfa but not dfb ? 

In [55]:
pd.merge(dfa, dfb, how='outer',indicator=True)

Unnamed: 0,key,v1,v2,_merge
0,foo,4.0,1.0,both
1,bar,5.0,2.0,both
2,baz,6.0,,left_only
3,qux,,3.0,right_only


### method chaining

In [None]:
pd.merge(dfa, dfb, how='outer',
indicator=True)
.query('_merge == "left_only"')
.drop(['_merge'],axis=1)

### .isin

In [42]:
df = pd.DataFrame({'A': [1, 2, 3], 'B': ['a', 'b', 'f']})
df

Unnamed: 0,A,B
0,1,a
1,2,b
2,3,f


In [43]:
df.isin({"A": [1,3,5]})

Unnamed: 0,A,B
0,True,False
1,False,False
2,True,False


In [45]:
df[df.A.isin([1,3,5])]

Unnamed: 0,A,B
0,1,a
2,3,f


# example

### .read_csv
fund style 20171229 

In [76]:
style = pd.read_csv('20171229.csv')
style.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3089 entries, 0 to 3088
Data columns (total 5 columns):
SECURITY_ID_INT    3089 non-null int64
raw_X              3089 non-null float64
raw_Y              3089 non-null float64
vertical           3089 non-null object
horizontal         3089 non-null object
dtypes: float64(2), int64(1), object(2)
memory usage: 120.7+ KB


In [51]:
style.head(5)

Unnamed: 0,SECURITY_ID_INT,raw_X,raw_Y,vertical,horizontal
0,1734,-106.51273,223.595615,large,value
1,1351,-95.753155,244.328818,large,value
2,2228,-74.231371,318.054976,large,value
3,2296,-57.797931,394.503697,large,value
4,1971,-50.873616,581.97042,large,value


### .sort_values
ascending : default True
inplace : bool, default False

    if True, perform operation in-place


In [82]:
style.sort_values("SECURITY_ID_INT", inplace=True)
style.head(10)

Unnamed: 0,SECURITY_ID_INT,raw_X,raw_Y,vertical,horizontal
40,2,54.546833,432.316244,large,value
409,4,251.404177,469.568402,large,growth
1627,8,241.797552,-7.651046,small,growth
1754,10,310.086762,69.951089,small,growth
2072,12,-8.259759,171.692515,mid,value
1479,14,202.552752,95.194638,small,growth
503,16,319.352339,228.303999,large,growth
2308,17,99.307138,185.975849,mid,value
1829,19,370.285031,71.223713,small,growth
2831,21,245.231469,145.630078,mid,growth


### .reset_index

In [83]:
# style.reset_index(inplace=True)
style.reset_index(inplace=True, drop=True )

# style.head(20)
style

Unnamed: 0,SECURITY_ID_INT,raw_X,raw_Y,vertical,horizontal
0,2,54.546833,432.316244,large,value
1,4,251.404177,469.568402,large,growth
2,8,241.797552,-7.651046,small,growth
3,10,310.086762,69.951089,small,growth
4,12,-8.259759,171.692515,mid,value
5,14,202.552752,95.194638,small,growth
6,16,319.352339,228.303999,large,growth
7,17,99.307138,185.975849,mid,value
8,19,370.285031,71.223713,small,growth
9,21,245.231469,145.630078,mid,growth


### .index

In [129]:
filter = style[style.horizontal=='value'].index
# .indexs()
filter

Int64Index([   0,    4,    7,   12,   18,   21,   22,   25,   27,   37,
            ...
            2965, 2973, 2974, 2980, 2984, 3037, 3048, 3050, 3069, 3076],
           dtype='int64', length=884)

### .drop
axis : {0 or ‘index’, 1 or ‘columns’}, default 0
Whether to drop labels from the index (0 or ‘index’) or columns (1 or ‘columns’).
https://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.drop.html?highlight=drop#pandas.DataFrame.drop

In [130]:
style.drop(filter)

Unnamed: 0,SECURITY_ID_INT,raw_X,raw_Y,vertical,horizontal
1,4,251.404177,469.568402,large,growth
2,8,241.797552,-7.651046,small,growth
3,10,310.086762,69.951089,small,growth
5,14,202.552752,95.194638,small,growth
6,16,319.352339,228.303999,large,growth
8,19,370.285031,71.223713,small,growth
9,21,245.231469,145.630078,mid,growth
10,24,229.214039,213.537403,large,growth
11,27,217.547296,5.712293,small,growth
13,31,307.993202,47.180633,small,growth


In [131]:
style.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3089 entries, 0 to 3088
Data columns (total 5 columns):
SECURITY_ID_INT    3089 non-null int64
raw_X              3089 non-null float64
raw_Y              3089 non-null float64
vertical           3089 non-null object
horizontal         3089 non-null object
dtypes: float64(2), int64(1), object(2)
memory usage: 120.7+ KB


In [59]:
style[style.horizontal=='value']
style[style.horizontal=='value']

Unnamed: 0,SECURITY_ID_INT,raw_X,raw_Y,vertical,horizontal
0,1734,-106.512730,223.595615,large,value
1,1351,-95.753155,244.328818,large,value
2,2228,-74.231371,318.054976,large,value
3,2296,-57.797931,394.503697,large,value
4,1971,-50.873616,581.970420,large,value
5,596,-48.216841,350.745190,large,value
6,1768,-34.912216,264.474972,large,value
7,599,-31.074632,369.757377,large,value
8,1441,-27.497806,311.781603,large,value
9,11293,-21.570914,327.138403,large,value


In [87]:
style.head(5)

Unnamed: 0,SECURITY_ID_INT,raw_X,raw_Y,vertical,horizontal
0,2,54.546833,432.316244,large,value
1,4,251.404177,469.568402,large,growth
2,8,241.797552,-7.651046,small,growth
3,10,310.086762,69.951089,small,growth
4,12,-8.259759,171.692515,mid,value


### .groupby

In [117]:
# to calculate the mean of vertical group
dfgroup = style[['raw_X','raw_Y','vertical']].groupby(['vertical']).agg(['mean', 'std'])
# dfgroup = style.groupby(['vertical']).agg(['mean', 'std'])

dfgroup

Unnamed: 0_level_0,raw_X,raw_X,raw_Y,raw_Y
Unnamed: 0_level_1,mean,std,mean,std
vertical,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
large,200.941888,98.033842,276.059847,76.808567
mid,154.432431,132.723943,142.928277,28.416332
small,161.970242,140.382012,55.174502,28.432147


In [123]:
dfgroup.columns

MultiIndex(levels=[[u'raw_X', u'raw_Y'], [u'mean', u'std']],
           labels=[[0, 0, 1, 1], [0, 1, 0, 1]])

### .droplevel

In [122]:
dfgroup.columns.droplevel(0)
# dfgroup.columns.droplevel(1)

Index([u'mean', u'std', u'mean', u'std'], dtype='object')

In [124]:
tdf = dfgroup
tdf.columns = dfgroup.columns.droplevel(0)
tdf

Unnamed: 0_level_0,mean,std,mean,std
vertical,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
large,200.941888,98.033842,276.059847,76.808567
mid,154.432431,132.723943,142.928277,28.416332
small,161.970242,140.382012,55.174502,28.432147


# reshape
https://pandas.pydata.org/pandas-docs/stable/reshaping.html?highlight=reshape

In [56]:
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                    'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                    'baz': [1, 2, 3, 4, 5, 6],
                    'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
df

Unnamed: 0,bar,baz,foo,zoo
0,A,1,one,x
1,B,2,one,y
2,C,3,one,z
3,A,4,two,q
4,B,5,two,w
5,C,6,two,t


### stack

In [58]:
df.stack()
# type(df.stack())

pandas.core.series.Series

### pivot
Return reshaped DataFrame organized by given index / column values

In [133]:
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
                    'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
                    'baz': [1, 2, 3, 4, 5, 6],
                    'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
df

Unnamed: 0,bar,baz,foo,zoo
0,A,1,one,x
1,B,2,one,y
2,C,3,one,z
3,A,4,two,q
4,B,5,two,w
5,C,6,two,t


In [135]:
df.pivot(index='foo', columns='bar', values='baz')

bar,A,B,C
foo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,1,2,3
two,4,5,6


In [140]:
df.pivot(index='foo', columns='bar')

Unnamed: 0_level_0,baz,baz,baz,zoo,zoo,zoo
bar,A,B,C,A,B,C
foo,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
one,1,2,3,x,y,z
two,4,5,6,q,w,t


In [141]:
style.head()

Unnamed: 0,SECURITY_ID_INT,raw_X,raw_Y,vertical,horizontal
0,2,54.546833,432.316244,large,value
1,4,251.404177,469.568402,large,growth
2,8,241.797552,-7.651046,small,growth
3,10,310.086762,69.951089,small,growth
4,12,-8.259759,171.692515,mid,value


In [163]:
tdf = style[['raw_X','vertical', "horizontal"]].groupby(['vertical', "horizontal"]).agg(['mean', 'std'])
tdf

Unnamed: 0_level_0,Unnamed: 1_level_0,raw_X,raw_X
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,std
vertical,horizontal,Unnamed: 2_level_2,Unnamed: 3_level_2
large,core,158.110646,28.41136
large,growth,275.452098,59.665463
large,value,37.743257,49.014845
mid,core,149.649329,27.631831
mid,growth,294.043388,82.117999
mid,value,9.578589,72.477051
small,core,148.720487,28.980646
small,growth,310.230074,96.886996
small,value,14.360988,68.333564


In [164]:
tdf.columns = tdf.columns.droplevel(0)
tdf

Unnamed: 0_level_0,Unnamed: 1_level_0,mean,std
vertical,horizontal,Unnamed: 2_level_1,Unnamed: 3_level_1
large,core,158.110646,28.41136
large,growth,275.452098,59.665463
large,value,37.743257,49.014845
mid,core,149.649329,27.631831
mid,growth,294.043388,82.117999
mid,value,9.578589,72.477051
small,core,148.720487,28.980646
small,growth,310.230074,96.886996
small,value,14.360988,68.333564


In [165]:
tdf = tdf.reset_index()
tdf

Unnamed: 0,vertical,horizontal,mean,std
0,large,core,158.110646,28.41136
1,large,growth,275.452098,59.665463
2,large,value,37.743257,49.014845
3,mid,core,149.649329,27.631831
4,mid,growth,294.043388,82.117999
5,mid,value,9.578589,72.477051
6,small,core,148.720487,28.980646
7,small,growth,310.230074,96.886996
8,small,value,14.360988,68.333564


In [167]:
tdf.pivot(index="vertical", columns="horizontal")

Unnamed: 0_level_0,mean,mean,mean,std,std,std
horizontal,core,growth,value,core,growth,value
vertical,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
large,158.110646,275.452098,37.743257,28.41136,59.665463,49.014845
mid,149.649329,294.043388,9.578589,27.631831,82.117999,72.477051
small,148.720487,310.230074,14.360988,28.980646,96.886996,68.333564


### .shift

### .unique