# 重塑和轴向转换

## 重塑层次化索引

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [2]:
data = DataFrame(np.arange(6).reshape((2, 3)),
                index=pd.Index(['Ohio', 'Colorado'], name='state'),
                columns=pd.Index(['one', 'two', 'three'], name='number'))

In [3]:
data

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [4]:
result = data.stack()

In [5]:
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int64

In [6]:
result.index

MultiIndex(levels=[['Ohio', 'Colorado'], ['one', 'two', 'three']],
           labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
           names=['state', 'number'])

In [7]:
result.unstack()

number,one,two,three
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,0,1,2
Colorado,3,4,5


In [8]:
result.unstack(0)

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [9]:
result.unstack('state')

state,Ohio,Colorado
number,Unnamed: 1_level_1,Unnamed: 2_level_1
one,0,3
two,1,4
three,2,5


In [10]:
s1 = Series([0, 1, 2, 3], index=['a', 'b', 'c', 'd'])

In [11]:
s2 = Series([4, 5, 6], index=['c', 'd', 'e'])

In [12]:
data2 = pd.concat([s1, s2], keys=['one', 'two'])

In [13]:
s1

a    0
b    1
c    2
d    3
dtype: int64

In [14]:
s2

c    4
d    5
e    6
dtype: int64

In [15]:
data2

one  a    0
     b    1
     c    2
     d    3
two  c    4
     d    5
     e    6
dtype: int64

In [16]:
data2.unstack()

Unnamed: 0,a,b,c,d,e
one,0.0,1.0,2.0,3.0,
two,,,4.0,5.0,6.0


In [17]:
data2.unstack().stack()

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
two  c    4.0
     d    5.0
     e    6.0
dtype: float64

In [18]:
data2.unstack().stack(dropna=False)

one  a    0.0
     b    1.0
     c    2.0
     d    3.0
     e    NaN
two  a    NaN
     b    NaN
     c    4.0
     d    5.0
     e    6.0
dtype: float64

In [19]:
df = DataFrame({'left': result, 'right': result + 5},
              columns=pd.Index(['left', 'fight'], name='side'))

In [20]:
result

state     number
Ohio      one       0
          two       1
          three     2
Colorado  one       3
          two       4
          three     5
dtype: int64

In [21]:
df

Unnamed: 0_level_0,side,left,fight
state,number,Unnamed: 2_level_1,Unnamed: 3_level_1
Ohio,one,0,
Ohio,two,1,
Ohio,three,2,
Colorado,one,3,
Colorado,two,4,
Colorado,three,5,


In [22]:
df.unstack('state')

side,left,left,fight,fight
state,Ohio,Colorado,Ohio,Colorado
number,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
one,0,3,,
two,1,4,,
three,2,5,,


In [23]:
df.unstack('state').stack('side')

Unnamed: 0_level_0,state,Ohio,Colorado
number,side,Unnamed: 2_level_1,Unnamed: 3_level_1
one,left,0,3
two,left,1,4
three,left,2,5


## 将“长数据”旋转为“宽格式”

In [24]:
import tushare as ts

In [25]:
data = ts.get_k_data('000001',start='2017-01-01', end='2017-06-30')

In [26]:
data

Unnamed: 0,date,open,close,high,low,volume,code
0,2017-01-03,8.979,9.028,9.048,8.959,459840.0,000001
1,2017-01-04,9.018,9.028,9.048,9.008,449329.0,000001
2,2017-01-05,9.038,9.038,9.048,9.018,344372.0,000001
3,2017-01-06,9.038,8.999,9.038,8.979,358154.0,000001
4,2017-01-09,8.999,9.018,9.038,8.979,361081.0,000001
5,2017-01-10,9.018,9.018,9.028,9.008,241053.0,000001
6,2017-01-11,9.008,9.008,9.038,8.999,303430.0,000001
7,2017-01-12,8.999,9.018,9.038,8.999,428006.0,000001
8,2017-01-13,9.008,9.028,9.058,8.989,434301.0,000001
9,2017-01-16,9.018,9.008,9.028,8.939,683165.0,000001


In [27]:
help(DataFrame.pivot)

Help on function pivot in module pandas.core.frame:

pivot(self, index=None, columns=None, values=None)
    Reshape data (produce a "pivot" table) based on column values. Uses
    unique values from index / columns to form axes of the resulting
    DataFrame.
    
    Parameters
    ----------
    index : string or object, optional
        Column name to use to make new frame's index. If None, uses
        existing index.
    columns : string or object
        Column name to use to make new frame's columns
    values : string or object, optional
        Column name to use for populating new frame's values. If not
        specified, all remaining columns will be used and the result will
        have hierarchically indexed columns
    
    Returns
    -------
    pivoted : DataFrame
    
    See also
    --------
    DataFrame.pivot_table : generalization of pivot that can handle
        duplicate values for one index/column pair
    DataFrame.unstack : pivot based on the index values in

In [28]:
symbol = np.where(data['open'] > 9, 1, 0)

In [29]:
data['symbol'] = symbol

In [30]:
data

Unnamed: 0,date,open,close,high,low,volume,code,symbol
0,2017-01-03,8.979,9.028,9.048,8.959,459840.0,000001,0
1,2017-01-04,9.018,9.028,9.048,9.008,449329.0,000001,1
2,2017-01-05,9.038,9.038,9.048,9.018,344372.0,000001,1
3,2017-01-06,9.038,8.999,9.038,8.979,358154.0,000001,1
4,2017-01-09,8.999,9.018,9.038,8.979,361081.0,000001,0
5,2017-01-10,9.018,9.018,9.028,9.008,241053.0,000001,1
6,2017-01-11,9.008,9.008,9.038,8.999,303430.0,000001,1
7,2017-01-12,8.999,9.018,9.038,8.999,428006.0,000001,0
8,2017-01-13,9.008,9.028,9.058,8.989,434301.0,000001,1
9,2017-01-16,9.018,9.008,9.028,8.939,683165.0,000001,1


In [38]:
pivoted = data.pivot(index='date', columns='symbol', values='close')

In [39]:
pivoted.head()

symbol,0,1
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-03,9.028,
2017-01-04,,9.028
2017-01-05,,9.038
2017-01-06,,8.999
2017-01-09,9.018,


In [40]:
data['value'] = np.random.randn(len(data))

In [41]:
data[:10]

Unnamed: 0,date,open,close,high,low,volume,code,symbol,value
0,2017-01-03,8.979,9.028,9.048,8.959,459840.0,1,0,-1.324991
1,2017-01-04,9.018,9.028,9.048,9.008,449329.0,1,1,0.120866
2,2017-01-05,9.038,9.038,9.048,9.018,344372.0,1,1,-2.625203
3,2017-01-06,9.038,8.999,9.038,8.979,358154.0,1,1,-0.298291
4,2017-01-09,8.999,9.018,9.038,8.979,361081.0,1,0,-1.326505
5,2017-01-10,9.018,9.018,9.028,9.008,241053.0,1,1,0.070325
6,2017-01-11,9.008,9.008,9.038,8.999,303430.0,1,1,-0.20555
7,2017-01-12,8.999,9.018,9.038,8.999,428006.0,1,0,0.44409
8,2017-01-13,9.008,9.028,9.058,8.989,434301.0,1,1,-0.101912
9,2017-01-16,9.018,9.008,9.028,8.939,683165.0,1,1,-0.532677


In [42]:
pivoted = data.pivot('date', 'symbol')

In [43]:
pivoted[:5]

Unnamed: 0_level_0,open,open,close,close,high,high,low,low,volume,volume,code,code,value,value
symbol,0,1,0,1,0,1,0,1,0,1,0,1,0,1
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
2017-01-03,8.979,,9.028,,9.048,,8.959,,459840.0,,1.0,,-1.324991,
2017-01-04,,9.018,,9.028,,9.048,,9.008,,449329.0,,1.0,,0.120866
2017-01-05,,9.038,,9.038,,9.048,,9.018,,344372.0,,1.0,,-2.625203
2017-01-06,,9.038,,8.999,,9.038,,8.979,,358154.0,,1.0,,-0.298291
2017-01-09,8.999,,9.018,,9.038,,8.979,,361081.0,,1.0,,-1.326505,


In [44]:
pivoted['close'][:5]

symbol,0,1
date,Unnamed: 1_level_1,Unnamed: 2_level_1
2017-01-03,9.028,
2017-01-04,,9.028
2017-01-05,,9.038
2017-01-06,,8.999
2017-01-09,9.018,


In [48]:
unstacked = data.set_index(['date', 'symbol']).unstack('symbol')

In [49]:
unstacked[:7]

Unnamed: 0_level_0,open,open,close,close,high,high,low,low,volume,volume,code,code,value,value
symbol,0,1,0,1,0,1,0,1,0,1,0,1,0,1
date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
2017-01-03,8.979,,9.028,,9.048,,8.959,,459840.0,,1.0,,-1.324991,
2017-01-04,,9.018,,9.028,,9.048,,9.008,,449329.0,,1.0,,0.120866
2017-01-05,,9.038,,9.038,,9.048,,9.018,,344372.0,,1.0,,-2.625203
2017-01-06,,9.038,,8.999,,9.038,,8.979,,358154.0,,1.0,,-0.298291
2017-01-09,8.999,,9.018,,9.038,,8.979,,361081.0,,1.0,,-1.326505,
2017-01-10,,9.018,,9.018,,9.028,,9.008,,241053.0,,1.0,,0.070325
2017-01-11,,9.008,,9.008,,9.038,,8.999,,303430.0,,1.0,,-0.20555
