# pandas基本功能
#### 1、重建索引

In [21]:
import numpy as np
import pandas as pd

# Labels are deliberately out of alphabetical order so that the reindexing
# in the next cell visibly rearranges the values.
obj = pd.Series(
    data=[4.5, 7.2, -5.3, 3.6],
    index=['d', 'b', 'a', 'c'],
)
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [22]:
# Rearrange to the new label order; 'e' has no value in obj, so it shows
# up as NaN in the result.
obj2 = obj.reindex(index=['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [23]:
# Integer labels 0, 2, 4 leave gaps that the fill methods below cover.
obj3 = pd.Series(
    ['blue', 'purple', 'yellow'],
    index=[0, 2, 4],
)
obj3

0      blue
2    purple
4    yellow
dtype: object

In [24]:
# Forward fill: every new label takes the value of the nearest original
# label at or before it.
obj3.reindex(index=range(12), method='ffill')

0       blue
1       blue
2     purple
3     purple
4     yellow
5     yellow
6     yellow
7     yellow
8     yellow
9     yellow
10    yellow
11    yellow
dtype: object

In [25]:
# Backward fill: every new label takes the value of the next original
# label; labels after 4 have nothing to take from and stay NaN.
obj3.reindex(index=range(12), method='bfill')

0       blue
1     purple
2     purple
3     yellow
4     yellow
5        NaN
6        NaN
7        NaN
8        NaN
9        NaN
10       NaN
11       NaN
dtype: object

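#### 2、轴向上删除条目

注：本节目前只构造了示例 DataFrame `data`，尚未演示删除操作；删除行或列可使用 `drop` 方法（例如 `data.drop(['Colorado', 'Ohio'])` 删除行，`data.drop('two', axis=1)` 删除列）。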

In [26]:
# 4x4 frame holding 0..15, with state names as row labels.
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
    columns=['one', 'two', 'three', 'four'],
)
data

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


#### 3、索引、选择与过滤

In [27]:
# Label-based selection: row 'Colorado', columns 'two' and 'three'.
data.loc['Colorado', ['two', 'three']]

two      5
three    6
Name: Colorado, dtype: int32

In [28]:
# Position-based selection: row at position 2, columns at positions
# 3, 0 and 1 (returned in that order).
data.iloc[2, [3, 0, 1]]

four    11
one      8
two      9
Name: Utah, dtype: int32

In [29]:
# A single integer selects the row at position 2 and returns it as a Series.
data.iloc[2]

one       8
two       9
three    10
four     11
Name: Utah, dtype: int32

#### 4、整数索引

In [30]:
# No index is given, so the Series gets the default integer labels 0, 1, 2.
ser = pd.Series(np.arange(3.0))
ser

0    0.0
1    1.0
2    2.0
dtype: float64

In [31]:
# With an integer index, [] treats -1 as a label rather than a position;
# no label -1 exists, so this raises KeyError (shown below).
ser[-1]

KeyError: -1

In [34]:
# Same values as before, but with string labels instead of integers.
ser2 = pd.Series(
    np.arange(3.0),
    index=['a', 'b', 'c'],
)
ser2

a    0.0
b    1.0
c    2.0
dtype: float64

In [35]:
# Positional access is spelled out with .iloc: plain [] with an integer on
# a string-labelled Series relies on a positional fallback that pandas has
# deprecated (integer keys are to be treated as labels).
ser2.iloc[-1]

2.0

In [37]:
# Second-to-last element by position; .iloc avoids the deprecated
# positional fallback of plain [] with an integer key.
ser2.iloc[-2]

1.0

In [38]:
# Third-to-last element by position (the first one here); .iloc avoids the
# deprecated positional fallback of plain [] with an integer key.
ser2.iloc[-3]

0.0

In [39]:
# Position -4 does not exist in a length-3 Series, so this raises
# IndexError. The exact message text depends on the access path and the
# pandas version; .iloc is used because positional [] is deprecated.
ser2.iloc[-4]

IndexError: index -4 is out of bounds for axis 0 with size 3

#### 5、算术与数据对齐

In [40]:
# Two Series whose labels only partly overlap: 'a', 'c' and 'e' are shared,
# 'd' is only in s1, 'f' and 'g' are only in s2.
s1 = pd.Series(
    [7.3, -2.5, 3.4, 1.5],
    index=['a', 'c', 'd', 'e'],
)
s2 = pd.Series(
    [-2.1, 3.6, -1.5, 4, 3.1],
    index=['a', 'c', 'e', 'f', 'g'],
)
s1

a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64

In [41]:
# Show s2 on its own so its labels can be compared with those of s1.
s2

a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64

In [42]:
# Addition aligns on labels; a label present in only one operand gives NaN.
s1 + s2

a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64

In [49]:
# Two frames that share only some row labels and some column labels.
df1 = pd.DataFrame(
    np.arange(9.0).reshape(3, 3),
    index=['Ohio', 'Texas', 'Oregon'],
    columns=['b', 'c', 'd'],
)
df2 = pd.DataFrame(
    np.arange(12.0).reshape(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
df1

Unnamed: 0,b,c,d
Ohio,0.0,1.0,2.0
Texas,3.0,4.0,5.0
Oregon,6.0,7.0,8.0


In [50]:
# Show df2 so its rows and columns can be compared with those of df1.
df2

Unnamed: 0,b,d,e
Utah,0.0,1.0,2.0
Ohio,3.0,4.0,5.0
Texas,6.0,7.0,8.0
Oregon,9.0,10.0,11.0


In [51]:
# Alignment happens on both rows and columns; any cell missing from either
# frame is NaN in the sum.
df1 + df2

Unnamed: 0,b,c,d,e
Ohio,3.0,,6.0,
Oregon,15.0,,18.0,
Texas,9.0,,12.0,
Utah,,,,


#### 6、函数应用和映射

In [52]:
# Standard-normal random values; no seed is set in this cell, so the
# numbers shown below come from one particular run.
frame = pd.DataFrame(
    np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame

Unnamed: 0,b,d,e
Utah,1.004342,-1.844401,0.135944
Ohio,0.581902,1.642508,-0.794329
Texas,-1.20824,2.291214,0.154116
Oregon,1.038931,1.360151,-0.492406


In [53]:
# Element-wise absolute value; the row and column labels are kept.
np.abs(frame)

Unnamed: 0,b,d,e
Utah,1.004342,1.844401,0.135944
Ohio,0.581902,1.642508,0.794329
Texas,1.20824,2.291214,0.154116
Oregon,1.038931,1.360151,0.492406


In [54]:
def f(x):
    """Return the spread (maximum minus minimum) of ``x``.

    ``x`` is any object exposing ``max()`` and ``min()`` whose results can
    be subtracted, for example a pandas Series or a NumPy array.
    """
    return x.max() - x.min()
# f is evaluated once per column here, producing one result per column.
frame.apply(f)

b    2.247171
d    4.135615
e    0.948445
dtype: float64

#### 7、排序和排名

In [55]:
# Values 0..3 attached to labels that are not in alphabetical order.
obj = pd.Series(
    range(4),
    index=['d', 'a', 'b', 'c'],
)
# Sort by label (a, b, c, d).
obj.sort_index()

a    1
b    2
c    3
d    0
dtype: int64

In [56]:
# Sort by the stored values instead of by label.
obj.sort_values()

d    0
a    1
b    2
c    3
dtype: int64

In [57]:
# 2x4 frame whose row and column labels are both out of sorted order.
frame = pd.DataFrame(
    np.arange(8).reshape(2, 4),
    index=['three', 'one'],
    columns=['d', 'a', 'b', 'c'],
)
# Default sort_index orders the rows by their labels.
frame.sort_index()

Unnamed: 0,d,a,b,c
one,4,5,6,7
three,0,1,2,3


In [62]:
# Order the rows by the values in column 'd'.
frame.sort_values(by='d')

Unnamed: 0,d,a,b,c
three,0,1,2,3
one,4,5,6,7


#### 8、含有重复标签的轴索引

In [65]:
# Row labels 'a' and 'b' each appear twice; the values are random and no
# seed is set in this cell, so the numbers shown are from one run.
df = pd.DataFrame(
    np.random.randn(4, 3),
    index=['a', 'a', 'b', 'b'],
)
df

Unnamed: 0,0,1,2
a,0.001955,0.722968,1.640116
a,0.600658,0.20424,-0.416452
b,-0.714584,1.236249,0.127442
b,-1.949805,-0.404278,0.517956


In [66]:
# With duplicated labels, selecting 'b' returns every matching row.
df.loc['b']

Unnamed: 0,0,1,2
b,-0.714584,1.236249,0.127442
b,-1.949805,-0.404278,0.517956
