In [11]:
from pandas import Series, DataFrame
import pandas as pd
import numpy as np

In [2]:
# Build a small Series and keep a handle on its Index object.
obj = pd.Series(range(3), index=list('abc'))
index = obj.index
index

Index(['a', 'b', 'c'], dtype='object')

In [3]:
# Slicing an Index yields another Index holding only the selected labels.
index[1:]

Index(['b', 'c'], dtype='object')

In [5]:
# Index objects are immutable: uncommenting the assignment below raises a TypeError.
#index[1] = 'd'

In [6]:
# Reindexing: start from a Series whose labels are deliberately out of order.
obj = pd.Series(data=[4.5, 7.2, -5.3, 3.6], index=list('dbac'))
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [7]:
# Conform obj to a new label order; 'e' has no source value, so it comes back as NaN.
obj2 = obj.reindex(index=list('abcde'))
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [8]:
# Same reindex as before, but the label missing from obj ('e') is filled with
# fill_value (0) instead of NaN.
obj.reindex(['a', 'b', 'c', 'd', 'e'], fill_value=0)

a   -5.3
b    7.2
c    3.6
d    4.5
e    0.0
dtype: float64

In [9]:
# Interpolation (filling) while reindexing:
#   ffill / pad       -> forward fill
#   bfill / backfill  -> backward fill
obj3 = pd.Series(data=['blue', 'purple', 'yellow'], index=[0, 2, 4])
obj3.reindex(index=range(6), method='ffill')

0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [14]:
# 3x3 frame holding 0..8; the row labels intentionally skip 'b'.
frame = pd.DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=list('acd'),
    columns=['Ohio', 'Texas', 'California'],
)
frame

Unnamed: 0,Ohio,Texas,California
a,0,1,2
c,3,4,5
d,6,7,8


In [15]:
# Reindex the rows; the new label 'b' has no source data, so its row is all missing.
frame2 = frame.reindex(index=list('abcd'))
frame2

Unnamed: 0,Ohio,Texas,California
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [20]:
# Reindex the columns via the `columns` keyword; columns absent from frame
# are filled with -1.
states = [
    'Texas',
    'Utah',
    'California',
]
frame.reindex(fill_value=-1, columns=states)

Unnamed: 0,Texas,Utah,California
a,1,-1,2
c,4,-1,5
d,7,-1,8


In [21]:
# Dropping entries from an axis.
obj = pd.Series(np.arange(5, dtype=float), index=list('abcde'))
new_obj = obj.drop(labels='c')
new_obj

a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64

In [22]:
# Passing a list drops several labels in one call.
obj.drop(['d', 'c'])

a    0.0
b    1.0
e    4.0
dtype: float64

In [24]:
# 4x4 frame holding 0..15, used to demonstrate dropping rows by label.
# NOTE(review): 'Calorado' looks like a misspelling of 'Colorado' — confirm
# whether any other cell depends on this label before correcting it.
data = pd.DataFrame(
    data=np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Calorado', 'Utah', 'New York'],
)
data.drop(labels=['Calorado', 'Ohio'])

Unnamed: 0,one,two,three,four
Utah,8,9,10,11
New York,12,13,14,15


In [25]:
# Function application and mapping.
# NOTE(review): no seed is set in the visible cells, so these random values
# change on every run.
frame = pd.DataFrame(
    data=np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
frame

Unnamed: 0,b,d,e
Utah,-1.234626,-0.639713,1.952915
Ohio,0.196104,-1.780844,1.268854
Texas,-1.016911,1.353677,0.015723
Oregon,-1.88292,-1.474689,-0.401507


In [26]:
# NumPy ufuncs such as np.abs work element-wise on a DataFrame and keep its labels.
np.abs(frame)

Unnamed: 0,b,d,e
Utah,1.234626,0.639713,1.952915
Ohio,0.196104,1.780844,1.268854
Texas,1.016911,1.353677,0.015723
Oregon,1.88292,1.474689,0.401507


In [27]:
def f(x):
    """Return the spread of ``x``: its maximum minus its minimum."""
    return x.max() - x.min()
# By default apply calls f once per column, giving one value per column label.
frame.apply(f)

b    2.079025
d    3.134521
e    2.354422
dtype: float64

In [28]:
# axis=1 calls f once per row instead, giving one value per index label.
frame.apply(f, axis=1)

Utah      3.187541
Ohio      3.049698
Texas     2.370589
Oregon    1.481413
dtype: float64

In [29]:
def f(x):
    """Return the minimum and maximum of ``x`` as a Series labelled 'min' and 'max'.

    NOTE: this rebinds the name ``f`` that the earlier max-minus-min lambda used.
    """
    lowest, highest = x.min(), x.max()
    return Series([lowest, highest], index=['min', 'max'])
# f returns a Series here, so apply assembles a DataFrame with one row per
# returned label ('min', 'max') and one column per original column.
frame.apply(f)

Unnamed: 0,b,d,e
min,-1.88292,-1.780844,-0.401507
max,0.196104,1.353677,1.952915


In [30]:
def format(x):
    """Render ``x`` as a string with two digits after the decimal point.

    NOTE: this name shadows the built-in ``format``; it is kept because the
    later ``frame['e'].map(format)`` cell refers to it.
    """
    return '%.2f' % x
# Apply the formatter to every element of the frame.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use `map` when this pandas provides it and fall back to `applymap` on
# older releases. The check is made on the class so that a column named 'map'
# cannot be mistaken for the method.
(frame.map if hasattr(DataFrame, 'map') else frame.applymap)(format)

Unnamed: 0,b,d,e
Utah,-1.23,-0.64,1.95
Ohio,0.2,-1.78,1.27
Texas,-1.02,1.35,0.02
Oregon,-1.88,-1.47,-0.4


In [31]:
# Series.map applies the formatter to each element of column 'e'.
frame['e'].map(format)

Utah       1.95
Ohio       1.27
Texas      0.02
Oregon    -0.40
Name: e, dtype: object