In [4]:
import pandas as pd
import numpy as np
from pandas import DataFrame

#### 替换操作
  - 替换操作可以同步作用于Series和DataFrame中
  - 单值替换
    - 普通替换: 替换所有符合要求的元素: to_replace=15,value='e'
    - 按列指定单值替换: to_replace={列标签: 被替换值}, value='value'
  - 多值替换
    - 列表替换: to_replace=[] value=[]
    - 字典替换(推荐): to_replace={to_replace: value, to_replace: value}

In [5]:
# 5x6 frame of random integers drawn from [0, 100); default integer row and column labels.
df = DataFrame(
    data=np.random.randint(low=0, high=100, size=(5, 6)),
)
df

Unnamed: 0,0,1,2,3,4,5
0,96,8,81,7,26,20
1,82,87,18,59,22,13
2,97,60,4,89,62,34
3,83,59,98,57,16,5
4,90,67,83,4,89,15


In [6]:
df.replace(to_replace=16,value='Two') # scalar form: every 16 in the frame is replaced with 'Two'

Unnamed: 0,0,1,2,3,4,5
0,96,8,81,7,26,20
1,82,87,18,59,22,13
2,97,60,4,89,62,34
3,83,59,98,57,Two,5
4,90,67,83,4,89,15


In [7]:
df.replace(to_replace={62:'John'}) # dict form {old: new}: every 62 in the frame is replaced with 'John'

Unnamed: 0,0,1,2,3,4,5
0,96,8,81,7,26,20
1,82,87,18,59,22,13
2,97,60,4,89,John,34
3,83,59,98,57,16,5
4,90,67,83,4,89,15


In [8]:
df.replace(to_replace={2:17},value='five') # in column 2 only, replace the value 17 with 'five'
# Per-column replacement: to_replace={column label: value to be replaced}, value='replacement'.
# If the chosen column does not contain that value, the frame comes back unchanged.

Unnamed: 0,0,1,2,3,4,5
0,96,8,81,7,26,20
1,82,87,18,59,22,13
2,97,60,4,89,62,34
3,83,59,98,57,16,5
4,90,67,83,4,89,15


### 映射操作
  - 概念: 创建一个映射关系列表,把values元素和一个特定的标签或者字符串绑定(给一个元素值提供不同的表现形式)
  - 创建一个df,两列分别是姓名和薪资,然后通过映射给每个(英文)名字添加对应的中文名

In [9]:
# Column-oriented data for a small payroll table: each key becomes a column.
dic = {}
dic['name'] = ['John', 'Jack', 'Tom', 'Marry', 'Zack']
dic['salary'] = [10000, 4500, 8000, 6000, 7000]
df = DataFrame(data=dic)
df

Unnamed: 0,name,salary
0,John,10000
1,Jack,4500
2,Tom,8000
3,Marry,6000
4,Zack,7000


In [10]:
# Lookup table: each English name found in df['name'] -> its Chinese rendering.
dic = {
    'John': '约翰',
    'Jack': '杰克',
    'Tom': '汤姆',
    'Marry': '玛丽',
    'Zack': '扎克'
}
df['e_name'] = df['name'].map(dic) # look each value of the name column up in dic; store the result as column e_name

- 这里用到的map是Series的方法(参数可以是字典或函数),因此要先取出某一列(Series)再调用

### 运算工具
  - 超过3000部分的钱缴纳50%的税,计算每个人的税后薪资

In [11]:
# Show the current frame (name, salary, e_name) before computing after-tax pay.
df

Unnamed: 0,name,salary,e_name
0,John,10000,约翰
1,Jack,4500,杰克
2,Tom,8000,汤姆
3,Marry,6000,玛丽
4,Zack,7000,扎克


In [12]:
def after_sal(s, threshold=3000, rate=0.5):
    """Return the after-tax salary when only the part above a threshold is taxed.

    Args:
        s: Gross salary; a scalar, or an array/Series for element-wise use.
        threshold: Tax-free amount; only the part of ``s`` above it is taxed.
        rate: Fraction of the taxable part that is paid as tax.

    Returns:
        ``s`` minus the tax due. A salary at or below ``threshold`` pays no tax.
    """
    # Clamp at zero: otherwise a salary below the threshold gives a negative
    # taxable amount and the "tax" would be added to the salary.
    taxable = np.maximum(s - threshold, 0)
    return s - taxable * rate
df['salary'].map(after_sal) # each element of the salary column is passed to after_sal in turn

0    6500.0
1    3750.0
2    5500.0
3    4500.0
4    5000.0
Name: salary, dtype: float64

### 排序实现的随机抽样
  - take()
  - np.random.permutation()

In [13]:
# 100x3 frame of random integers drawn from [0, 100), with columns labelled A, B and C.
df = DataFrame(
    data=np.random.randint(low=0, high=100, size=(100, 3)),
    columns=['A', 'B', 'C'],
)
df

Unnamed: 0,A,B,C
0,42,86,24
1,55,22,82
2,74,54,86
3,51,97,19
4,61,11,72
...,...,...,...
95,18,75,42
96,66,79,47
97,20,81,94
98,62,32,64


In [16]:
np.random.permutation(100) # a random ordering of the integers 0-99

array([ 5, 89, 63, 65, 31, 66, 39, 46, 60,  2, 79, 32, 73, 29,  7, 81, 64,
       54, 98, 90, 15, 10, 62, 11, 91, 40, 33, 84, 28, 41, 83, 87, 68, 22,
        8, 94, 43, 55,  3, 75, 58, 77, 59,  6, 76, 45, 86, 96, 85, 74, 51,
       13,  4, 82, 71, 50, 42, 20, 19,  0, 48, 70, 88, 69, 78, 12, 67, 44,
       61, 36, 35,  1, 24, 18, 57, 93, 17, 95, 99, 38,  9, 56, 47, 25, 49,
       16, 92, 26, 72, 14, 21, 52, 27, 37, 53, 30, 34, 97, 80, 23])

In [15]:
# Rearrange the original data by position (a fixed order here, not a random one).
df.take([2,0,1],axis=1) # axis=1 applies the positions to columns: column order becomes 2, 0, 1

Unnamed: 0,C,A,B
0,24,42,86
1,82,55,22
2,86,74,54
3,19,51,97
4,72,61,11
...,...,...,...
95,42,18,75
96,47,66,79
97,94,20,81
98,64,62,32


In [21]:
# Shuffle both the column order and the row order, then keep the first 50
# shuffled rows as a random sample. The permutation lengths are read from
# df.shape instead of being hard-coded, so the cell keeps working if df is
# rebuilt with a different number of rows or columns.
(
    df.take(np.random.permutation(df.shape[1]), axis=1)
      .take(np.random.permutation(df.shape[0]), axis=0)[0:50]
)

Unnamed: 0,A,C,B
59,39,10,84
32,37,2,81
66,57,10,80
92,15,34,12
75,48,40,63
1,55,82,22
25,57,22,2
27,17,71,47
70,1,60,46
87,52,6,54
