# DataFrame 的常用操作一
1. 转置
2. 更改类型
3. 排序
4. 值替换

In [15]:
import pandas as pd

In [29]:
data = {'性别':['男','女','女','男','男'],
        '姓名':['小明','小红','小芳','大黑','张三'],
        '身高':[178,173,165,188,156],
        '年龄':[20,20,25,24,29]}

df = pd.DataFrame(data)

In [30]:
df

Unnamed: 0,姓名,年龄,性别,身高
0,小明,20,男,178
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


In [18]:
df.columns

Index(['姓名', '年龄', '性别', '身高'], dtype='object')

In [22]:
# 修改列索引
df.columns=['name','age','sex','height']

In [23]:
df

Unnamed: 0,name,age,sex,height
0,小明,20,男,178
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


In [26]:
df.age

0    20
1    20
2    25
3    24
4    29
Name: age, dtype: int64

In [28]:
?df.rename

In [34]:
# 使用 rename 函数进行修改列索引
df.rename(columns={'姓名':'name', '年龄':'age', '性别':'sex','身高':'height'},inplace=True)

In [35]:
df

Unnamed: 0,name,age,sex,height
0,小明,20,男,178
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


In [37]:
?df.replace

In [40]:
# 替换一个值
df.replace({'name':{'小明':'xiaoming'}},inplace=True)

In [41]:
df

Unnamed: 0,name,age,sex,height
0,xiaoming,20,男,178
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


In [42]:
# 查看一列有多少种值
df.age.unique()

array([20, 25, 24, 29], dtype=int64)

In [43]:
df.age.value_counts()

20    2
29    1
25    1
24    1
Name: age, dtype: int64

In [44]:
# 对数据进行排序
df.sort_values('age')

Unnamed: 0,name,age,sex,height
0,xiaoming,20,男,178
1,小红,20,女,173
3,大黑,24,男,188
2,小芳,25,女,165
4,张三,29,男,156


In [47]:
?df.sort_values

In [48]:
df.sort_values(['age','height'],ascending=[True,False])

Unnamed: 0,name,age,sex,height
0,xiaoming,20,男,178
1,小红,20,女,173
3,大黑,24,男,188
2,小芳,25,女,165
4,张三,29,男,156


In [49]:
# 求极值, axis =0 表示列的最大值,axis =1 其实没什么用，因为如果有身高和年龄，那肯定每一行都是身高最大了
df.max()

name       张三
age        29
sex         男
height    188
dtype: object

In [50]:
df.min()

name      xiaoming
age             20
sex              女
height         156
dtype: object

In [51]:
df.sum()

name      xiaoming小红小芳大黑张三
age                    118
sex                  男女女男男
height                 860
dtype: object

# DataFrame 的常用操作二
1. 累加求和
2. 增加、删除
3. 修改增加值
4. 值替换

In [53]:
df

Unnamed: 0,name,age,sex,height
0,xiaoming,20,男,178
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


In [52]:
# 累加求和
df.cumsum(0)

Unnamed: 0,name,age,sex,height
0,xiaoming,20,男,178
1,xiaoming小红,40,男女,351
2,xiaoming小红小芳,65,男女女,516
3,xiaoming小红小芳大黑,89,男女女男,704
4,xiaoming小红小芳大黑张三,118,男女女男男,860


In [54]:
# 增加一列
df['player'] = 1
df

Unnamed: 0,name,age,sex,height,player
0,xiaoming,20,男,178,1
1,小红,20,女,173,1
2,小芳,25,女,165,1
3,大黑,24,男,188,1
4,张三,29,男,156,1


In [55]:
# 删除一列
del df['player']
df

Unnamed: 0,name,age,sex,height
0,xiaoming,20,男,178
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


In [56]:
# 删除行，labels 是行列的名字，默认axis=0删除行，为1则删除列
df.drop(labels=0)

Unnamed: 0,name,age,sex,height
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


In [57]:
# 修改一列的值，使用 map 函数
df.sex = df['sex'].map({'男':'female','女':'male'})

In [58]:
df

Unnamed: 0,name,age,sex,height
0,xiaoming,20,female,178
1,小红,20,male,173
2,小芳,25,male,165
3,大黑,24,female,188
4,张三,29,female,156


In [59]:
# 还可以使用 replace 函数进行替换
df.sex.replace('female','男',inplace=True)
df.sex.replace('male','女',inplace=True)

In [60]:
df

Unnamed: 0,name,age,sex,height
0,xiaoming,20,男,178
1,小红,20,女,173
2,小芳,25,女,165
3,大黑,24,男,188
4,张三,29,男,156


# DataFrame 的常用操作三
1. 最值得索引位置
2. 矩阵的运算

In [63]:
# 最值得索引位置
import numpy as np

df=pd.DataFrame(np.random.random((5,10)),columns=list('abcdefghij'))
df

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,0.896438,0.841122,0.238113,0.337115,0.478989,0.468485,0.042771,0.479454,0.276876,0.426447
1,0.357495,0.968991,0.888388,0.839632,0.325568,0.233114,0.121236,0.656806,0.212282,0.207293
2,0.916775,0.411626,0.574248,0.973014,0.959052,0.512999,0.308992,0.258579,0.704856,0.9967
3,0.256592,0.171047,0.881406,0.616196,0.525863,0.913471,0.206393,0.894393,0.120939,0.151127
4,0.967306,0.567964,0.459455,0.328294,0.358375,0.530322,0.186378,0.580079,0.164235,0.657795


In [65]:
df.idxmax()

a    4
b    1
c    1
d    2
e    2
f    3
g    2
h    3
i    2
j    2
dtype: int64

In [66]:
# 矩阵间得运算
# a.相同维度得矩阵加减乘除就是对应元素进行操作
df1=pd.DataFrame(np.random.random((5,10)),columns=list('abcdefghij'))
df2=pd.DataFrame(np.random.random((5,10)),columns=list('abcdefghij'))

In [67]:
df1

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,0.411104,0.407819,0.211764,0.093835,0.727592,0.890875,0.729886,0.879476,0.218267,0.18854
1,0.446697,0.266577,0.181073,0.590714,0.595073,0.770989,0.180824,0.550413,0.524416,0.065004
2,0.985943,0.743149,0.675071,0.697527,0.574375,0.785474,0.305276,0.071509,0.442852,0.943425
3,0.121891,0.673413,0.544299,0.735409,0.499582,0.73079,0.321173,0.358319,0.187938,0.520983
4,0.313272,0.92257,0.96132,0.271629,0.844004,0.82119,0.733437,0.967214,0.411191,0.973366


In [68]:
df2

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,0.164637,0.750084,0.816663,0.553939,0.460628,0.213314,0.655773,0.273625,0.405629,0.228852
1,0.459847,0.804855,0.389686,0.227647,0.971335,0.881055,0.180769,0.352126,0.743946,0.415922
2,0.638677,0.391707,0.376307,0.207265,0.773846,0.918388,0.506947,0.728748,0.966303,0.688333
3,0.740952,0.114735,0.838616,0.742224,0.295083,0.858057,0.04092,0.197818,0.153207,0.48951
4,0.673658,0.245425,0.391593,0.452082,0.16002,0.958444,0.506916,0.930694,0.475094,0.934275


In [72]:
df1-10

Unnamed: 0,a,b,c,d,e,f,g,h,i,j
0,-9.588896,-9.592181,-9.788236,-9.906165,-9.272408,-9.109125,-9.270114,-9.120524,-9.781733,-9.81146
1,-9.553303,-9.733423,-9.818927,-9.409286,-9.404927,-9.229011,-9.819176,-9.449587,-9.475584,-9.934996
2,-9.014057,-9.256851,-9.324929,-9.302473,-9.425625,-9.214526,-9.694724,-9.928491,-9.557148,-9.056575
3,-9.878109,-9.326587,-9.455701,-9.264591,-9.500418,-9.26921,-9.678827,-9.641681,-9.812062,-9.479017
4,-9.686728,-9.07743,-9.03868,-9.728371,-9.155996,-9.17881,-9.266563,-9.032786,-9.588809,-9.026634
