In [1]:
import numpy as np
import pandas as pd

In [2]:
# Demo frame: the None entries become NaN, which makes A, B and D float
# columns, while the fully populated C stays integer.
df = pd.DataFrame(dict(
    A=[5, 3, None, 4],
    B=[None, 2, 4, 3],
    C=[4, 3, 8, 5],
    D=[5, 4, 2, None],
))
df

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
3,4.0,3.0,5,


In [3]:
# Select multiple columns: the labels must be wrapped in a list
# (df['A', 'C'] would look up a single tuple key and raise KeyError).
df[['A', 'C']]

Unnamed: 0,A,C
0,5.0,4
1,3.0,3
2,,8
3,4.0,5


In [5]:
# Select by integer position: all rows, columns at positions 0 and 2 (A and C).
df.iloc[:, [0, 2]]

Unnamed: 0,A,C
0,5.0,4
1,3.0,3
2,,8
3,4.0,5


In [6]:
# Positional slices are end-exclusive: 0:2 selects the columns at positions 0 and 1 (A and B).
df.iloc[:, 0:2]

Unnamed: 0,A,B
0,5.0,
1,3.0,2.0
2,,4.0
3,4.0,3.0


In [7]:
# A single positional slice applies to rows: positions 0 and 1 (end-exclusive).
df.iloc[0:2]

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0


In [8]:
# Select by label: the rows whose index labels are 0 and 2.
df.loc[[0, 2]]

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
2,,4.0,8,2.0


In [9]:
# Label slices include both endpoints: rows labelled 0, 1 and 2
# (contrast with the end-exclusive df.iloc[0:2]).
df.loc[0:2]

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0


In [12]:
# All rows, columns from label 'A' through 'C' inclusive.
df.loc[:, 'A':'C']

Unnamed: 0,A,B,C
0,5.0,,4
1,3.0,2.0,3
2,,4.0,8
3,4.0,3.0,5


In [13]:
# Display the full frame again for reference.
df

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
3,4.0,3.0,5,


In [18]:
# Boolean filtering: comparison operators bind more loosely than the bitwise
# & operator, so each comparison must be wrapped in parentheses.
df[(df['A'] < 5) & (df['C'] < 4)]

Unnamed: 0,A,B,C,D
1,3.0,2.0,3,4.0


In [20]:
# ==, > and < against NaN all evaluate to False, so a NaN cell can never
# satisfy a comparison-based row filter.
print(np.nan == 1)
print(np.nan > 1)
print(np.nan < 1)

False
False
False


In [21]:
# Value replacement: returns a new Series in which every 4 in column C
# becomes 40; df itself is left unchanged.
df['C'].replace(4, 40)

0    40
1     3
2     8
3     5
Name: C, dtype: int64

In [22]:
# NaN can be replaced like any other value: every missing cell becomes 0.
df.replace(np.nan, 0)

Unnamed: 0,A,B,C,D
0,5.0,0.0,4,5.0
1,3.0,2.0,3,4.0
2,0.0,4.0,8,2.0
3,4.0,3.0,5,0.0


In [23]:
# fillna(0) produces the same result as df.replace(np.nan, 0) here.
df.fillna(0)

Unnamed: 0,A,B,C,D
0,5.0,0.0,4,5.0
1,3.0,2.0,3,4.0
2,0.0,4.0,8,2.0
3,4.0,3.0,5,0.0


In [24]:
# Many-to-one replacement: every 4, 5 or 8 becomes 1000.
df.replace([4, 5, 8], 1000)

Unnamed: 0,A,B,C,D
0,1000.0,,1000,1000.0
1,3.0,2.0,3,1000.0
2,,1000.0,1000,2.0
3,1000.0,3.0,1000,


In [31]:
# Many-to-many replacement: use either a dict or two lists of equal length
# (here 3 -> 300, 4 -> 400, 5 -> 500).
df.replace([3, 4, 5], [300, 400, 500])

Unnamed: 0,A,B,C,D
0,500.0,,400,500.0
1,300.0,2.0,300,400.0
2,,400.0,8,2.0
3,400.0,300.0,500,


In [32]:
# Dict form of a many-to-many replacement: {old_value: new_value}.
df.replace({3: 300, 4: 400, 5: 500})

Unnamed: 0,A,B,C,D
0,500.0,,400,500.0
1,300.0,2.0,300,400.0
2,,400.0,8,2.0
3,400.0,300.0,500,


In [33]:
# df is unchanged: the replace()/fillna() calls above each returned a new object.
df

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0
3,4.0,3.0,5,


In [38]:
# Sorting: sort_values() sorts ascending by default; ascending=False reverses
# that. NaN rows are placed last in either direction (default na_position='last').
df.sort_values(by=['A'], ascending=False)

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
3,4.0,3.0,5,
1,3.0,2.0,3,4.0
2,,4.0,8,2.0


In [39]:
# Multi-column sort: A ascending first, then C descending to break ties in A.
df.sort_values(by=['A', 'C'], ascending=[True, False])

Unnamed: 0,A,B,C,D
1,3.0,2.0,3,4.0
3,4.0,3.0,5,
0,5.0,,4,5.0
2,,4.0,8,2.0


In [40]:
# Dropping by label: axis=1 drops the column labelled 'A'.
# A new frame is returned; df keeps its column A.
df.drop('A', axis=1)

Unnamed: 0,B,C,D
0,,4,5.0
1,2.0,3,4.0
2,4.0,8,2.0
3,3.0,5,


In [41]:
# With the default axis=0 the label refers to the row index: drop the row labelled 3.
df.drop(3)

Unnamed: 0,A,B,C,D
0,5.0,,4,5.0
1,3.0,2.0,3,4.0
2,,4.0,8,2.0


In [42]:
# Transpose: rows become columns and vice versa. Column C's integers are
# shown as floats in the result (4 -> 4.0).
df.T

Unnamed: 0,0,1,2,3
A,5.0,3.0,,4.0
B,,2.0,4.0,3.0
C,4.0,3.0,8.0,5.0
D,5.0,4.0,2.0,


In [43]:
# Transposing twice restores the original layout, but C stays float
# (4.0 rather than 4) after the round trip.
df.T.T

Unnamed: 0,A,B,C,D
0,5.0,,4.0,5.0
1,3.0,2.0,3.0,4.0
2,,4.0,8.0,2.0
3,4.0,3.0,5.0,


In [45]:
# Index reshaping: a small all-string frame with labelled rows and columns,
# used to demonstrate stack() / unstack().
df4 = pd.DataFrame(
    data=[['a', 'b', 'c'], ['d', 'e', 'f']],
    index=['first', 'second'],
    columns=['one', 'two', 'three'],
)
df4

Unnamed: 0,one,two,three
first,a,b,c
second,d,e,f


In [46]:
# stack(): move the column labels into an inner index level, giving a Series
# indexed by (row label, column label).
df4.stack()

first   one      a
        two      b
        three    c
second  one      d
        two      e
        three    f
dtype: object

In [47]:
# unstack() on this single-level-index frame also yields a Series, here
# indexed by (column label, row label).
df4.unstack()

one    first     a
       second    d
two    first     b
       second    e
three  first     c
       second    f
dtype: object

In [48]:
# reset_index() turns both index levels into ordinary columns
# (level_0, level_1); the stacked values end up in column 0.
df4.stack().reset_index()

Unnamed: 0,level_0,level_1,0
0,first,one,a
1,first,two,b
2,first,three,c
3,second,one,d
4,second,two,e
5,second,three,f
