In [1]:
import pandas as pd

# Small demo frame with three integer columns.
df = pd.DataFrame(
    {
        'A': [1, 2, 3, 4],
        'B': [5, 6, 7, 8],
        'C': [9, 10, 11, 12],
    }
)

# Boolean-mask indexing: keep only the rows whose value in column A exceeds 2.
filtered_df = df[df['A'].gt(2)]
filtered_df

Unnamed: 0,A,B,C
2,3,7,11
3,4,8,12


In [3]:
import pandas as pd

df = pd.DataFrame({
    'A': [1, 2, 3, 4],
    'B': [5, 6, 7, 8],
    'C': [9, 10, 11, 12]}
)

# Keep only the rows whose value in column A is divisible by 3.
# Any element-wise expression that yields a boolean Series can serve as the mask.
filtered_df = df[df['A'] % 3 == 0]
filtered_df

Unnamed: 0,A,B,C
2,3,7,11


In [4]:
# Filter with a query() expression string: keep rows where column A is greater than 2.
filtered_df = df.query('A > 2')
filtered_df

Unnamed: 0,A,B,C
2,3,7,11
3,4,8,12


In [6]:
# query() expressions can contain arithmetic: keep rows where column A is even.
filtered_df = df.query('A % 2 == 0')
filtered_df

Unnamed: 0,A,B,C
1,2,6,10
3,4,8,12


In [8]:
# Index labels of the rows where column A is less than or equal to 2.
df[df['A'] <= 2].index

Index([0, 1], dtype='int64')

In [7]:
# Remove the rows where column A is <= 2: select those rows with a boolean
# mask, take their index labels, and hand the labels to drop().
df_dropped = df.drop(index=df[df['A'].le(2)].index)
df_dropped

Unnamed: 0,A,B,C
2,3,7,11
3,4,8,12


In [9]:
# Drop the rows where column A is <= 2, this time by listing their
# index labels (0 and 1) explicitly instead of computing them.
df_dropped = df.drop([0,1])
df_dropped

Unnamed: 0,A,B,C
2,3,7,11
3,4,8,12


In [10]:
# Use loc[] with a boolean mask to keep the rows where column A is greater
# than 2; the trailing ':' selects every column.
filtered_df = df.loc[df['A'] > 2, :]
filtered_df

Unnamed: 0,A,B,C
2,3,7,11
3,4,8,12


In [11]:
# Select every row where column 'A' is greater than 2 AND column 'B' is less
# than 8. The two boolean Series are combined element-wise with &.
filtered_df = df.loc[df['A'].gt(2) & df['B'].lt(8)]
filtered_df

Unnamed: 0,A,B,C
2,3,7,11


In [12]:
# Select the first two rows by position (positions 0 and 1).
first_two_rows = df.iloc[:2]
first_two_rows

Unnamed: 0,A,B,C
0,1,5,9
1,2,6,10


In [13]:
# Select every column except 'A'. iloc is positional, so this keeps all rows
# and skips the column at position 0, which is 'A' in this frame.
all_but_a = df.iloc[:, 1:]
all_but_a

Unnamed: 0,B,C
0,5,9
1,6,10
2,7,11
3,8,12


In [14]:
# Keep the rows whose value in column B is either 5 or 7.
filtered_df = df[df['B'].isin([5, 7])]
filtered_df

Unnamed: 0,A,B,C
0,1,5,9
2,3,7,11


In [15]:
# Display the whole DataFrame.
df

Unnamed: 0,A,B,C
0,1,5,9
1,2,6,10
2,3,7,11
3,4,8,12


In [17]:
# isin() on the whole DataFrame returns a boolean frame of the same shape:
# True where the cell value is 5 or 7, False everywhere else.
df.isin([5, 7])

Unnamed: 0,A,B,C
0,False,True,False
1,False,False,False
2,False,True,False
3,False,False,False


In [19]:
import pandas as pd
import numpy as np

# Demo frame with one missing value (NaN) in column B and one in column C.
df = pd.DataFrame(
    {
        'A': [1, 2, 3, 4],
        'B': [5, 6, np.nan, 8],
        'C': [9, np.nan, 11, 12],
    }
)

# Keep only the rows where column C holds a value (is not NaN).
c_present = df['C'].notna()
filtered_df = df[c_present]
filtered_df

Unnamed: 0,A,B,C
0,1,5.0,9.0
2,3,,11.0
3,4,8.0,12.0


In [21]:
# Keep only the rows where column C is missing (NaN).
# isna() states the intent directly instead of negating notna().
filtered_df = df[df['C'].isna()]
filtered_df

Unnamed: 0,A,B,C
1,2,6.0,


In [22]:
import pandas as pd

df = pd.DataFrame(
    {
        'A': [1, 2, 3, 4],
        'B': [5, 6, 7, 8],
        'C': [9, 10, 11, 12],
    }
)

# Keep the rows whose value in column A lies between 2 and 4.
# With its default settings between() includes both endpoints.
filtered_df = df[df['A'].between(left=2, right=4)]
filtered_df

Unnamed: 0,A,B,C
1,2,6,10
2,3,7,11
3,4,8,12


In [25]:
import pandas as pd

df = pd.DataFrame(
    {
        'A': [1, 2, 3, 4],
        'B': [5, 6, 7, 8],
        'C': [9, 10, 11, 12],
    }
)

# Keep the rows whose value in column A lies between 2 and 4, including only
# the left endpoint (2 is kept, 4 is excluded).
filtered_df = df[df['A'].between(left=2, right=4, inclusive='left')]
filtered_df

Unnamed: 0,A,B,C
1,2,6,10
2,3,7,11


In [26]:
# Select columns A and B by name with filter().
filtered_df = df.filter(items=['A', 'B'])
filtered_df

Unnamed: 0,A,B
0,1,5
1,2,6
2,3,7
3,4,8


In [27]:
# Use a regular expression to select the columns whose name contains 'A'.
filtered_df = df.filter(regex='A')
filtered_df

Unnamed: 0,A
0,1
1,2
2,3
3,4


In [28]:
# Keep the rows where column A is greater than 2 and column B equals 7.
filtered_df = df[(df['A'] > 2) & (df['B'] == 7)]
filtered_df

Unnamed: 0,A,B,C
2,3,7,11


In [31]:
# Build a DataFrame whose index is a date range: four periods starting at 2021-01-01.
df = pd.DataFrame(
    {'Data': [10, 20, 30, 40]},
    index=pd.date_range(start='20210101', periods=4),
)

# Slice rows by date label: everything from 2021-01-03 onward,
# i.e. the data after 2021-01-02.
filtered_df = df['20210103':]
filtered_df

Unnamed: 0,Data
2021-01-03,30
2021-01-04,40


In [35]:
import pandas as pd

# Demo frame whose row index uses the string labels 'a' through 'd'.
df = pd.DataFrame(
    {
        'A': [1, 2, 3, 4],
        'B': [5, 6, 7, 8],
        'C': [9, 10, 11, 12],
    },
    index=['a', 'b', 'c', 'd'],
)

# Slice rows by index label: everything from label 'b' to the end.
filtered_df = df['b':]
filtered_df

Unnamed: 0,A,B,C
b,2,6,10
c,3,7,11
d,4,8,12


In [5]:
import pandas as pd

# Demo frame whose row index uses the string labels 'a' through 'd'.
df = pd.DataFrame({
    'A': [1, 2, 3, 4],
    'B': [5, 6, 7, 8],
    'C': [9, 10, 11, 12]},
    index=list('abcd')
)

# End the cell with the frame itself so the notebook renders it as the cell
# output; an assignment alone displays nothing.
df


Unnamed: 0,A,B,C
a,1,5,9
b,2,6,10
c,3,7,11
d,4,8,12
