选取pandas中的行、列和元素值 <br>
`loc` 和 `iloc` 的区别在于：前者按 index 的标签（名称）选取，而后者按 index 的整数位置（从 0 开始）选取。

In [4]:
import pandas as pd
import numpy as np

In [8]:
# Row labels: the student names, stored as an index called "Name".
index = pd.Index(data=["Lee", "Richard", "Mia", "Teddy", "Judy", "Alice"], name="Name")

# Sample records. Missing values appear in several forms: np.nan, None,
# and a whitespace-only string (" ") in the "city" column.
data = {
    "age": [np.nan, 30, 26, 30, np.nan, 20],
    "city": ["ShenYang", np.nan, "ShangHai", "Beijing", " ", "ShenZhen"],
    "sex": ["male", "female", None,  "male", np.nan, "unknown"],
    "enrollment": ["2016-03-20", "2015-04-12", "2017-3-6", None, "2018-11-2", np.nan]
}

# Pin the column order explicitly. The outputs shown in this notebook (and the
# positional lookup students.iat[2, 2]) assume the order
# age, city, enrollment, sex. Without `columns=`, the order follows the dict,
# which puts "sex" before "enrollment" on pandas versions that keep dict
# insertion order, and positional column access then returns a different value.
column_order = ["age", "city", "enrollment", "sex"]

students = pd.DataFrame(data=data, index=index, columns=column_order)

In [9]:
# Display the whole frame; it has only six rows, so no .head() is needed.
students

Unnamed: 0_level_0,age,city,enrollment,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Lee,,ShenYang,2016-03-20,male
Richard,30.0,,2015-04-12,female
Mia,26.0,ShangHai,2017-3-6,
Teddy,30.0,Beijing,,male
Judy,,,2018-11-2,
Alice,20.0,ShenZhen,,unknown


### 选取若干行

In [10]:
# Select consecutive rows with a plain slice: positions 1 through 3
# (the end position, 4, is excluded).
students[1:4]

Unnamed: 0_level_0,age,city,enrollment,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Richard,30.0,,2015-04-12,female
Mia,26.0,ShangHai,2017-3-6,
Teddy,30.0,Beijing,,male


In [17]:
# loc selects rows by index label; a list of labels returns those rows
# as a DataFrame.
students.loc[['Mia', 'Alice']]

Unnamed: 0_level_0,age,city,enrollment,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mia,26.0,ShangHai,2017-3-6,
Alice,20.0,ShenZhen,,unknown


In [20]:
# iloc selects rows by integer position (0-based); positions 2 and 4
# are Mia and Judy.
students.iloc[[2, 4]]

Unnamed: 0_level_0,age,city,enrollment,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mia,26.0,ShangHai,2017-3-6,
Judy,,,2018-11-2,


In [22]:
# iloc also accepts a slice; as with Python slicing, the end position (5)
# is excluded.
students.iloc[2:5]

Unnamed: 0_level_0,age,city,enrollment,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mia,26.0,ShangHai,2017-3-6,
Teddy,30.0,Beijing,,male
Judy,,,2018-11-2,


In [23]:
# loc also accepts a slice of labels. Note that, unlike iloc, the end
# label ('Judy') is included in the result.
students.loc['Mia':'Judy']

Unnamed: 0_level_0,age,city,enrollment,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mia,26.0,ShangHai,2017-3-6,
Teddy,30.0,Beijing,,male
Judy,,,2018-11-2,


In [26]:
# Select a single row as a DataFrame: wrap the position in a list.
students.iloc[[1]]

Unnamed: 0_level_0,age,city,enrollment,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Richard,30.0,,2015-04-12,female


In [27]:
# Select a single row as a Series: pass a bare integer position.
# The column names become the Series index.
students.iloc[1]

age                   30
city                 NaN
enrollment    2015-04-12
sex               female
Name: Richard, dtype: object

### 选取若干列

In [28]:
# Select several columns by passing a list of column names.
students[['age','city']]

Unnamed: 0_level_0,age,city
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Lee,,ShenYang
Richard,30.0,
Mia,26.0,ShangHai
Teddy,30.0,Beijing
Judy,,
Alice,20.0,ShenZhen


In [31]:
# Select one column as a DataFrame: pass a one-element list.
students[['age']]

Unnamed: 0_level_0,age
Name,Unnamed: 1_level_1
Lee,
Richard,30.0
Mia,26.0
Teddy,30.0
Judy,
Alice,20.0


In [32]:
# Select one column as a Series: pass the bare column name.
students['age']

Name
Lee         NaN
Richard    30.0
Mia        26.0
Teddy      30.0
Judy        NaN
Alice      20.0
Name: age, dtype: float64

In [33]:
# Attribute (dot) access also returns the column as a Series.
# It only works when the column name is a valid Python identifier that
# does not collide with an existing DataFrame attribute or method.
students.age

Name
Lee         NaN
Richard    30.0
Mia        26.0
Teddy      30.0
Judy        NaN
Alice      20.0
Name: age, dtype: float64

### 选取某个元素

In [46]:
# For a Series, an element can be read directly, either as an attribute
# or with a [] label lookup; both print the same value.
student = students.iloc[1]  # row at position 1 (Richard) as a Series
print(student.age)
print(student['age'])

30.0
30.0


In [52]:
# For a DataFrame, loc takes (row label, column label) and returns the
# single value at that cell.
# Note: iloc raises an error when given labels like this; it only accepts
# integer positions.
students.loc['Mia','age']

26.0

In [53]:
# For a DataFrame, iloc can reach a labelled column in two steps:
# iloc[1] returns the row as a Series, then ['sex'] looks the label up in
# that Series.
# NOTE: chained indexing like this is fine for reading a value, but should
# not be used for assignment; use a single .loc/.iloc call for that.
students.iloc[1]['sex']

'female'

In [54]:
# loc can be chained in the same two-step way: loc['Mia'] returns the row
# as a Series, then ['age'] looks the label up in that Series.
students.loc['Mia']['age']

26.0

In [60]:
# at selects a single element by (row label, column label), and is faster
# for scalar access.
students.at['Mia', 'age']

26.0

In [67]:
# iat selects a single element by (row position, column position), and is
# faster for scalar access.
# NOTE(review): the result shown ('2017-3-6') assumes 'enrollment' is the
# third column (position 2); the value returned depends on the frame's
# column order, so confirm it against students.columns.
students.iat[2, 2]

'2017-3-6'

#### 交叉选取多个元素

In [55]:
# loc usage: pass a list of row labels and a list of column labels to pick
# the cross-section. Rows come back in the order requested (Alice, then Mia).
students.loc[['Alice','Mia'], ['age','sex']]

Unnamed: 0_level_0,age,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Alice,20.0,unknown
Mia,26.0,


In [59]:
# iloc usage: iloc only accepts integer positions, so translate the column
# labels into positions first, then select rows and columns in a single
# iloc[rows, columns] call instead of chaining iloc[...] with a second []
# lookup. get_loc raises KeyError if a label is not a column.
column_positions = [students.columns.get_loc(label) for label in ['age', 'sex']]
students.iloc[1:4, column_positions]

Unnamed: 0_level_0,age,sex
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Richard,30.0,female
Mia,26.0,
Teddy,30.0,male
