# pandas中的索引和切片

In [9]:
import pandas as pd
import numpy as np
import random
# Build an 8x4 frame of uniform [0, 1) samples with row labels a-h and column labels A-D.
# list("...") splits a string into one label per character.
# No seed is set in this cell, so the values differ on every run.
df = pd.DataFrame(
    data=np.random.random(size=(8, 4)),
    index=list("abcdefgh"),
    columns=list("ABCD"),
)
df

Unnamed: 0,A,B,C,D
a,0.010351,0.262152,0.005405,0.009331
b,0.55001,0.226908,0.863487,0.997294
c,0.924076,0.135768,0.401613,0.613765
d,0.802573,0.509383,0.357479,0.936218
e,0.197868,0.100264,0.63438,0.639545
f,0.680831,0.282276,0.079278,0.163602
g,0.205355,0.682879,0.987507,0.274701
h,0.460443,0.030412,0.003879,0.016487


## .loc用法 
.loc主要是基于标签（label）的，包括行标签（index）和列标签（columns）

In [10]:
df.loc["c"]  # A single label selects one row

A    0.924076
B    0.135768
C    0.401613
D    0.613765
Name: c, dtype: float64

In [11]:
df.loc[["a", "c", "e"]]  # A list of labels still selects rows

Unnamed: 0,A,B,C,D
a,0.010351,0.262152,0.005405,0.009331
c,0.924076,0.135768,0.401613,0.613765
e,0.197868,0.100264,0.63438,0.639545


In [12]:
df.loc["c":"h"]  # A label slice also selects rows; both the start and the end label are included

Unnamed: 0,A,B,C,D
c,0.924076,0.135768,0.401613,0.613765
d,0.802573,0.509383,0.357479,0.936218
e,0.197868,0.100264,0.63438,0.639545
f,0.680831,0.282276,0.079278,0.163602
g,0.205355,0.682879,0.987507,0.274701
h,0.460443,0.030412,0.003879,0.016487


In [13]:
df.loc[df.A > 0.5]  # Boolean mask: keep the rows whose column A value is greater than 0.5

Unnamed: 0,A,B,C,D
b,0.55001,0.226908,0.863487,0.997294
c,0.924076,0.135768,0.401613,0.613765
d,0.802573,0.509383,0.357479,0.936218
f,0.680831,0.282276,0.079278,0.163602


In [14]:
df.A  # Attribute access returns column "A"

a    0.010351
b    0.550010
c    0.924076
d    0.802573
e    0.197868
f    0.680831
g    0.205355
h    0.460443
Name: A, dtype: float64

In [15]:
df.loc[df.A > 0.5, ["C", "D"]]  # Columns "C" and "D" of every row where column "A" is greater than 0.5

Unnamed: 0,C,D
b,0.863487,0.997294
c,0.401613,0.613765
d,0.357479,0.936218
f,0.079278,0.163602


In [17]:
df.loc["a", ["B", "D"]]  # Columns "B" and "D" of row "a"

B    0.262152
D    0.009331
Name: a, dtype: float64

#### 小结：当要选取的标签是连续的时候，可以直接写成切片，不需要中括号；而且 .loc 中的切片是闭区间（start 和 end 都可以取到）

In [19]:
df.loc[["b", "d"], "A":"C"]  # Rows "b" and "d"; the column slice "A":"C" needs no list brackets and includes "C"

Unnamed: 0,A,B,C
b,0.55001,0.226908,0.863487
d,0.802573,0.509383,0.357479


In [20]:
df.loc[["b", "d"], ["A", "C"]]  # Rows "b" and "d", columns "A" and "C" only (a list, not a slice)

Unnamed: 0,A,C
b,0.55001,0.863487
d,0.802573,0.357479


## .iloc用法
.iloc是基于位置的索引，和上面的用法类似，只是使用的是从0开始的整数位置坐标

In [16]:
df.iloc[5]  # Position 5, i.e. the sixth row

A    0.680831
B    0.282276
C    0.079278
D    0.163602
Name: f, dtype: float64

In [21]:
df.iloc[[5, 1, 7]]  # Rows at positions 5, 1 and 7 (the 6th, 2nd and 8th rows), returned in that order

Unnamed: 0,A,B,C,D
f,0.680831,0.282276,0.079278,0.163602
b,0.55001,0.226908,0.863487,0.997294
h,0.460443,0.030412,0.003879,0.016487


In [34]:
df.iloc[0:5,[0, 2, 3]]  # Positional slices in .iloc exclude the end: rows 0-4, columns at positions 0, 2 and 3

Unnamed: 0,A,C,D
a,0.010351,0.005405,0.009331
b,0.55001,0.863487,0.997294
c,0.924076,0.401613,0.613765
d,0.802573,0.357479,0.936218
e,0.197868,0.63438,0.639545


## [ ]切片用法
[]传入单个标签或标签列表时只能按照列索引来选取，传入单个位置坐标或者单个行标签都不行（例外：传入切片时，例如df[0:3]，是按行切片）。由于是pandas中的DataFrame数据，所以不能像numpy中那样使用[1,:]取第二行的所有内容；需要取行时请使用.loc或.iloc

In [29]:
df["B"]  # A single column label returns that column

a    0.262152
b    0.226908
c    0.135768
d    0.509383
e    0.100264
f    0.282276
g    0.682879
h    0.030412
Name: B, dtype: float64

In [31]:
df[["A", "C"]]  # A list of column labels returns those columns

Unnamed: 0,A,C
a,0.010351,0.005405
b,0.55001,0.863487
c,0.924076,0.401613
d,0.802573,0.357479
e,0.197868,0.63438
f,0.680831,0.079278
g,0.205355,0.987507
h,0.460443,0.003879
