In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build the row index: 7 consecutive calendar days starting on 2018-08-23.
dates = pd.date_range(start='20180823', periods=7, freq='D')
dates

DatetimeIndex(['2018-08-23', '2018-08-24', '2018-08-25', '2018-08-26',
               '2018-08-27', '2018-08-28', '2018-08-29'],
              dtype='datetime64[ns]', freq='D')

In [3]:
# Fill a 7x4 frame (rows = dates, columns = A..D) with standard-normal samples.
# The samples come from a locally seeded generator so that Restart & Run All
# reproduces the same values every time; the previous unseeded np.random.randn
# call produced different numbers on each run.
# NOTE: outputs recorded before this change came from an unseeded run and will
# differ until the notebook is re-executed.
rng = np.random.RandomState(42)
df = pd.DataFrame(rng.randn(7, 4), index=dates, columns=list('ABCD'))

In [4]:
# Display the whole frame; with 7 rows and 4 columns it is small enough to show in full.
df

Unnamed: 0,A,B,C,D
2018-08-23,0.193088,-0.77355,-1.195571,-1.318505
2018-08-24,-0.989626,0.472926,0.724446,0.47429
2018-08-25,-1.22778,-0.132862,-1.287458,-0.138462
2018-08-26,-0.842578,0.437614,0.207724,-1.086497
2018-08-27,-0.164033,0.084944,0.146501,-0.248172
2018-08-28,-1.358677,3.068604,0.354766,0.472807
2018-08-29,0.146754,-0.545101,-0.325549,0.143098


In [5]:
# Select the data in column A to work with: df['A'].
# A single column label inside [] returns that column as a Series.
df['A']

2018-08-23    0.193088
2018-08-24   -0.989626
2018-08-25   -1.227780
2018-08-26   -0.842578
2018-08-27   -0.164033
2018-08-28   -1.358677
2018-08-29    0.146754
Freq: D, Name: A, dtype: float64

### 切片
可以像数组一样使用切片操作，但要注意：`df[start:stop]` 这种切片得到的是**行数据**，  
同样的写法在**列上**不成立（不能用它来对列做切片）。

In [6]:
# An integer slice inside [] selects rows by position: here rows 1 and 2
# (the stop position 3 is excluded, as with ordinary Python slicing).
df[1:3]

Unnamed: 0,A,B,C,D
2018-08-24,-0.989626,0.472926,0.724446,0.47429
2018-08-25,-1.22778,-0.132862,-1.287458,-0.138462


还可以使用行标签进行切片来指定输出的行（注意：与按位置切片不同，按标签切片时**包含**结束标签所在的行）

In [7]:
# A slice of date strings inside [] selects rows by index label.
# Unlike positional slicing, both endpoints are included in the result.
df['20180823':'20180826']

Unnamed: 0,A,B,C,D
2018-08-23,0.193088,-0.77355,-1.195571,-1.318505
2018-08-24,-0.989626,0.472926,0.724446,0.47429
2018-08-25,-1.22778,-0.132862,-1.287458,-0.138462
2018-08-26,-0.842578,0.437614,0.207724,-1.086497


DataFrame 的 **loc** 索引器按**标签**选择数据，比如用 `dates[0]`（索引中的第一个标签）选出第一行数据（注意我们是用 dates 作为索引的）

In [8]:
# Look up a single row by its index label (dates[0] is the first label in the index).
# The row comes back as a Series keyed by column name.
df.loc[dates[0]]

A    0.193088
B   -0.773550
C   -1.195571
D   -1.318505
Name: 2018-08-23 00:00:00, dtype: float64

In [10]:
# The same label lookup also accepts a date string in place of a Timestamp.
df.loc['20180824']

A   -0.989626
B    0.472926
C    0.724446
D    0.474290
Name: 2018-08-24 00:00:00, dtype: float64

In [11]:
# Select multiple columns with .loc: keep every row and list the wanted column labels.
df.loc[
    :,           # all rows
    ['A', 'B'],  # only these columns
]

Unnamed: 0,A,B
2018-08-23,0.193088,-0.77355
2018-08-24,-0.989626,0.472926
2018-08-25,-1.22778,-0.132862
2018-08-26,-0.842578,0.437614
2018-08-27,-0.164033,0.084944
2018-08-28,-1.358677,3.068604
2018-08-29,0.146754,-0.545101


In [12]:
# Use the column names directly, placed in a list; a list inside [] returns a
# DataFrame containing just those columns.
df[['A','C']]

Unnamed: 0,A,C
2018-08-23,0.193088,-1.195571
2018-08-24,-0.989626,0.724446
2018-08-25,-1.22778,-1.287458
2018-08-26,-0.842578,0.207724
2018-08-27,-0.164033,0.146501
2018-08-28,-1.358677,0.354766
2018-08-29,0.146754,-0.325549


In [13]:
# Select a local region of the frame: the intersection of a range of rows
# with a list of columns.
df.loc[
    '20180823':'20180826',  # row labels; both endpoints are included
    ['A', 'D'],             # column labels
]

Unnamed: 0,A,D
2018-08-23,0.193088,-1.318505
2018-08-24,-0.989626,0.47429
2018-08-25,-1.22778,-0.138462
2018-08-26,-0.842578,-1.086497


In [14]:
# To select just one value, specify both the row label and the column label:
df.loc[dates[1],'D']

0.4742896269719507

In [15]:
# Of course, the `at` accessor is dedicated to fetching a single value
# by row label and column label:
df.at[dates[0],'A']

0.19308797961813945