## 11.1 日期和时间数据的类型及工具

In [49]:
from datetime import datetime
import pandas as pd
import numpy as np

In [3]:
# Snapshot of the current local date and time; the value changes on every run.
now = datetime.now()

In [4]:
now

datetime.datetime(2019, 7, 21, 15, 47, 24, 489277)

In [5]:
# A datetime exposes its calendar fields as plain integer attributes.
now.year, now.month, now.day

(2019, 7, 21)

In [6]:
# Subtracting one datetime from another yields a timedelta (the elapsed time).
delta = (
    datetime(year=2011, month=1, day=7)
    - datetime(year=2008, month=6, day=24, hour=8, minute=15)
)

In [7]:
delta

datetime.timedelta(days=926, seconds=56700)

In [8]:
# Whole-day part of the difference.
delta.days

926

In [9]:
# Leftover seconds beyond the whole days (not the total duration in seconds).
delta.seconds

56700

In [10]:
from datetime import timedelta

In [11]:
# Reference date used for the timedelta arithmetic in the following cells.
start = datetime(year=2011, month=1, day=7)

In [12]:
# timedelta's first positional argument is `days`; spelled out here for clarity.
start + timedelta(days=12)

datetime.datetime(2011, 1, 19, 0, 0)

In [13]:
# A timedelta can be scaled by an integer before being subtracted (2 x 12 days).
start - 2 * timedelta(days=12)

datetime.datetime(2010, 12, 14, 0, 0)

### 11.1.1 字符串与datetime互相转换

In [22]:
# Sample datetime used for the string-formatting examples that follow.
stamp = datetime(year=2011, month=1, day=3)

In [23]:
# str() renders a datetime as 'YYYY-MM-DD HH:MM:SS'.
str(stamp)

'2011-01-03 00:00:00'

In [24]:
# strftime formats with an explicit pattern: %Y = 4-digit year, %m = month, %d = day.
stamp.strftime('%Y-%m-%d')

'2011-01-03'

In [25]:
stamp

datetime.datetime(2011, 1, 3, 0, 0)

In [26]:
# Date string laid out as year-month-day, parsed with strptime in the next cell.
value = '2011-01-03'

In [27]:
# strptime is the inverse of strftime: parse a string whose format is known.
datetime.strptime(value, '%Y-%m-%d')

datetime.datetime(2011, 1, 3, 0, 0)

In [28]:
# Month/day/year strings; parsed with '%m/%d/%Y' in the next cell.
datestrs = ['7/6/2011', '8/6/2011']

In [30]:
# Parse every month/day/year string into a datetime.
[datetime.strptime(date_str, '%m/%d/%Y') for date_str in datestrs]

[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

In [31]:
from dateutil.parser import parse

In [32]:
# dateutil's parse() works out the layout itself; no format string is supplied.
parse('2011-01-03')

datetime.datetime(2011, 1, 3, 0, 0)

In [33]:
# Month names and AM/PM markers are understood as well (10:45 PM -> 22:45).
parse('Jan 31, 1997 10:45 PM')

datetime.datetime(1997, 1, 31, 22, 45)

In [36]:
# dayfirst=True reads the leading number as the day, giving 6 December 2011.
parse('6/12/2011', dayfirst=True)

datetime.datetime(2011, 12, 6, 0, 0)

In [40]:
# Rebinds `datestrs` to full timestamp strings, then converts the whole list
# in one call; a list input comes back as a DatetimeIndex.
datestrs = [
    '2011-07-06 12:00:00',
    '2011-08-06 00:00:00',
]
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [41]:
# A None entry is converted to NaT, pandas' marker for a missing timestamp.
idx = pd.to_datetime(datestrs + [None])

In [42]:
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [43]:
# Third element: the one produced from the None entry.
idx[2]

NaT

In [44]:
# Element-wise missing-value mask; NaT counts as missing.
pd.isna(idx)

array([False, False,  True])

## 11.2 时间序列基础

In [46]:
from datetime import datetime

In [47]:
# Six irregularly spaced days in January 2011, used as the index of `ts`.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

In [50]:
# Six random values indexed by the datetime list built in the previous cell.
# NOTE(review): no random seed is set in the visible cells, so the numbers shown differ on every run.
ts = pd.Series(np.random.randn(6), index=dates)

In [51]:
ts

2011-01-02   -0.223721
2011-01-05    0.646766
2011-01-07    0.994760
2011-01-08    0.549594
2011-01-10    0.112614
2011-01-12    1.403292
dtype: float64

In [53]:
# The list of datetime objects passed as the index has become a DatetimeIndex.
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [54]:
# ts[::2] keeps every second row; arithmetic aligns on the date labels,
# so dates absent from the slice come out as NaN.
ts + ts[::2]

2011-01-02   -0.447442
2011-01-05         NaN
2011-01-07    1.989519
2011-01-08         NaN
2011-01-10    0.225228
2011-01-12         NaN
dtype: float64

In [55]:
# The index stores NumPy datetime64 values at nanosecond resolution ('<M8[ns]').
ts.index.dtype

dtype('<M8[ns]')

In [56]:
# A scalar taken from a DatetimeIndex is a pandas Timestamp.
# Note: this rebinds `stamp`, which held a plain datetime in section 11.1.1.
stamp = ts.index[0]

In [57]:
stamp

Timestamp('2011-01-02 00:00:00')

### 11.2.1 索引、选择、子集

In [59]:
# Third index label (2011-01-07).
stamp = ts.index[2]

In [60]:
# A Timestamp taken from the index works directly as a lookup key.
ts[stamp]

0.9947595203502951

In [61]:
# Date strings in different formats resolve to the same label (2011-01-10).
# Only the last expression in a cell is displayed, so a single value appears below.
ts['1/10/2011']
ts['20110110']

0.11261400692089056

In [62]:
# 1000 consecutive daily observations starting on 2000-01-01.
longer_ts = pd.Series(
    np.random.randn(1000),
    index=pd.date_range(start='1/1/2000', periods=1000),
)

In [63]:
longer_ts

2000-01-01    2.522908
2000-01-02    0.533528
2000-01-03   -0.153660
2000-01-04    0.034125
2000-01-05   -0.567512
2000-01-06   -0.456938
2000-01-07   -0.594074
2000-01-08    0.887425
2000-01-09   -0.271760
2000-01-10   -1.980888
2000-01-11    0.992604
2000-01-12    0.621791
2000-01-13    0.682763
2000-01-14   -0.723731
2000-01-15   -0.827283
2000-01-16    0.836218
2000-01-17    0.649517
2000-01-18    0.643485
2000-01-19   -0.886647
2000-01-20    0.363609
2000-01-21    0.318913
2000-01-22    0.354430
2000-01-23   -0.800776
2000-01-24    1.423868
2000-01-25   -0.158179
2000-01-26    0.118221
2000-01-27    0.774632
2000-01-28    1.126901
2000-01-29   -0.024635
2000-01-30   -0.609965
                ...   
2002-08-28   -0.170101
2002-08-29   -0.463675
2002-08-30    1.555261
2002-08-31   -1.032518
2002-09-01   -0.382331
2002-09-02   -0.408631
2002-09-03    1.392943
2002-09-04   -0.888031
2002-09-05   -1.039951
2002-09-06   -0.668727
2002-09-07   -0.172767
2002-09-08    0.807397
2002-09-09 

In [64]:
# A year string selects every observation that falls in that year.
longer_ts['2001']

2001-01-01   -0.423812
2001-01-02    1.793514
2001-01-03    0.551346
2001-01-04    0.806406
2001-01-05    0.293226
2001-01-06    0.464240
2001-01-07   -0.542398
2001-01-08    0.357011
2001-01-09    0.248063
2001-01-10   -0.532271
2001-01-11   -1.340918
2001-01-12    0.953504
2001-01-13    0.608797
2001-01-14   -1.315899
2001-01-15    1.266354
2001-01-16    0.514322
2001-01-17    1.319357
2001-01-18   -0.281613
2001-01-19    1.082743
2001-01-20   -1.112974
2001-01-21   -1.314214
2001-01-22    1.114744
2001-01-23    0.610045
2001-01-24   -1.434840
2001-01-25    0.466434
2001-01-26   -0.769594
2001-01-27    0.779176
2001-01-28   -0.371454
2001-01-29    0.842785
2001-01-30    0.520029
                ...   
2001-12-02   -1.212399
2001-12-03    0.688980
2001-12-04   -0.618518
2001-12-05    1.124397
2001-12-06   -2.126454
2001-12-07   -2.483123
2001-12-08    2.127412
2001-12-09   -0.407477
2001-12-10    1.247032
2001-12-11    1.345696
2001-12-12    1.574876
2001-12-13   -0.436649
2001-12-14 

In [65]:
# A year-month string narrows the selection to a single month.
longer_ts['2001-05']

2001-05-01   -0.548711
2001-05-02   -0.777061
2001-05-03   -1.905582
2001-05-04    1.707208
2001-05-05    0.121594
2001-05-06    0.484017
2001-05-07    0.115319
2001-05-08    2.052314
2001-05-09   -0.724474
2001-05-10   -0.798091
2001-05-11    0.833581
2001-05-12   -0.122872
2001-05-13   -0.294126
2001-05-14    0.736063
2001-05-15   -1.936579
2001-05-16    0.840689
2001-05-17   -1.044102
2001-05-18   -1.067325
2001-05-19   -2.567034
2001-05-20   -0.160422
2001-05-21   -0.000038
2001-05-22    0.668091
2001-05-23    0.183096
2001-05-24   -1.854385
2001-05-25    1.717118
2001-05-26    0.407657
2001-05-27    0.810552
2001-05-28    0.686471
2001-05-29    0.719392
2001-05-30   -0.858260
2001-05-31    2.152698
Freq: D, dtype: float64

In [66]:
# Slicing with a datetime keeps everything from that date onward (start label included).
ts[datetime(2011, 1, 7):]

2011-01-07    0.994760
2011-01-08    0.549594
2011-01-10    0.112614
2011-01-12    1.403292
dtype: float64

In [67]:
ts

2011-01-02   -0.223721
2011-01-05    0.646766
2011-01-07    0.994760
2011-01-08    0.549594
2011-01-10    0.112614
2011-01-12    1.403292
dtype: float64

In [68]:
# Slice endpoints need not exist in the index: neither 2011-01-06 nor 2011-01-11
# is a label of `ts`, yet the observations between them are returned.
ts['1/6/2011': '1/11/2011']

2011-01-07    0.994760
2011-01-08    0.549594
2011-01-10    0.112614
dtype: float64

In [69]:
# truncate(after=...) drops every observation later than 2011-01-09.
ts.truncate(after='1/9/2011')

2011-01-02   -0.223721
2011-01-05    0.646766
2011-01-07    0.994760
2011-01-08    0.549594
dtype: float64

In [71]:
# Rebinds `dates`: 100 weekly timestamps, each falling on a Wednesday.
dates = pd.date_range(
    start='1/1/2000',
    periods=100,
    freq='W-WED',
)

In [72]:
# 100 weekly rows x 4 state columns of random normal draws.
long_df = pd.DataFrame(
    data=np.random.randn(100, 4),
    index=dates,
    columns=['Colorado', 'Texas', 'New York', 'Ohio'],
)

In [73]:
# Partial-string row selection with .loc: '5-2001' is read as May 2001
# and returns every row dated in that month.
long_df.loc['5-2001']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,0.614409,0.390621,0.850177,0.873503
2001-05-09,0.503678,-0.16521,-1.634439,1.132449
2001-05-16,-0.622204,0.412303,-0.93074,-0.287249
2001-05-23,0.217397,-1.279425,0.835723,-1.092915
2001-05-30,0.626853,-0.01536,-1.404594,0.470014


In [77]:
# Rebinds `dates` to an index in which 2000-01-02 occurs three times.
dates = pd.DatetimeIndex(
    ['1/1/2000'] + ['1/2/2000'] * 3 + ['1/3/2000']
)

In [78]:
# Integer values 0-4 on an index that contains repeated dates.
dup_ts = pd.Series(np.arange(5), index=dates)

In [79]:
dup_ts

2000-01-01    0
2000-01-02    1
2000-01-02    2
2000-01-02    3
2000-01-03    4
dtype: int32

In [81]:
# False here, because 2000-01-02 appears more than once in the index.
dup_ts.index.is_unique

False

In [82]:
# A label that occurs once returns a scalar.
dup_ts['1/3/2000']

4

In [83]:
# A duplicated label returns a Series holding every matching row.
dup_ts['1/2/2000']

2000-01-02    1
2000-01-02    2
2000-01-02    3
dtype: int32

In [84]:
# level=0 groups on the single index level, i.e. by timestamp, so rows that
# share a date land in the same group.
grouped = dup_ts.groupby(level=0)

In [87]:
# Average of the values that share each timestamp.
# NOTE(review): the integer dtype shown below looks like older-pandas behaviour;
# recent releases presumably return float64 here — confirm against the installed version.
grouped.mean()

2000-01-01    0
2000-01-02    2
2000-01-03    4
dtype: int32

In [88]:
# Number of observations recorded for each distinct timestamp.
grouped.count()

2000-01-01    1
2000-01-02    3
2000-01-03    1
dtype: int64

## 11.3 日期范围、频率和位移