## Date and time data types and tools

In [2]:
from datetime import datetime
now = datetime.now()
now

datetime.datetime(2020, 4, 8, 12, 17, 12, 214538)

In [3]:
now.year, now.month, now.day

(2020, 4, 8)

timedelta 

计算两个时间差

In [4]:
# Subtracting one datetime from another yields a timedelta,
# which is stored as whole days plus leftover seconds.
later = datetime(2011, 1, 7)
earlier = datetime(2008, 6, 24, 8, 15)
delta = later - earlier
delta

datetime.timedelta(days=926, seconds=56700)

In [6]:
delta.days

926

In [7]:
delta.seconds

56700

时间的加减乘除

In [9]:
from datetime import timedelta

In [10]:
start = datetime(2011, 1, 7)

In [12]:
start + timedelta(12)
# The positional argument 12 is a number of days.

datetime.datetime(2011, 1, 19, 0, 0)

In [13]:
start -2*timedelta(12)

datetime.datetime(2010, 12, 14, 0, 0)

## Converting between string and datetime

In [30]:
stamp = datetime(2011, 1, 3)

#### 时间转字符串

In [15]:
str(stamp)

'2011-01-03 00:00:00'

In [26]:
stamp.strftime('%Y-%m-%d')
# %Y is the four-digit year; %y is the two-digit year.

'2011-01-03'

In [27]:
stamp.strftime('%Y%m%d')

'20110103'

In [29]:
stamp.strftime('%Y%m%d%H%M%S')
# Year, month, day, hour, minute and second, concatenated without separators.

'20110103000000'

相关参数

<img src=https://bxdm-markdown.oss-cn-huhehaote.aliyuncs.com/2020-04-08_123551.jpg>

#### 字符串转时间

In [31]:
value = '2011-01-3'

In [32]:
datetime.strptime(value, '%Y-%m-%d')

datetime.datetime(2011, 1, 3, 0, 0)

In [33]:
datestrs = ['7/6/2011', '8/6/2011']

In [36]:
[datetime.strptime(x, '%m/%d/%Y') for x in datestrs]

[datetime.datetime(2011, 7, 6, 0, 0), datetime.datetime(2011, 8, 6, 0, 0)]

##### 使用 dateutil.parser.parse 方法

In [38]:
from dateutil.parser import parse

In [39]:
parse('2011-01-03')

datetime.datetime(2011, 1, 3, 0, 0)

dateutil能够解析大多数人类可理解的日期表示形式

In [40]:
parse('Jan 31, 1997 10:45 PM')

datetime.datetime(1997, 1, 31, 22, 45)

使用 dayfirst 参数解决日期写在月份之前（日/月/年）的问题

In [43]:
parse('6/12/2011', dayfirst = True)

datetime.datetime(2011, 12, 6, 0, 0)

使用 pandas 处理时间

to_datetime方法

In [44]:
datestrs = ['2011-07-06 12:00:00', '2011-08-06 00:00:00']

In [45]:
import pandas as pd

In [46]:
pd.to_datetime(datestrs)

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00'], dtype='datetime64[ns]', freq=None)

In [48]:
idx = pd.to_datetime(datestrs + [None])

In [49]:
idx

DatetimeIndex(['2011-07-06 12:00:00', '2011-08-06 00:00:00', 'NaT'], dtype='datetime64[ns]', freq=None)

In [52]:
idx[2]

NaT

NaT（Not a Time）是 pandas 中表示缺失时间戳的空值

In [51]:
pd.isnull(idx)

array([False, False,  True])

## Time Series Basics

In [59]:
from datetime import datetime
import numpy as np

In [57]:
# Six irregularly spaced days in January 2011, later used as a Series index.
dates = [datetime(2011, 1, day) for day in (2, 5, 7, 8, 10, 12)]

In [60]:
# Six draws from np.random.randn, indexed by the `dates` list.
# No seed is set, so the values differ on every run.
ts = pd.Series(np.random.randn(6), index = dates)

In [61]:
ts

2011-01-02    0.666623
2011-01-05   -0.841262
2011-01-07   -0.343656
2011-01-08   -0.412290
2011-01-10   -0.334201
2011-01-12   -0.249679
dtype: float64

In [62]:
ts.index

DatetimeIndex(['2011-01-02', '2011-01-05', '2011-01-07', '2011-01-08',
               '2011-01-10', '2011-01-12'],
              dtype='datetime64[ns]', freq=None)

In [63]:
ts[::2]

2011-01-02    0.666623
2011-01-07   -0.343656
2011-01-10   -0.334201
dtype: float64

In [64]:
ts+ts[::2]

2011-01-02    1.333245
2011-01-05         NaN
2011-01-07   -0.687313
2011-01-08         NaN
2011-01-10   -0.668402
2011-01-12         NaN
dtype: float64

In [65]:
ts.index.dtype

dtype('<M8[ns]')

In [66]:
stamp = ts.index[0]

In [68]:
stamp

Timestamp('2011-01-02 00:00:00')

### Indexing, selecting, subsetting

In [69]:
stamp = ts.index[2]

In [70]:
ts[stamp]

-0.3436563845253775

In [71]:
ts['1/10/2011']

-0.3342007883700374

In [72]:
ts['20110110']

-0.3342007883700374

In [74]:
# 1000 consecutive calendar days starting 2000-01-01 (date_range's default
# frequency is daily), paired with 1000 random values.
daily_index = pd.date_range('1/1/2000', periods=1000)
longer_ts = pd.Series(np.random.randn(1000), index=daily_index)

In [76]:
longer_ts

2000-01-01   -0.064458
2000-01-02   -0.058273
2000-01-03    1.245547
2000-01-04    0.176521
2000-01-05   -0.797622
                ...   
2002-09-22    1.736215
2002-09-23   -0.385079
2002-09-24   -1.317306
2002-09-25    0.094922
2002-09-26    0.783847
Freq: D, Length: 1000, dtype: float64

In [77]:
longer_ts['2001']

2001-01-01    1.251949
2001-01-02    1.166142
2001-01-03   -0.095799
2001-01-04   -1.223505
2001-01-05    1.190794
                ...   
2001-12-27    0.699319
2001-12-28    1.925360
2001-12-29    0.121326
2001-12-30   -1.068080
2001-12-31   -0.175314
Freq: D, Length: 365, dtype: float64

In [78]:
longer_ts['2001-05']

2001-05-01   -1.117459
2001-05-02   -0.759735
2001-05-03    0.859822
2001-05-04    0.571445
2001-05-05    0.261802
2001-05-06    1.201645
2001-05-07   -0.956378
2001-05-08   -0.948937
2001-05-09    0.698415
2001-05-10    0.622395
2001-05-11    0.272448
2001-05-12   -0.677829
2001-05-13    0.056589
2001-05-14    0.887447
2001-05-15   -0.030199
2001-05-16   -3.145597
2001-05-17   -1.045838
2001-05-18    0.020958
2001-05-19    0.759048
2001-05-20    0.949625
2001-05-21    1.144893
2001-05-22    0.926825
2001-05-23    0.938114
2001-05-24   -0.671009
2001-05-25   -0.549282
2001-05-26    0.205041
2001-05-27    1.061081
2001-05-28    0.736357
2001-05-29   -0.842276
2001-05-30    2.143719
2001-05-31    1.989562
Freq: D, dtype: float64

In [80]:
ts[datetime(2011, 1, 7):]

2011-01-07   -0.343656
2011-01-08   -0.412290
2011-01-10   -0.334201
2011-01-12   -0.249679
dtype: float64

In [81]:
ts

2011-01-02    0.666623
2011-01-05   -0.841262
2011-01-07   -0.343656
2011-01-08   -0.412290
2011-01-10   -0.334201
2011-01-12   -0.249679
dtype: float64

In [82]:
ts['1/6/2011':'1/11/2011']

2011-01-07   -0.343656
2011-01-08   -0.412290
2011-01-10   -0.334201
dtype: float64

truncate 方法切割时间

In [83]:
ts.truncate(after = '1/9/2011')

2011-01-02    0.666623
2011-01-05   -0.841262
2011-01-07   -0.343656
2011-01-08   -0.412290
dtype: float64

In [84]:
dates = pd.date_range('1/1/2000', periods = 100, freq = 'W-WED')

In [85]:
# A 100 x 4 frame of random values: one row per entry of `dates`,
# one column per state name.
state_names = ['Colorado', 'Texas', 'New York', 'Ohio']
long_df = pd.DataFrame(np.random.randn(100, 4), index=dates, columns=state_names)

In [86]:
long_df.loc['5-2001']

Unnamed: 0,Colorado,Texas,New York,Ohio
2001-05-02,-0.336026,2.052724,0.110093,-1.002133
2001-05-09,-0.059237,-0.359659,-1.605254,0.413954
2001-05-16,-0.236027,-1.099272,-1.270959,-0.389026
2001-05-23,-0.629547,0.56627,0.556752,-1.157946
2001-05-30,-0.905021,0.177136,-0.692328,1.958049


P326

获取当前的时间

In [93]:
from datetime import datetime
# Current local time as a compact 14-digit string: YYYYmmddHHMMSS.
# NOTE(review): the name `time` would shadow the stdlib `time` module if
# that module is imported elsewhere in this notebook.
time = format(datetime.now(), '%Y%m%d%H%M%S')
time

'20200408140926'