In [1]:
import datetime

import numpy as np
import pandas as pd

%matplotlib inline

# 1.Python和Pandas日期工具的区别

In [2]:
# Build date, time and datetime objects with the standard-library datetime module
# (imported together with the other imports in the first cell of the notebook).
date = datetime.date(year=2013, month=6, day=7)
time = datetime.time(hour=12, minute=30, second=19, microsecond=463198)
# The datetime below carries the same calendar date and clock time in one object.
dt = datetime.datetime(year=2013, month=6, day=7,
                       hour=12, minute=30, second=19, microsecond=463198)

print('date is', date)
print('time is', time)
print('datetime is', dt)

date is 2013-06-07
time is 12:30:19.463198
datetime is 2013-06-07 12:30:19.463198


In [3]:
# Create a timedelta from a mix of units and print it
td = datetime.timedelta(
    weeks=2,
    days=5,
    hours=10,
    minutes=20,
    seconds=6.73,
    milliseconds=99,
    microseconds=8,
)
print(td)

19 days, 10:20:06.829008


In [4]:
# Add the timedelta to both the date and the datetime objects
for label, moment in (('new date is', date), ('new datetime is', dt)):
    print(label, moment + td)

new date is 2013-06-26
new datetime is 2013-06-26 22:50:26.292206


In [5]:
# A datetime.time cannot be added to a timedelta. The TypeError is the point of this
# cell, so it is caught and printed instead of being left to abort "Run All".
try:
    time + td
except TypeError as err:
    print('TypeError:', err)

TypeError: unsupported operand type(s) for +: 'datetime.time' and 'datetime.timedelta'

In [6]:
# Now the pandas Timestamp object. Its constructor is flexible and accepts many
# kinds of input; here every component is given as a keyword argument.
pd.Timestamp(
    year=2012, month=12, day=21,
    hour=5, minute=10, second=8, microsecond=99,
)

Timestamp('2012-12-21 05:10:08.000099')

In [7]:
# A slash-separated year/month/day string is parsed to midnight of that date
pd.Timestamp('2016/1/10')

Timestamp('2016-01-10 00:00:00')

In [8]:
# Mixed '-' and '/' separators in one string are accepted as well
pd.Timestamp('2014-5/10')

Timestamp('2014-05-10 00:00:00')

In [9]:
# Free-form month-name string. NOTE(review): '20:45.56' uses a '.' after the minutes;
# in the recorded output it became 20:45:33, i.e. it was read as 45.56 minutes rather
# than 45 min 56 s -- confirm this is the intended example.
pd.Timestamp('Jan 3,2019 20:45.56')

Timestamp('2019-01-03 20:45:33')

In [10]:
# ISO-style 'T'-separated string; all nine fractional digits (nanoseconds) are kept
pd.Timestamp('2016-01-05T05:34:43.123456789')

Timestamp('2016-01-05 05:34:43.123456789')

In [11]:
# An integer or float can also be passed; it is an offset from 1970-01-01.
# With no unit given, the recorded output shows 500 being read as 500 nanoseconds.
pd.Timestamp(500)

Timestamp('1970-01-01 00:00:00.000000500')

In [12]:
# With unit='D' the number is a count of days since 1970-01-01
pd.Timestamp(5000, unit='D')

Timestamp('1983-09-10 00:00:00')

In [13]:
# pandas' to_datetime function works much like Timestamp, but some of its parameters differ
pd.to_datetime('2015-5-13')

Timestamp('2015-05-13 00:00:00')

In [14]:
# dayfirst=True makes the parser read '13' as the day and '5' as the month
pd.to_datetime('2015-13-5', dayfirst=True)

Timestamp('2015-05-13 00:00:00')

In [15]:
# Weekday name, month name and an ordinal day ('30th') are all understood.
# NOTE(review): this section is about to_datetime, but this cell calls pd.Timestamp -- confirm intent.
pd.Timestamp('Saturday September 30th,2017')

Timestamp('2017-09-30 00:00:00')

In [17]:
# An explicit strptime-style format lets to_datetime pick the date and the 12-hour
# time out of surrounding literal text
pd.to_datetime(
    'Start Date: Sep 30,2017 Start Time: 1:30 pm',
    format='Start Date: %b %d,%Y Start Time: %I:%M %p',
)

Timestamp('2017-09-30 13:30:00')

In [18]:
# Count 100 days forward from a custom origin instead of from 1970-01-01
pd.to_datetime(100, unit='D', origin='2013-1-1')

Timestamp('2013-04-11 00:00:00')

In [19]:
# to_datetime can convert a list or Series of strings or integers to timestamps;
# here every integer is a number of days since 1970-01-01
s = pd.Series([10, 100, 1000, 10000])
pd.to_datetime(s, unit='D')

0   1970-01-11
1   1970-04-11
2   1972-09-27
3   1997-05-19
dtype: datetime64[ns]

In [20]:
# Day-first date strings, one of which ('40/23/2017') is not a real date.
# The strings mix '-' and '/' separators. pandas >= 2.0 infers a single format from
# the first element, which would turn the valid '20/12/2017' into NaT as well, so the
# separators are normalised and an explicit day-first format is supplied (this takes
# the place of dayfirst=True). The result is then the same on old and new pandas.
# errors='coerce' converts the unparseable entry to NaT instead of raising.
s = pd.Series(['12-5-2015','14-1-2013','20/12/2017','40/23/2017'])
day_first_dates = pd.to_datetime(s.str.replace('/', '-'), format='%d-%m-%Y', errors='coerce')
day_first_dates

0   2015-05-12
1   2013-01-14
2   2017-12-20
3          NaT
dtype: datetime64[ns]

In [23]:
# A list of date strings becomes a DatetimeIndex. The two strings use different
# layouts, and pandas >= 2.0 expects one shared format per to_datetime call (it raises
# on the second string otherwise), so each string is parsed on its own and the
# resulting Timestamps are collected into the index.
date_strings = ['Aug 3 1993 3:45:56','10/31/2017']
mixed_format_index = pd.DatetimeIndex([pd.to_datetime(text) for text in date_strings])
mixed_format_index

DatetimeIndex(['1993-08-03 03:45:56', '2017-10-31 00:00:00'], dtype='datetime64[ns]', freq=None)

In [24]:
# pandas' Timedelta and to_timedelta represent an amount of time.
# to_timedelta produces Timedelta objects and, like to_datetime, can also
# convert a list or Series into Timedelta values.
# NOTE(review): the recorded repr below stops at microseconds even though the input
# specifies nanoseconds -- presumably a display limit of the pandas version used.
pd.Timedelta('12 days 5 hours 3 minutes 123456789 nanoseconds')

Timedelta('12 days 05:03:00.123456')

In [25]:
# Keyword components may be fractional: 7.34 minutes is 7 min 20.4 s
pd.Timedelta(days=5, minutes=7.34)

Timedelta('5 days 00:07:20.400000')

In [26]:
# A number plus a unit: 100 weeks, shown as 700 days
pd.Timedelta(100, unit='W')

Timedelta('700 days 00:00:00')

In [27]:
# Parse a duration string. The former errors='ignore' argument was dropped: it is
# deprecated in recent pandas and makes no difference for valid input like this.
pd.to_timedelta('5 days')

Timedelta('5 days 00:00:00')

In [28]:
# An 'HH:MM:SS.fff' string may exceed 24 hours; the excess rolls over into days
pd.to_timedelta('67:15:45.454')

Timedelta('2 days 19:15:45.454000')

In [29]:
# Convert a Series of integers, interpreted as seconds, to timedeltas
s = pd.Series([10, 100])
pd.to_timedelta(s, unit='s')

0   00:00:10
1   00:01:40
dtype: timedelta64[ns]

In [30]:
# A list of duration strings (worded and clock-style) becomes a TimedeltaIndex
time_strings = [
    '2 days 24 minutes 89.67 seconds',
    '00:45:23.6',
]
pd.to_timedelta(time_strings)

TimedeltaIndex(['2 days 00:25:29.670000', '0 days 00:45:23.600000'], dtype='timedelta64[ns]', freq=None)

In [31]:
# Timedeltas can be added to or subtracted from Timestamps, and dividing one
# Timedelta by another returns a float. Here a Timedelta is scaled by an integer.
pd.Timedelta('12 days 5 hours 3 minutes') * 2

Timedelta('24 days 10:06:00')

In [32]:
# Timestamp + Timedelta gives a new Timestamp shifted by that duration
pd.Timestamp('1/1/2017') + pd.Timedelta('12 days 5 hours 3 minutes') * 2

Timestamp('2017-01-25 10:06:00')

In [33]:
# Two TimedeltaIndex objects of equal length are added element by element
td1 = pd.to_timedelta([10, 100], unit='s')
td2 = pd.to_timedelta(['3 hours', '4 hours'])
td1 + td2

TimedeltaIndex(['03:00:10', '04:01:40'], dtype='timedelta64[ns]', freq=None)

In [34]:
# Dividing one Timedelta by another yields a plain float ratio
pd.Timedelta('12 days') / pd.Timedelta('3 days')

4.0

In [35]:
# Timestamps and Timedeltas have many useful attributes and methods; a few follow.
ts = pd.Timestamp('2016-10-1 4:23:23.9')
# Round up to the next whole hour
ts.ceil('h')

Timestamp('2016-10-01 05:00:00')

In [36]:
# Calendar and clock fields of the timestamp, shown together as a tuple
(ts.year, ts.month, ts.day,
 ts.hour, ts.minute, ts.second)

(2016, 10, 1, 4, 23, 23)

In [37]:
# Day of the week (Monday is 0), day of the year, and number of days in the month
ts.dayofweek, ts.dayofyear, ts.daysinmonth

(5, 275, 31)

In [38]:
# Convert the pandas Timestamp back to a standard-library datetime.datetime
ts.to_pydatetime()

datetime.datetime(2016, 10, 1, 4, 23, 23, 900000)

In [40]:
# A fractional number of hours as a pandas Timedelta.
# Note: this rebinds `td`, which earlier held a standard-library datetime.timedelta.
td = pd.Timedelta(125.8723, unit='h')
td

Timedelta('5 days 05:52:20.280000')

In [41]:
# Round the Timedelta to the nearest whole minute
td.round('min')

Timedelta('5 days 05:52:00')

In [42]:
# Break the Timedelta down into days, hours, minutes, seconds and sub-second fields
td.components

Components(days=5, hours=5, minutes=52, seconds=20, milliseconds=280, microseconds=0, nanoseconds=0)

In [43]:
# Whole duration expressed as a single float number of seconds
td.total_seconds()

453140.28

In [44]:
# Compare how fast strings are converted to Timestamps with and without an
# explicit format directive. This cell times the call WITH a format.
date_string_list = ['Sep 30 1984'] * 10000

%timeit pd.to_datetime(date_string_list,format='%b %d %Y')

33.4 ms ± 1.91 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [45]:
# Same 10,000 strings, this time letting pandas work out the format by itself
%timeit pd.to_datetime(date_string_list)

1.21 s ± 57.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# 2.智能切分时间序列