In [36]:
#python原生的时间类型
from datetime import datetime
from datetime import timedelta
import pandas as pd
import numpy as np
from pandas.tseries.offsets import *

In [6]:
# Build a naive datetime for 2012-10-01 10:30:20.
datetime1 = datetime(year=2012, month=10, day=1, hour=10, minute=30, second=20)
# Build a time offset: days, hours and minutes are added together
# (here 1 day + 12 hours + 30 minutes).
timedelta1 = timedelta(minutes=30, hours=12, days=1)

In [7]:
# A datetime plus a timedelta yields a new datetime.
datetime1 + timedelta1
# Python calls datetime objects that carry no timezone information "naive".

datetime.datetime(2012, 10, 2, 23, 0, 20)

In [None]:
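# NumPy's np.datetime64 stores a time as a 64-bit (8-byte) integer count of a chosen unit, so its range depends on that unit; with the nanosecond unit that pandas uses by default it spans roughly 1677-09-21 to 2262-04-11.
# For comparison, MySQL's TIMESTAMP type uses 4 bytes and covers 1970-01-01 00:00:01 UTC to 2038-01-19 03:14:07 UTC, with second precision by default.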


In [None]:
#pandas在numpy基础之上定义多个时间相关对象
#Timestamp 时间对象，表示特定的一个时间点，如现在
#Period 时期对象，表示一个时间段，2019年，2021年5月
#Interval 该对象中包含起始时间对象和终止时间对象

In [11]:
# Creating time objects
# pd.to_datetime() accepts date/time strings in many formats, as well as datetime objects.
pd.to_datetime("2021/05/16")

Timestamp('2021-05-16 00:00:00')

In [12]:
# Convert the standard-library datetime object into a pandas Timestamp.
pd.to_datetime(datetime1)

Timestamp('2012-10-01 10:30:20')

In [14]:
# Turn a list of date strings into a DatetimeIndex.
# The strings deliberately use different layouts ("2012-5-1", "2013/5/3",
# "20120502"). Handing such a list straight to pd.to_datetime() makes
# pandas >= 2.0 infer a single format from the first element and raise on the
# others, so every string is parsed on its own and the resulting Timestamps
# are gathered into one DatetimeIndex.
times_index = pd.DatetimeIndex(
    [
        pd.to_datetime(text)
        for text in ["2012-5-1", "2013/5/3", "20120502", "2014-12-13"]
    ]
)

In [15]:
# Build a Series that uses the time index as its row labels
# (the same works for a DataFrame).
series1 = pd.Series(data=[10, 20, 30, 40], index=times_index)

In [18]:
# Benefit: the data is easier to read, and a partial date string (here a year-month) selects every entry that falls inside that period.
series1["2012-05"]

2012-05-01    10
2012-05-02    30
dtype: int64

In [19]:
# Converting a column into a time series
# Load the July 2014 temperature data of a US city, keeping only the date
# column (AKDT) and the three temperature columns.
weather_df = pd.read_csv(
    "./data/sitka_weather_07-2014.csv",
    usecols=[
        "AKDT",
        "Max_TemperatureF",
        "Mean_TemperatureF",
        "Min_TemperatureF",
    ],
)


In [26]:
# Add a "date" column that holds the AKDT strings parsed as datetime64 values.
# pd.to_datetime() replaces .astype(np.datetime64): pandas >= 2.0 refuses to
# cast to a unit-less datetime64 dtype, while to_datetime parses date strings
# on every pandas version.
weather_df["date"] = pd.to_datetime(weather_df["AKDT"])

In [28]:
# Use the "date" column as the index.
# The result is rebound to the same name instead of passing inplace=True,
# which keeps the statement chainable and avoids mutating the frame object.
weather_df = weather_df.set_index("date")

In [32]:
# Use truncate() to cut a window out of the records.
# before=a drops the rows before time a; after=b drops the rows after time b.
weather_df.truncate(before="2014-7-10",after="2014-7-19")
# Boolean indexing can achieve the same result.

Unnamed: 0_level_0,AKDT,Max_TemperatureF,Mean_TemperatureF,Min_TemperatureF
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-07-10,2014-7-10,61,56,53
2014-07-11,2014-7-11,57,56,54
2014-07-12,2014-7-12,59,56,55
2014-07-13,2014-7-13,57,56,55
2014-07-14,2014-7-14,61,58,55
2014-07-15,2014-7-15,64,58,55
2014-07-16,2014-7-16,61,56,52
2014-07-17,2014-7-17,59,55,51
2014-07-18,2014-7-18,63,56,51
2014-07-19,2014-7-19,60,57,54


In [30]:
# Inspect the index of weather_df.
weather_df.index

DatetimeIndex(['2014-07-01', '2014-07-02', '2014-07-03', '2014-07-04',
               '2014-07-05', '2014-07-06', '2014-07-07', '2014-07-08',
               '2014-07-09', '2014-07-10', '2014-07-11', '2014-07-12',
               '2014-07-13', '2014-07-14', '2014-07-15', '2014-07-16',
               '2014-07-17', '2014-07-18', '2014-07-19', '2014-07-20',
               '2014-07-21', '2014-07-22', '2014-07-23', '2014-07-24',
               '2014-07-25', '2014-07-26', '2014-07-27', '2014-07-28',
               '2014-07-29', '2014-07-30', '2014-07-31'],
              dtype='datetime64[ns]', name='date', freq=None)

In [33]:
# Other ways to generate a time series
# start = first timestamp, end = last timestamp, periods = number of entries, freq = frequency (defaults to "D", one day)
pd.date_range(start="2012-01-01",periods=10)

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',
               '2012-01-09', '2012-01-10'],
              dtype='datetime64[ns]', freq='D')

In [34]:
# The same kind of range, bounded by start and end instead of a number of periods.
pd.date_range(start="2012-01-01",end="2012-01-10")

DatetimeIndex(['2012-01-01', '2012-01-02', '2012-01-03', '2012-01-04',
               '2012-01-05', '2012-01-06', '2012-01-07', '2012-01-08',
               '2012-01-09', '2012-01-10'],
              dtype='datetime64[ns]', freq='D')

In [35]:
# Frequency aliases: "D" day, "W" week, "M" month, "h" hour; a multiplier may
# be prefixed, e.g. "2D".
# The hour alias is written in lowercase because pandas 2.2 deprecated the
# uppercase "H" spelling.
# NOTE(review): pandas 2.2 also renamed the month-end alias "M" to "ME" for
# date_range/resample - confirm against the pandas version in use.
pd.date_range(start="2012-01-01", periods=10, freq="12h")

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-01 12:00:00',
               '2012-01-02 00:00:00', '2012-01-02 12:00:00',
               '2012-01-03 00:00:00', '2012-01-03 12:00:00',
               '2012-01-04 00:00:00', '2012-01-04 12:00:00',
               '2012-01-05 00:00:00', '2012-01-05 12:00:00'],
              dtype='datetime64[ns]', freq='12H')

In [38]:
# Custom frequency built from DateOffset objects.
# Combine one week, one day and twelve hours into a single offset
# (8 days 12 hours in total).
dateOffset1 = pd.offsets.Week(1) + pd.offsets.Day(1) + pd.offsets.Hour(12)
# Use the combined offset as the step between consecutive timestamps.
pd.date_range(start="2012-01-01", periods=10, freq=dateOffset1)

DatetimeIndex(['2012-01-01 00:00:00', '2012-01-09 12:00:00',
               '2012-01-18 00:00:00', '2012-01-26 12:00:00',
               '2012-02-04 00:00:00', '2012-02-12 12:00:00',
               '2012-02-21 00:00:00', '2012-02-29 12:00:00',
               '2012-03-09 00:00:00', '2012-03-17 12:00:00'],
              dtype='datetime64[ns]', freq='204H')

In [43]:
# A Period object represents a span of time.
# Create one through the constructor; every Period carries a frequency.
period1 = pd.Period("2019-7", freq="D")

In [44]:
# Periods support scalar arithmetic: adding an integer shifts the period by that many units of its frequency.
period1 + 1

Period('2019-07-02', 'D')

In [46]:
# Use pd.period_range to create a sequence of periods (a PeriodIndex).
# start = first period, end = last period, periods = number of entries, freq = frequency (defaults to "D", one day)
pd.period_range(start="2012-1",periods=10, freq="M")

PeriodIndex(['2012-01', '2012-02', '2012-03', '2012-04', '2012-05', '2012-06',
             '2012-07', '2012-08', '2012-09', '2012-10'],
            dtype='period[M]', freq='M')

In [None]:
#使用方式和DatetimeIndex几乎没有区别

In [48]:
# Resampling with resample() changes the sampling frequency of a data set.
# Downsampling lowers the rate: df.resample(<coarser frequency>) followed by
# an aggregation such as mean().
# Upsampling raises the rate: df.resample(<finer frequency>) followed by a
# fill such as ffill() (reuse the previous record) or bfill() (reuse the next
# record).
# Only the numeric columns are averaged: the string column AKDT has no mean,
# and pandas >= 2.0 raises on it instead of silently dropping it.
weather_df.select_dtypes(include="number").resample("2D").mean()


Unnamed: 0_level_0,Max_TemperatureF,Mean_TemperatureF,Min_TemperatureF
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2014-07-01,67.5,59.0,52.5
2014-07-03,61.5,57.0,52.5
2014-07-05,65.5,58.5,52.5
2014-07-07,58.0,55.5,54.0
2014-07-09,59.0,55.5,53.0
2014-07-11,58.0,56.0,54.5
2014-07-13,59.0,57.0,55.0
2014-07-15,62.5,57.0,53.5
2014-07-17,61.0,55.5,51.0
2014-07-19,58.5,56.0,53.0


In [50]:
# Upsample to a 12-hour frequency, filling each new row from the previous record.
# The hour alias is lowercase because pandas 2.2 deprecated the uppercase "H" spelling.
weather_df.resample("12h").ffill()

Unnamed: 0_level_0,AKDT,Max_TemperatureF,Mean_TemperatureF,Min_TemperatureF
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2014-07-01 00:00:00,2014-7-1,64,56,50
2014-07-01 12:00:00,2014-7-1,64,56,50
2014-07-02 00:00:00,2014-7-2,71,62,55
2014-07-02 12:00:00,2014-7-2,71,62,55
2014-07-03 00:00:00,2014-7-3,64,58,53
...,...,...,...,...
2014-07-29 00:00:00,2014-7-29,61,56,51
2014-07-29 12:00:00,2014-7-29,61,56,51
2014-07-30 00:00:00,2014-7-30,61,57,54
2014-07-30 12:00:00,2014-7-30,61,57,54
