# time datetime

In [1]:
import time
time.time() # float timestamp: seconds elapsed since the 1970 epoch

1577751633.9798768

In [2]:
time.localtime(time.time()) # convert the current timestamp to a local-time struct_time

time.struct_time(tm_year=2019, tm_mon=12, tm_mday=31, tm_hour=8, tm_min=20, tm_sec=50, tm_wday=1, tm_yday=365, tm_isdst=0)

In [3]:
# Format the current local time as a 'YYYY-MM-DD HH:MM:SS' string.
time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

'2019-12-31 08:21:55'

In [4]:
# Parse a date/time string into a struct_time using an explicit format.
t = time.strptime('1987-08-22 16:53', '%Y-%m-%d %H:%M')

In [5]:
# Display the parsed struct_time.
t

time.struct_time(tm_year=1987, tm_mon=8, tm_mday=22, tm_hour=16, tm_min=53, tm_sec=0, tm_wday=5, tm_yday=234, tm_isdst=-1)

In [6]:
# Convert the struct_time back into a float timestamp.
time.mktime(t)

556620780.0

In [7]:
from datetime import datetime

In [8]:
# Capture the current local date and time once; later cells reuse `now`.
now = datetime.now()

In [9]:
# Display the captured datetime object.
now

datetime.datetime(2019, 12, 31, 8, 24, 4, 930876)

In [10]:
# Print the year, month and day components of `now`, one per line.
print(now.year, now.month, now.day, sep='\n')

2019
12
31


In [11]:
# Format the datetime as a 'YYYY-MM-DD HH:MM:SS' string.
now.strftime('%Y-%m-%d %H:%M:%S')

'2019-12-31 08:24:04'

In [12]:
# Parse a string into a datetime object (note: rebinds `t`, which held a struct_time earlier).
t = datetime.strptime('2019-08-22 16:53:30', '%Y-%m-%d %H:%M:%S')

In [13]:
# Display the parsed datetime.
t

datetime.datetime(2019, 8, 22, 16, 53, 30)

In [14]:
datetime.fromtimestamp(1577751633) # convert a timestamp into a datetime

datetime.datetime(2019, 12, 31, 8, 20, 33)

In [15]:
now.timestamp() # timestamp of the moment stored in `now`

1577751844.930876

In [16]:
# Subtracting two datetimes yields a timedelta: time elapsed since 2018-05-21 00:00.
delta = datetime.now() - datetime(2018, 5, 21)

In [17]:
# Display the timedelta (days, seconds, microseconds).
delta

datetime.timedelta(days=589, seconds=30603, microseconds=1351)

# Pandas时间序列

### 创建一个日期范围
通过指定周期数（periods）和频率（freq），使用 `pd.date_range()` 函数就可以创建日期序列。默认情况下，范围的频率是天。参考以下示例代码：

In [18]:
import pandas as pd

# `periods` is the number of timestamps to generate; `freq` is the spacing between them.
# Frequency aliases: Q quarter, M month, W week, D day, H hour, T minute, S second
# NOTE(review): newer pandas releases rename some of these aliases -- confirm against the installed version.
datelist = pd.date_range(start='2019-12-1', periods=5, freq='W')

In [19]:
# Display the generated DatetimeIndex.
datelist

DatetimeIndex(['2019-12-01', '2019-12-08', '2019-12-15', '2019-12-22',
               '2019-12-29'],
              dtype='datetime64[ns]', freq='W-SUN')

## 案例
### 重新采样 更改时间序列为索引 重新分组
#### 工作中公司数据量庞大字段多，本机内存不够，常在Linux服务器上运行

In [21]:
import numpy as np

# Seed NumPy's global RNG so the synthetic CPU readings are reproducible
# after a kernel restart.
np.random.seed(42)

# Number of one-minute samples to generate (shared by both columns).
n_samples = 200000

data = {
    # One timestamp per minute; 'min' is the minute alias ('T' is a deprecated spelling).
    'time': pd.date_range('2019-12-21', periods=n_samples, freq='min'),
    # randn() draws standard-normal samples; adding 10 shifts every element by 10.
    'cpu': np.random.randn(n_samples) + 10,
}
# Build the DataFrame from the dict. `columns` must be an ordered list:
# a set has no defined order and is rejected by recent pandas versions.
df = pd.DataFrame(data, columns=['time', 'cpu'])
df.head()

Unnamed: 0,time,cpu
0,2019-12-21 00:00:00,11.155178
1,2019-12-21 00:01:00,10.695525
2,2019-12-21 00:02:00,11.16309
3,2019-12-21 00:03:00,9.670108
4,2019-12-21 00:04:00,9.421146


In [22]:
# Show the last five rows.
df.tail()

Unnamed: 0,time,cpu
199995,2020-05-07 21:15:00,10.142505
199996,2020-05-07 21:16:00,12.689645
199997,2020-05-07 21:17:00,10.579123
199998,2020-05-07 21:18:00,10.327889
199999,2020-05-07 21:19:00,10.19477


In [23]:
# Print a summary: index range, column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Data columns (total 2 columns):
time    200000 non-null datetime64[ns]
cpu     200000 non-null float64
dtypes: datetime64[ns](1), float64(1)
memory usage: 3.1 MB


In [24]:
# Boolean-mask row selection: keep rows whose timestamp lies in the closed
# interval 08:00 - 08:10 on 2020-01-01 (both endpoints included).
df[df['time'].between('2020-01-01 08:00:00', '2020-01-01 08:10:00')]

Unnamed: 0,time,cpu
16320,2020-01-01 08:00:00,11.906683
16321,2020-01-01 08:01:00,10.646535
16322,2020-01-01 08:02:00,10.755883
16323,2020-01-01 08:03:00,9.932227
16324,2020-01-01 08:04:00,11.556679
16325,2020-01-01 08:05:00,8.741161
16326,2020-01-01 08:06:00,8.393469
16327,2020-01-01 08:07:00,9.9147
16328,2020-01-01 08:08:00,8.705527
16329,2020-01-01 08:09:00,10.183882


In [25]:
# Goal: look at CPU usage at a coarser granularity (5-minute averages).
# Time-based slicing and resampling need the timestamps as the row index,
# so promote the 'time' column to the index.
s = pd.to_datetime(df['time'])
df.index = s
df.head()

Unnamed: 0_level_0,time,cpu
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2019-12-21 00:00:00,2019-12-21 00:00:00,11.155178
2019-12-21 00:01:00,2019-12-21 00:01:00,10.695525
2019-12-21 00:02:00,2019-12-21 00:02:00,11.16309
2019-12-21 00:03:00,2019-12-21 00:03:00,9.670108
2019-12-21 00:04:00,2019-12-21 00:04:00,9.421146


In [26]:
# The timestamps now live in the index, so drop the redundant 'time' column
# (drop returns a new frame; `df` itself is left unchanged).
df1 = df.drop(columns='time')
df1

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2019-12-21 00:00:00,11.155178
2019-12-21 00:01:00,10.695525
2019-12-21 00:02:00,11.163090
2019-12-21 00:03:00,9.670108
2019-12-21 00:04:00,9.421146
...,...
2020-05-07 21:15:00,10.142505
2020-05-07 21:16:00,12.689645
2020-05-07 21:17:00,10.579123
2020-05-07 21:18:00,10.327889


In [28]:
# Slice a range of rows by timestamp label (both endpoints are included).
df1.loc['2020-01-01 08:00:00':'2020-01-01 08:10:00']

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2020-01-01 08:00:00,11.906683
2020-01-01 08:01:00,10.646535
2020-01-01 08:02:00,10.755883
2020-01-01 08:03:00,9.932227
2020-01-01 08:04:00,11.556679
2020-01-01 08:05:00,8.741161
2020-01-01 08:06:00,8.393469
2020-01-01 08:07:00,9.9147
2020-01-01 08:08:00,8.705527
2020-01-01 08:09:00,10.183882


In [29]:
# Select every row belonging to a single day.
# Partial-string row selection must go through .loc: plain df1['2020-01-01']
# is treated as a column lookup by current pandas and raises KeyError.
df1.loc['2020-01-01']

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
2020-01-01 00:00:00,9.625187
2020-01-01 00:01:00,10.024889
2020-01-01 00:02:00,10.417493
2020-01-01 00:03:00,9.899133
2020-01-01 00:04:00,9.548615
...,...
2020-01-01 23:55:00,9.136206
2020-01-01 23:56:00,9.707098
2020-01-01 23:57:00,10.402210
2020-01-01 23:58:00,9.973913


In [30]:
# Average CPU per calendar day: group rows by the date part of the index.
df1.groupby(by=df.index.date).mean()

Unnamed: 0,cpu
2019-12-21,9.964567
2019-12-22,10.001725
2019-12-23,10.016273
2019-12-24,10.006077
2019-12-25,10.014244
...,...
2020-05-03,9.944501
2020-05-04,9.986344
2020-05-05,9.998849
2020-05-06,10.013825


In [31]:
# Average CPU per hour of day (0-23), pooled across all days.
df1.groupby(by=df.index.hour).mean()

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
0,10.006362
1,9.998441
2,10.020247
3,9.991877
4,10.003825
5,9.99722
6,9.988616
7,10.007708
8,9.994101
9,9.983921


In [32]:
# Average CPU per ISO week number.
# `DatetimeIndex.week` was deprecated and later removed from pandas;
# `isocalendar().week` is its replacement (needs pandas >= 1.1).
df1.groupby(df.index.isocalendar().week).mean()

Unnamed: 0_level_0,cpu
time,Unnamed: 1_level_1
1,9.997606
2,9.999273
3,10.002915
4,9.999013
5,9.991771
6,9.984728
7,10.002739
8,10.010266
9,10.017254
10,9.984919


In [None]:
# Downsample to 5-minute bins and take the mean of each bin.
# '5min' replaces the older '5T' spelling, which recent pandas flags as deprecated.
df1.resample('5min').mean()