# 综合练习

In [1]:
import numpy as np
import pandas as pd

## 【任务四】显卡日志

In [2]:
# 读取数据
df = pd.read_table('./data/Task 11/benchmark.txt',header=None)[0]

In [3]:
df.head(20)

0                                                 start
1                benchmark start :  2020/12/24 12:12:48
2                  Number of GPUs on current device : 1
3                                   CUDA Version : 11.0
4                                  Cudnn Version : 8005
5                        Device Name : GeForce RTX 3090
6     uname_result(system='Linux', node='gyh-X11DPi-...
7     scpufreq(current=1182.0009166666669, min=1000....
8                                         cpu_count: 36
9                        memory_available: 129118310400
10    Benchmarking Training float precision type mna...
11    mnasnet0_5  model average train time :  28.527...
12    Benchmarking Training float precision type mna...
13    mnasnet0_75  model average train time :  34.10...
14    Benchmarking Training float precision type mna...
15    mnasnet1_0  model average train time :  34.313...
16    Benchmarking Training float precision type mna...
17    mnasnet1_3  model average train time :  35

大体思路：用正则表达式提取所需信息。

In [4]:
import re

In [5]:
df_1 = df.str.extract('Benchmarking\s(?P<state>Train|Inference)\w*\s(?P<dtype>float|half|double)\sprecision\stype\s(?P<model>\w+)')

In [6]:
df_1.tail(10)

Unnamed: 0,state,dtype,model
386,Inference,double,shufflenet_v2_x0_5
387,,,
388,Inference,double,shufflenet_v2_x1_0
389,,,
390,Inference,double,shufflenet_v2_x1_5
391,,,
392,Inference,double,shufflenet_v2_x2_0
393,,,
394,,,
395,,,


In [7]:
# 为了与上面的结果对齐，需要往前shift
df_2 = df.str.extract('(?P<time>\d*\.*\d+)\sms').shift(-1)

In [8]:
mdf = df_1.join(df_2)

In [9]:
# 生成变形前的长表
edf = mdf[mdf.isna().all(1)==False].reset_index(drop=True)

In [10]:
edf = edf.sort_values(['state','dtype'],ascending=[False,False]).reset_index(drop=True)

In [11]:
edf.time = edf.time.astype('float')

In [12]:
# 长表变宽表
edf = edf.pivot(index='model',
          columns=['state','dtype'],
          values='time')

In [13]:
edf.head(3)

state,Train,Train,Train,Inference,Inference,Inference
dtype,half,float,double,half,float,double
model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
densenet121,88.976321,93.357434,417.206821,19.772344,15.63652,144.110632
densenet161,144.318886,136.624231,1290.286574,27.554517,31.750259,511.176925
densenet169,121.555843,104.839993,511.403556,26.370974,21.598272,175.807581


最后需要进行**多级列索引的压缩**：

In [14]:
edf.columns = edf.columns.map(lambda x: (x[0]+'-'+x[1]))

In [15]:
# 按照模型字母排序
edf = edf.sort_index()

In [16]:
# 保留三位小数
edf = edf.round(3)

最终结果如下：

In [17]:
edf.head(3)

Unnamed: 0_level_0,Train-half,Train-float,Train-double,Inference-half,Inference-float,Inference-double
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
densenet121,88.976,93.357,417.207,19.772,15.637,144.111
densenet161,144.319,136.624,1290.287,27.555,31.75,511.177
densenet169,121.556,104.84,511.404,26.371,21.598,175.808


## 【任务五】水压站点的特征工程

In [18]:
df1 = pd.read_csv('./data/Task 11/yali18.csv')
df2 = pd.read_csv('./data/Task 11/yali19.csv')
df3 = pd.read_csv('./data/Task 11/qx1819.csv')

In [19]:
df1.head(3)

Unnamed: 0,Time,MeasName,H0,H1,H2,H3,H4,H5,H6,H7,...,H14,H15,H16,H17,H18,H19,H20,H21,H22,H23
0,2018-01-01,站点4,0.40275,0.407625,0.418125,0.42525,0.426,0.42525,0.417375,0.426375,...,0.34875,0.35925,0.3555,0.34425,0.352125,0.35625,0.34725,0.343875,0.356625,0.418875
1,2018-01-01,站点7,0.214375,0.22675,0.232375,0.233125,0.235,0.23275,0.230875,0.22,...,0.187375,0.19675,0.19975,0.19225,0.18625,0.18325,0.17725,0.163375,0.16525,0.199375
2,2018-01-01,站点22,0.247,0.248125,0.271375,0.251125,0.272125,0.256375,0.257125,0.2425,...,0.2455,0.242875,0.238375,0.230875,0.23725,0.236875,0.2365,0.2365,0.241,0.2545


In [20]:
df2.head(3)

Unnamed: 0,Time,MeasName,H0,H1,H2,H3,H4,H5,H6,H7,...,H14,H15,H16,H17,H18,H19,H20,H21,H22,H23
0,2019-01-01,站点4,0.342,0.429375,0.429,0.44025,0.445875,0.44475,0.41775,0.387,...,0.319875,0.32625,0.323625,0.3225,0.309,0.307125,0.307125,0.307125,0.307125,0.307125
1,2019-01-01,站点7,0.215125,0.2395,0.2575,0.24625,0.275125,0.264625,0.229375,0.205375,...,0.180625,0.1765,0.181375,0.155125,0.159625,0.146125,0.144625,0.13525,0.158875,0.18475
2,2019-01-01,站点22,0.24475,0.248875,0.246625,0.247375,0.247375,0.2455,0.244,0.2395,...,0.238,0.236125,0.235375,0.238,0.23125,0.232375,0.22675,0.227875,0.236125,0.242125


In [21]:
df1['MeasName'] = df1.MeasName.str.extract('站点(\d+)')[0].astype('int')

In [22]:
df2['MeasName'] = df2.MeasName.str.extract('站点(\d+)')[0].astype('int')

In [23]:
df1 = df1.sort_values(['Time','MeasName']).reset_index(drop=True)

In [24]:
df2 = df2.sort_values(['Time','MeasName']).reset_index(drop=True)

In [25]:
df = df1.append(df2)

In [26]:
df.head(3)

Unnamed: 0,Time,MeasName,H0,H1,H2,H3,H4,H5,H6,H7,...,H14,H15,H16,H17,H18,H19,H20,H21,H22,H23
0,2018-01-01,1,0.288625,0.292,0.2905,0.2995,0.30025,0.292,0.29275,0.254125,...,0.257125,0.264625,0.2605,0.23425,0.23575,0.238375,0.2305,0.217,0.241,0.281875
1,2018-01-01,2,0.31775,0.31925,0.32,0.3215,0.32675,0.31475,0.313625,0.309875,...,0.311,0.31475,0.312875,0.31325,0.307625,0.311375,0.31175,0.31025,0.311375,0.314
2,2018-01-01,3,0.301375,0.30325,0.302125,0.30325,0.307,0.30775,0.295,0.292375,...,0.296875,0.296875,0.292375,0.2935,0.298375,0.290875,0.28825,0.289,0.29575,0.299875


In [27]:
# 长表变宽表
df = df.melt(id_vars = ['Time','MeasName'],
        value_vars = ['H%d'%x for x in range(24)],
        var_name = 'Hour',
        value_name = '压力')

In [28]:
df.head()

Unnamed: 0,Time,MeasName,Hour,压力
0,2018-01-01,1,H0,0.288625
1,2018-01-01,2,H0,0.31775
2,2018-01-01,3,H0,0.301375
3,2018-01-01,4,H0,0.40275
4,2018-01-01,5,H0,0.314625


In [29]:
df.Hour = df.Hour.str.extract('H(\d+)')[0]

In [30]:
res = df.copy()

In [31]:
df.head()

Unnamed: 0,Time,MeasName,Hour,压力
0,2018-01-01,1,0,0.288625
1,2018-01-01,2,0,0.31775
2,2018-01-01,3,0,0.301375
3,2018-01-01,4,0,0.40275
4,2018-01-01,5,0,0.314625


In [32]:
df['Time'] = pd.to_datetime(df.Time + ' '+ df.Hour + ':00:00')

In [33]:
df = df.drop('Hour',1).set_index('Time')

In [34]:
df = df.rename(columns={'MeasName':'站点'})

**第1问**结果如下：

In [35]:
df

Unnamed: 0_level_0,站点,压力
Time,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-01-01 00:00:00,1,0.288625
2018-01-01 00:00:00,2,0.317750
2018-01-01 00:00:00,3,0.301375
2018-01-01 00:00:00,4,0.402750
2018-01-01 00:00:00,5,0.314625
...,...,...
2019-12-31 23:00:00,26,0.323250
2019-12-31 23:00:00,27,0.312000
2019-12-31 23:00:00,28,0.294500
2019-12-31 23:00:00,29,0.265875


**以下为第2问**

In [36]:
temp = df3['气温'].str.extract('(?P<最高温>\d+).+?(?P<最低温>-?\d+)').astype('float')

In [37]:
df3.tail()

Unnamed: 0,日期,天气,气温,风向
724,2019-12-27,多云转晴,6℃～-1℃,西南风转南风 3-4级转3-4级
725,2019-12-28,多云转小雨,10℃～4℃,西南风转南风 3-4级转3-4级
726,2019-12-29,多云,11℃～2℃,西南风转北风 <3级转<3级
727,2019-12-30,阴转晴,4℃～-6℃,东北风转北风 4-5级转4-5级
728,2019-12-31,晴转多云,0℃～-5℃,西风转南风 <3级


In [38]:
temp['温差'] = temp['最高温'] - temp['最低温']

In [39]:
# 生成了备用气温框
temp.head(3)

Unnamed: 0,最高温,最低温,温差
0,1.0,-4.0,5.0
1,8.0,0.0,8.0
2,1.0,-1.0,2.0


In [40]:
df3 = pd.concat([df3,temp],1)

In [41]:
df3[df3['最高温'].isna()]

Unnamed: 0,日期,天气,气温,风向,最高温,最低温,温差
22,2018-01-24,转多云,C～-5C,转东北风 转,,,
643,2019-10-07,转多云,℃～9℃,转西风 转,,,


In [42]:
df3.loc[[22,643],'最低温'] = df3[df3['最高温'].isna()]['气温'].str.extract('～(-?\d)')[0]

In [43]:
df3.loc[22,'最高温'] = round(df3[df3['最低温']==-5]['最高温'].mean())

In [44]:
df3.loc[643,'最高温'] = round(df3[df3['最低温']==9]['最高温'].mean())

依次对各种天气进行处理

In [45]:
df3['沙暴'] = df3['天气'].str.contains('沙')

In [46]:
df3['雾'] = df3['天气'].str.contains('雾')

In [47]:
df3['雨'] = df3['天气'].str.contains('雨')

In [48]:
df3['雪'] = df3['天气'].str.contains('雪')

In [49]:
df3['晴天'] = df3['天气'].str.contains('晴')

In [50]:
weather = list(df3['天气'].unique())

In [51]:
print(pd.Series(re.findall('\w*雨',re.sub('转','',str(re.findall('转\w*雨|\w*雨转|转{0}雨转{0}',str(weather)))))).unique())

['雨' '小雨' '中雨' '雷阵雨' '中到大雨' '阵雨' '小到中雨' '大雨' '暴雨']


In [52]:
print(pd.Series(re.findall('\w*雪',re.sub('转','',str(re.findall('转\w*雪|\w*雪转|转{0}雪转{0}',str(weather)))))).unique())

['小雪' '小到中雪' '雨夹雪' '大雪' '中到大雪' '雪']


**第3问度量雨雪量有点复杂，暂时放一下**

In [53]:
df3.head()

Unnamed: 0,日期,天气,气温,风向,最高温,最低温,温差,沙暴,雾,雨,雪,晴天
0,2018-01-01,多云,1C～-4C,东南风 微风,1.0,-4,5.0,False,False,False,False,False
1,2018-01-02,阴转多云,8C～0C,东北风 3-4级,8.0,0,8.0,False,False,False,False,False
2,2018-01-03,阴转小雪,1C～-1C,东北风 4-5级转4-5级,1.0,-1,2.0,False,False,False,True,False
3,2018-01-04,阴,0C～-4C,东北风转北风 3-4级转3-4级,0.0,-4,4.0,False,False,False,False,False
4,2018-01-05,阴转多云,3C～-4C,西风转北风 3-4级转3-4级,3.0,-4,7.0,False,False,False,False,False


In [54]:
df3['风向_1'] = df3['风向'].str.split('\s',expand=True)[0]

不考虑风向的转变，也至少有**八种**风向。按照题目要求，应该只能加入东南西北四列。

In [55]:
df3['风向_西'] = df3['风向_1'].str.contains('西').astype('int')

In [56]:
df3['风向_东'] = df3['风向_1'].str.contains('东').astype('int')

In [57]:
df3['风向_南'] = df3['风向_1'].str.contains('南').astype('int')

In [58]:
df3['风向_北'] = df3['风向_1'].str.contains('北').astype('int')

In [59]:
df3 = df3.drop('风向_1',1)

**第2问完成情况如下**

In [60]:
res = res.merge(df3,left_on='Time',right_on='日期',how='left')

In [61]:
res['Time'] = pd.to_datetime(res.Time + ' '+ res.Hour + ':00:00')

In [62]:
res = res.rename(columns={'MeasName':'站点'})

In [63]:
res = res.set_index(['Time','站点']).loc[:,'最高温':]

In [64]:
df = df.reset_index().set_index(['Time','站点']).join(res).reset_index(1)

In [65]:
df.head()

Unnamed: 0_level_0,站点,压力,最高温,最低温,温差,沙暴,雾,雨,雪,晴天,风向_西,风向_东,风向_南,风向_北
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2018-01-01,1,0.288625,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01,2,0.31775,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01,3,0.301375,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01,4,0.40275,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01,5,0.314625,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0


**以下为第3问**

In [66]:
#先进行索引排序
ldf = df.set_index('站点',append=True).sort_index(level=['站点','Time']).reset_index(1)

In [67]:
ldf.head()

Unnamed: 0_level_0,站点,压力,最高温,最低温,温差,沙暴,雾,雨,雪,晴天,风向_西,风向_东,风向_南,风向_北
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2018-01-01 00:00:00,1,0.288625,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01 01:00:00,1,0.292,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01 02:00:00,1,0.2905,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01 03:00:00,1,0.2995,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0
2018-01-01 04:00:00,1,0.30025,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0


In [68]:
# 构造时间序列
temp = ldf.index.to_series().dt

**第1部分**

没有更好思路，只能采取暴力解法。

In [69]:
ldf['year'],ldf['month'],ldf['hour']=temp.year,temp.month,temp.hour

In [70]:
ldf.head(3)

Unnamed: 0_level_0,站点,压力,最高温,最低温,温差,沙暴,雾,雨,雪,晴天,风向_西,风向_东,风向_南,风向_北,year,month,hour
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
2018-01-01 00:00:00,1,0.288625,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0,2018,1,0
2018-01-01 01:00:00,1,0.292,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0,2018,1,1
2018-01-01 02:00:00,1,0.2905,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0,2018,1,2


In [71]:
out1 = ldf.groupby(['year','month','hour'])['压力'].agg([('水压均值_1','mean')]).reset_index()

In [72]:
ldf = ldf.reset_index().merge(out1,on=['year','month','hour'],how='left').set_index('Time')

In [73]:
ldf['水压差_1'] = ldf['压力'] - ldf['水压均值_1']

**第1部分结果如下**：

In [74]:
ldf.head(2)

Unnamed: 0_level_0,站点,压力,最高温,最低温,温差,沙暴,雾,雨,雪,晴天,风向_西,风向_东,风向_南,风向_北,year,month,hour,水压均值_1,水压差_1
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2018-01-01 00:00:00,1,0.288625,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0,2018,1,0,0.264717,0.023908
2018-01-01 01:00:00,1,0.292,1.0,-4,5.0,False,False,False,False,False,0.0,1.0,1.0,0.0,2018,1,1,0.268392,0.023608


**第2部分**

In [75]:
ldf['week'],ldf['是否周末'] = temp.isocalendar().week, temp.dayofweek.isin([5,6])

In [76]:
res = ldf.groupby(['year','week','是否周末'])['压力'].mean().reset_index().pivot(index=['year','week'],columns='是否周末',values='压力').reset_index()

In [77]:
res.columns.name=''

In [78]:
res['水压差_2'] = res[True] - res[False]

In [79]:
ldf = ldf.reset_index().merge(res.drop([True,False],1),on=['year','week'],how='left').set_index('Time')

**第2部分结果如下**：

In [80]:
ldf.head(2)

Unnamed: 0_level_0,站点,压力,最高温,最低温,温差,沙暴,雾,雨,雪,晴天,...,风向_南,风向_北,year,month,hour,水压均值_1,水压差_1,week,是否周末,水压差_2
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01 00:00:00,1,0.288625,1.0,-4,5.0,False,False,False,False,False,...,1.0,0.0,2018,1,0,0.264717,0.023908,1,False,-0.006236
2018-01-01 01:00:00,1,0.292,1.0,-4,5.0,False,False,False,False,False,...,1.0,0.0,2018,1,1,0.268392,0.023608,1,False,-0.006236


**第3部分**

In [81]:
# 删去多余的列
ldf = ldf.drop(columns=['year','month','hour','week','水压均值_1','是否周末'])

In [82]:
res = ldf.groupby('站点').rolling('7D')['压力'].agg(['mean','std']).join(ldf.groupby('站点').rolling('7D')['压力'].quantile(0.95))

In [83]:
res = res.reset_index(0).rename(columns={'mean':'水压7日均值','std':'水压7日标准差','压力':'水压7日0.95分位数'})

In [84]:
res.iloc[:,1:].head()

Unnamed: 0_level_0,水压7日均值,水压7日标准差,水压7日0.95分位数
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-01-01 00:00:00,0.288625,,0.288625
2018-01-01 01:00:00,0.290313,0.002386,0.291831
2018-01-01 02:00:00,0.290375,0.001691,0.29185
2018-01-01 03:00:00,0.292656,0.004767,0.298375
2018-01-01 04:00:00,0.294175,0.005346,0.3001


In [85]:
ldf = ldf.reset_index().merge(res.reset_index(),on=['Time','站点']).set_index('Time')

In [86]:
res = df3.loc[:,['日期','雨','雪']].set_index('日期')

In [87]:
res.index = pd.to_datetime(res.index)

In [88]:
ldf['日期'] = pd.to_datetime(temp.date)

In [89]:
ldf = ldf.reset_index().merge(res.rolling('10D').sum().reset_index(),on='日期',how='left')

In [90]:
ldf = ldf.rename(columns={'雨_x':'雨','雪_x':'雪','雨_y':'7日下雨天数','雪_y':'7日下雪天数'}).set_index('Time')

**第3部分结果如下**：

In [91]:
ldf.head(2)

Unnamed: 0_level_0,站点,压力,最高温,最低温,温差,沙暴,雾,雨,雪,晴天,...,风向_南,风向_北,水压差_1,水压差_2,水压7日均值,水压7日标准差,水压7日0.95分位数,日期,7日下雨天数,7日下雪天数
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01 00:00:00,1,0.288625,1.0,-4,5.0,False,False,False,False,False,...,1.0,0.0,0.023908,-0.006236,0.288625,,0.288625,2018-01-01,0.0,0.0
2018-01-01 01:00:00,1,0.292,1.0,-4,5.0,False,False,False,False,False,...,1.0,0.0,0.023608,-0.006236,0.290313,0.002386,0.291831,2018-01-01,0.0,0.0


**第4部分**实在不会做了……

**第5部分**

In [92]:
delta = ldf.groupby(['站点','日期'])['压力'].idxmax().dt.hour - ldf.groupby(['站点','日期'])['压力'].idxmin().dt.hour

In [93]:
ldf = ldf.reset_index().merge(delta.reset_index().rename(columns={'压力':'高低压时差'}),on=['站点','日期'],how='left').set_index('Time')

In [94]:
ldf = ldf.drop('日期',1)

**第5部分结果如下**：

In [95]:
ldf.head(2)

Unnamed: 0_level_0,站点,压力,最高温,最低温,温差,沙暴,雾,雨,雪,晴天,...,风向_南,风向_北,水压差_1,水压差_2,水压7日均值,水压7日标准差,水压7日0.95分位数,7日下雨天数,7日下雪天数,高低压时差
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-01 00:00:00,1,0.288625,1.0,-4,5.0,False,False,False,False,False,...,1.0,0.0,0.023908,-0.006236,0.288625,,0.288625,0.0,0.0,-6
2018-01-01 01:00:00,1,0.292,1.0,-4,5.0,False,False,False,False,False,...,1.0,0.0,0.023608,-0.006236,0.290313,0.002386,0.291831,0.0,0.0,-6
