In [1]:
import tensorflow as tf

### Definition
- $D_{a}^{t}$ the set of IAQI values for a certain pollutant in a city during time period t.
- $D_{p}$ the set of all POIs in a city
- $D_{r}$ the set of linked road segments in a city, including each segment's <b>start point</b>, <b>end point</b> and <b>road type</b>
- $D_{m}$ (district-level) denotes the real-time meteorological information, including <b>weather, temperature, pressure, humidity, wind speed and wind direction</b>
- $X^{m}$ meteorological features
- $X^{mt}$ the set of meteorological features during time period t
- $X^{p}$ POI features (12 dimensions), where each dimension is the sum of $cate^{c}$, i.e. the number of POIs of category $c$
- $X^{r}$ road-network features (3 dimensions): the total length within a region of highway, trunk, and other roads
- for each station $s$, extract features $X_{s}^{m},X_{s}^{p},X_{s}^{r}$ within the affecting region of $s$
- $X_{s}^{d}$ records the distance and direction from $s$ to the target location $l$
- $X_{s}^{a}$ contains the sequence of observed IAQI values at $s$ over time

### 系统架构
![](img/frame.png)

### target
- predict IAQI value for a target location in a given time period.


#### 关键点
- build an individual model for each pollutant
- ADAIN incorporates the attention mechanism to discriminate the importance of features from different monitoring stations automatically.
- the "certain distance" $d$ that defines an affecting region equals 2 kilometers
- one-hot encoding for the categorical attributes {weather, wind_direction}
- the other (numerical) attributes are normalized to the range [0, 1]

#### 不懂点
- To obtain training data, we deliberately remove a monitoring station, and associate the features extracted from data in its affecting region (i.e., within certain distance) and data collected by the remaining monitoring stations with the ground-truth IAQI value as one training example.

![adain](img/adain.png)

- 没有得到$POI$数据和路数据

### Feature Extraction

In [4]:
import os  # imported here because this cell precedes the notebook's import cell

# Directory that holds the raw CSV files read by the cells below.
# Set the AQ_DATA_DIR environment variable to point at another location;
# when it is unset the original local path is used unchanged.
data_path = os.environ.get('AQ_DATA_DIR', '/Users/icdi/Downloads/Data')

In [2]:
import os
import pandas as pd
import numpy as np

In [5]:
# Presumably Beijing's city_id (city.csv lists city_id 1 as BeiJing) — TODO confirm;
# no cell shown here reads this name.
bj = 1

# List the raw data files. The bare `file` is kept as the cell's last
# expression so that the listing is actually displayed.
file = os.listdir(data_path)
file

['airquality.csv',
 'district.csv',
 'station.csv',
 'weatherforecast.csv',
 'city.csv',
 'meteorology.csv',
 'readme.pdf']

In [11]:
# Load the district table and show its column names.
# os.path.join keeps the path valid even if data_path ends with a separator.
frame_dist = pd.read_csv(os.path.join(data_path, 'district.csv'))
frame_dist.columns

Index(['district_id', 'name_chinese', 'name_english', 'city_id'], dtype='object')

In [65]:
# Sorted unique district ids of the rows whose city_id is 1.
bj_dist = np.unique(frame_dist.loc[frame_dist['city_id'] == 1, 'district_id'])
bj_dist  # Beijing contains 16 districts

array([101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113,
       114, 115, 116])

In [19]:
# Load the monitoring-station table.
# os.path.join keeps the path valid even if data_path ends with a separator.
frame_station = pd.read_csv(os.path.join(data_path, 'station.csv'))

In [20]:
# Preview the first rows of the station table.
frame_station.head()

Unnamed: 0,station_id,name_chinese,name_english,latitude,longitude,district_id
0,1001,海淀北部新区,HaiDianBeiBuXinQu,40.090679,116.173553,101
1,1002,海淀北京植物园,HaiDianBeiJingZhiWuYuan,40.00395,116.20531,101
2,1003,石景山古城,ShiJingShanGuCheng,39.914409,116.184239,102
3,1004,丰台云岗,FengTaiYunGang,39.815128,116.17115,103
4,1005,房山良乡,FangShanLiangXiang,39.742767,116.136045,104


In [37]:
# Show the station rows that contain at least one missing value.
# A row-wise mask is used so each such row appears exactly once; indexing with
# the 2-D `.isnull().values` array repeats a row once per missing cell.
frame_station[frame_station.isnull().any(axis=1)]

Unnamed: 0,station_id,name_chinese,name_english,latitude,longitude,district_id


In [87]:
# Keep only the stations whose district_id is one of Beijing's districts.
# `bj_dist` is the district-id array built earlier in this notebook; the
# previously referenced `bj_dict` is not defined in any cell shown here.
# Series.isin performs the membership test without a Python-level loop.
bj_station = frame_station[frame_station['district_id'].isin(bj_dist)]

In [119]:
# Save the Beijing station subset as a GB2312-encoded CSV without the index column.
bj_station.to_csv('bj_station.csv', encoding='gb2312', index=False)

In [93]:
# NOTE: the len() result is discarded — only a cell's last expression is displayed.
len(bj_station)
# Preview the first rows of the Beijing station subset.
bj_station.head()

Unnamed: 0,station_id,name_chinese,name_english,latitude,longitude,district_id
0,1001,海淀北部新区,HaiDianBeiBuXinQu,40.090679,116.173553,101
1,1002,海淀北京植物园,HaiDianBeiJingZhiWuYuan,40.00395,116.20531,101
2,1003,石景山古城,ShiJingShanGuCheng,39.914409,116.184239,102
3,1004,丰台云岗,FengTaiYunGang,39.815128,116.17115,103
4,1005,房山良乡,FangShanLiangXiang,39.742767,116.136045,104


In [102]:
# Sorted unique station ids of the Beijing station subset.
bj_stat = np.unique(bj_station['station_id'].values)

In [100]:
# Load the air-quality readings table.
# os.path.join keeps the path valid even if data_path ends with a separator.
frame_aq = pd.read_csv(os.path.join(data_path, 'airquality.csv'))

In [101]:
# Preview the first rows of the air-quality table.
frame_aq.head()

Unnamed: 0,station_id,time,PM25_Concentration,PM10_Concentration,NO2_Concentration,CO_Concentration,O3_Concentration,SO2_Concentration
0,1001,2014-05-01 00:00:00,138.0,159.4,56.3,0.9,50.8,17.2
1,1001,2014-05-01 01:00:00,124.0,163.9,38.7,0.9,51.1,17.9
2,1001,2014-05-01 02:00:00,127.0,148.4,55.6,1.0,27.2,16.6
3,1001,2014-05-01 03:00:00,129.0,145.6,65.7,1.0,9.7,16.7
4,1001,2014-05-01 04:00:00,119.0,119.3,66.9,1.0,2.0,16.5


In [96]:
# Preview air-quality rows that contain at least one missing value.
# A row-wise mask is used so each such row appears exactly once; indexing with
# the 2-D `.isnull().values` array repeats a row once per missing cell.
frame_aq[frame_aq.isnull().any(axis=1)].head()

Unnamed: 0,station_id,time,PM25_Concentration,PM10_Concentration,NO2_Concentration,CO_Concentration,O3_Concentration,SO2_Concentration
18,1001,2014-05-01 18:00:00,174.0,,12.5,0.7,126.7,34.7
19,1001,2014-05-01 19:00:00,130.0,,6.2,0.4,91.5,15.3
26,1001,2014-05-02 02:00:00,34.0,,6.9,0.4,74.8,9.9
27,1001,2014-05-02 03:00:00,26.0,,6.8,0.4,75.0,9.1
44,1001,2014-05-02 20:00:00,15.0,,7.1,0.2,99.3,6.9


In [103]:
# Keep only the readings taken at Beijing stations.
# Series.isin does the membership test in one vectorized call instead of
# evaluating `x in bj_stat` in Python for every row.
bj_aqi = frame_aq[frame_aq['station_id'].isin(bj_stat)]

In [104]:
# Save the Beijing air-quality subset as a GB2312-encoded CSV without the index column.
bj_aqi.to_csv('bj_aqi.csv', encoding='gb2312', index=False)

In [105]:
# (rows, columns) of the Beijing air-quality subset.
bj_aqi.shape

(278023, 8)

In [106]:
# Preview the first rows of the Beijing air-quality subset.
bj_aqi.head()

Unnamed: 0,station_id,time,PM25_Concentration,PM10_Concentration,NO2_Concentration,CO_Concentration,O3_Concentration,SO2_Concentration
0,1001,2014-05-01 00:00:00,138.0,159.4,56.3,0.9,50.8,17.2
1,1001,2014-05-01 01:00:00,124.0,163.9,38.7,0.9,51.1,17.9
2,1001,2014-05-01 02:00:00,127.0,148.4,55.6,1.0,27.2,16.6
3,1001,2014-05-01 03:00:00,129.0,145.6,65.7,1.0,9.7,16.7
4,1001,2014-05-01 04:00:00,119.0,119.3,66.9,1.0,2.0,16.5


In [107]:
# Load the weather-forecast table.
# os.path.join keeps the path valid even if data_path ends with a separator.
frame_wf = pd.read_csv(os.path.join(data_path, 'weatherforecast.csv'))

In [108]:
# Preview the first rows of the weather-forecast table.
frame_wf.head()

Unnamed: 0,id,time_forecast,time_future,frequent,weather,up_temperature,bottom_temperature,wind_level,wind_direction
0,1,2014-08-08 18:00:00,2014-08-08 20:00:00,6,1.0,27.0,23.0,0.0,3.0
1,1,2014-08-08 18:00:00,2014-08-09 02:00:00,6,1.0,25.0,21.0,0.0,4.0
2,1,2014-08-08 18:00:00,2014-08-09 08:00:00,6,1.0,30.0,25.0,0.0,4.0
3,1,2014-08-08 19:00:00,2014-08-08 20:00:00,6,1.0,27.0,23.0,0.0,3.0
4,1,2014-08-08 19:00:00,2014-08-09 02:00:00,6,1.0,25.0,21.0,0.0,4.0


In [109]:
# Ids used to filter the `id` column of the forecast and meteorology tables:
# Beijing's district ids plus 1 (presumably Beijing's city_id, which city.csv
# lists as 1 — TODO confirm that `id` can hold a city id).
bj_dist_wf = np.append(arr=bj_dist, values=1)

In [110]:
# (rows, columns) of the full weather-forecast table.
frame_wf.shape

(910576, 9)

In [82]:
# Preview forecast rows that contain at least one missing value.
# A row-wise mask is used so each such row appears exactly once; indexing with
# the 2-D `.isnull().values` array repeats a row once per missing cell.
frame_wf[frame_wf.isnull().any(axis=1)].head()

Unnamed: 0,id,time_forecast,time_future,frequent,weather,up_temperature,bottom_temperature,wind_level,wind_direction
913,1,2014-08-23 18:00:00,2014-08-24 08:00:00,3,1.0,31.0,25.0,,0.0
917,1,2014-08-23 18:00:00,2014-08-24 20:00:00,3,0.0,23.0,21.0,,0.0
956,1,2014-08-24 18:00:00,2014-08-25 08:00:00,3,1.0,31.0,24.0,,0.0
1492,1,2014-09-06 07:00:00,2014-09-07 20:00:00,3,1.0,25.0,22.0,,4.0
1507,1,2014-09-06 11:00:00,2014-09-07 20:00:00,3,1.0,25.0,22.0,,4.0


In [111]:
# Keep only the forecast rows whose id is in bj_dist_wf.
# Series.isin does the membership test in one vectorized call instead of
# evaluating `x in bj_dist_wf` in Python for every row.
bj_wf = frame_wf[frame_wf['id'].isin(bj_dist_wf)]

In [112]:
# Preview the first rows of the Beijing forecast subset.
bj_wf.head()

Unnamed: 0,id,time_forecast,time_future,frequent,weather,up_temperature,bottom_temperature,wind_level,wind_direction
0,1,2014-08-08 18:00:00,2014-08-08 20:00:00,6,1.0,27.0,23.0,0.0,3.0
1,1,2014-08-08 18:00:00,2014-08-09 02:00:00,6,1.0,25.0,21.0,0.0,4.0
2,1,2014-08-08 18:00:00,2014-08-09 08:00:00,6,1.0,30.0,25.0,0.0,4.0
3,1,2014-08-08 19:00:00,2014-08-08 20:00:00,6,1.0,27.0,23.0,0.0,3.0
4,1,2014-08-08 19:00:00,2014-08-09 02:00:00,6,1.0,25.0,21.0,0.0,4.0


In [113]:
# Save the Beijing forecast subset as a GB2312-encoded CSV without the index column.
bj_wf.to_csv('bj_wf.csv', encoding='gb2312', index=False)

In [114]:
# Load the city table.
# os.path.join keeps the path valid even if data_path ends with a separator.
frame_city = pd.read_csv(os.path.join(data_path, 'city.csv'))

In [115]:
# Preview the first rows of the city table.
frame_city.head()

Unnamed: 0,city_id,name_chinese,name_english,latitude,longitude,cluster_id
0,1,北京,BeiJing,39.90421,116.407394,1
1,4,深圳,ShenZhen,22.543099,114.057868,2
2,6,天津,TianJin,39.084158,117.200982,1
3,9,广州,GuangZhou,23.12911,113.264385,2
4,10,香港,XiangGang,22.396428,114.109497,2


In [116]:
# Re-display the list of raw data files.
file

['airquality.csv',
 'district.csv',
 'station.csv',
 'weatherforecast.csv',
 'city.csv',
 'meteorology.csv',
 'readme.pdf']

In [80]:
# Load the meteorology table.
# os.path.join keeps the path valid even if data_path ends with a separator.
frame_mete = pd.read_csv(os.path.join(data_path, 'meteorology.csv'))

In [10]:
# Shape of the meteorology rows that contain at least one missing value.
# A row-wise mask is used so the first number is a count of rows; indexing with
# the 2-D `.isnull().values` array repeats a row once per missing cell, which
# turns that number into a count of missing cells instead.
frame_mete[frame_mete.isnull().any(axis=1)].shape

(563352, 8)

In [81]:
# Keep only the meteorology rows whose id is in bj_dist_wf.
# Series.isin does the membership test in one vectorized call instead of
# evaluating `x in bj_dist_wf` in Python for every row.
bj_mete = frame_mete[frame_mete['id'].isin(bj_dist_wf)]

In [82]:
# Preview the first rows of the Beijing meteorology subset.
bj_mete.head()

Unnamed: 0,id,time,weather,temperature,pressure,humidity,wind_speed,wind_direction
0,1,2014-05-01 02:00:00,,18.0,755.9,71.0,2.0,23.0
1,1,2014-05-01 05:00:00,,16.8,755.8,78.0,1.0,13.0
2,1,2014-05-01 08:00:00,,19.0,756.9,72.0,2.0,23.0
3,1,2014-05-01 11:00:00,,24.5,756.1,57.0,4.0,23.0
4,1,2014-05-01 14:00:00,,26.7,753.9,44.0,5.0,3.0


In [117]:
# NOTE: the shape is not displayed — only a cell's last expression is echoed.
bj_mete.shape
# Save the Beijing meteorology subset as a GB2312-encoded CSV without the index column.
bj_mete.to_csv('bj_mete.csv',index = False,encoding = 'gb2312')

In [79]:
# Re-display the list of raw data files.
file

['airquality.csv',
 'district.csv',
 'station.csv',
 'weatherforecast.csv',
 'city.csv',
 'meteorology.csv',
 'readme.pdf']

In [85]:
# Distinct values of the weather column. The recorded output lists 18 values,
# one of which is NaN, i.e. 17 non-null weather codes.
frame_mete['weather'].unique()

array([nan,  1.,  2.,  8.,  0.,  4.,  5.,  7., 14.,  3., 10., 11., 16.,
       15., 13., 12.,  6.,  9.])

In [86]:
# Distinct values of the wind_direction column (NaN included).
frame_mete['wind_direction'].unique()

array([23., 13.,  3.,  2.,  4., 24., 14.,  0.,  1.,  9., nan])