## 安徽与杭州观测数据处理
## `Processing of observation data in Anhui and Hangzhou`

---
*@author: Evan*\
*@date: 2023-03-06*

In [1]:
import datetime
from pathlib import Path

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

In [2]:
# silence the warning note
import warnings
warnings.filterwarnings("ignore")

In [3]:
from matplotlib import rcParams

# Global matplotlib font settings, collected in `config` and applied in one call.
config = {}
# Default font family for figure text.
config["font.family"] = 'Times New Roman'
# Math text is typeset with the STIX font set.
config["mathtext.fontset"] = 'stix'
# SimSun is the Chinese Song typeface (宋体).
# NOTE(review): matplotlib appears to consult font.serif only when font.family
# is 'serif' - confirm SimSun is actually picked up for Chinese labels.
config["font.serif"] = ['SimSun']
rcParams.update(config)

In [4]:
# Load the raw observation workbook into a DataFrame and preview it.
# NOTE(review): absolute local path - adjust it when running on another machine.
source_xlsx = 'D:/files/Master/02学术/横向项目_安庆_杭州/data_lzj/20200101_20230128_6YS_Re.xlsx'
data = pd.read_excel(source_xlsx)
data

Unnamed: 0,站点,年,月,日,时,SO2,NO,NO2,NOx,CO,...,风速,风向,气压,气温,湿度,NO2+O3,PM2.5/10,u,v,NO2/NOX
0,岳西县狮形小学,2020,1,1,0,,,,,,...,,,,,,,,,,
1,岳西县狮形小学,2020,1,1,1,25,,32,,0.655,...,,,,,,36.0,0.630769,,,
2,岳西县狮形小学,2020,1,1,2,25,,29,,0.628,...,,,,,,33.0,0.634921,,,
3,岳西县狮形小学,2020,1,1,3,25,,30,,0.649,...,,,,,,33.0,0.645161,,,
4,岳西县狮形小学,2020,1,1,4,26,,28,,0.711,...,,,,,,31.0,0.645161,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
514094,市人大,2023,2,15,20,9,2,26,29,0.73,...,2.1,19.1,1026.3,7.4,56,107.0,0.656716,-0.687158,-1.984393,0.896552
514095,市人大,2023,2,15,21,8,2,25,27,0.698,...,1.6,32,1026.5,6.8,59,104.0,0.734375,-0.847871,-1.356877,0.925926
514096,市人大,2023,2,15,22,8,2,26,28,0.687,...,1.3,30,1026.5,6.4,61,100.0,0.739130,-0.650000,-1.125833,0.928571
514097,市人大,2023,2,15,23,8,2,27,29,0.693,...,1.5,22.4,1026.9,6.2,60,100.0,0.787879,-0.571606,-1.386819,0.931034


In [5]:
# Merge the separate year / month / day / hour columns into one timestamp column.
# pd.to_datetime can assemble datetimes directly from a frame whose columns are
# named year/month/day/hour, so the columns are only renamed here; no cast to
# strings, string concatenation or re-parsing is needed.
dateandtime = data[['年', '月', '日', '时']].rename(
    columns={'年': 'year', '月': 'month', '日': 'day', '时': 'hour'}
)
data['time'] = pd.to_datetime(dateandtime)

In [6]:
# The four date-part columns are redundant once 'time' exists, so drop them,
# then move 'time' to column position 1 and preview the result.
data = data.drop(columns=['年', '月', '日', '时'])
data.insert(1, 'time', data.pop('time'))
data

Unnamed: 0,站点,time,SO2,NO,NO2,NOx,CO,O3,PM10,PM2.5,风速,风向,气压,气温,湿度,NO2+O3,PM2.5/10,u,v,NO2/NOX
0,岳西县狮形小学,2020-01-01 00:00:00,,,,,,,,,,,,,,,,,,
1,岳西县狮形小学,2020-01-01 01:00:00,25,,32,,0.655,4,65,41,,,,,,36.0,0.630769,,,
2,岳西县狮形小学,2020-01-01 02:00:00,25,,29,,0.628,4,63,40,,,,,,33.0,0.634921,,,
3,岳西县狮形小学,2020-01-01 03:00:00,25,,30,,0.649,3,62,40,,,,,,33.0,0.645161,,,
4,岳西县狮形小学,2020-01-01 04:00:00,26,,28,,0.711,3,62,40,,,,,,31.0,0.645161,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
514094,市人大,2023-02-15 20:00:00,9,2,26,29,0.73,81,67,44,2.1,19.1,1026.3,7.4,56,107.0,0.656716,-0.687158,-1.984393,0.896552
514095,市人大,2023-02-15 21:00:00,8,2,25,27,0.698,79,64,47,1.6,32,1026.5,6.8,59,104.0,0.734375,-0.847871,-1.356877,0.925926
514096,市人大,2023-02-15 22:00:00,8,2,26,28,0.687,74,69,51,1.3,30,1026.5,6.4,61,100.0,0.739130,-0.650000,-1.125833,0.928571
514097,市人大,2023-02-15 23:00:00,8,2,27,29,0.693,73,66,52,1.5,22.4,1026.9,6.2,60,100.0,0.787879,-0.571606,-1.386819,0.931034


In [8]:
# Treat values that carry a parenthesised marker such as "(H)" as missing.
# The pattern is a raw string so the regex backslashes are not read as
# (invalid) Python string escapes; the compiled pattern is unchanged.
data = data.replace(r'\(.+\)', np.nan, regex=True)

In [7]:
# Number of rows per station; use the commented line instead to list the
# distinct station names.
# print(data['站点'].unique())
station_counts = data['站点'].value_counts()
print(station_counts)

岳西县狮形小学       27481
岳西县气象局        27481
怀宁县青妇活动中心     27481
怀宁县振宁学校       27481
环科院           27481
马山宾馆          27481
潜山市档案馆        27481
潜山市公路局        27481
市人大           27481
宿松县第二中学       27481
宿松县气象局        27481
太湖县龙山路240号    27481
太湖县职业技术学校     27481
桐城市环境监测站      27481
桐城市开发区第二小学    27481
望江县人民检察院      27481
望江县实验中学       27481
安庆大学          27481
安庆政务中心        19441
Name: 站点, dtype: int64


In [50]:
# Show only the rows whose station column equals 岳西县狮形小学.
data.loc[data['站点'].eq('岳西县狮形小学')]

Unnamed: 0,站点,time,SO2,NO,NO2,NOx,CO,O3,PM10,PM2.5,风速,风向,气压,气温,湿度,NO2+O3,PM2.5/10,u,v,NO2/NOX
0,岳西县狮形小学,2020-01-01 00:00:00,,,,,,,,,,,,,,,,,,
1,岳西县狮形小学,2020-01-01 01:00:00,25,,32,,0.655,4,65,41,,,,,,36.0,0.630769,,,
2,岳西县狮形小学,2020-01-01 02:00:00,25,,29,,0.628,4,63,40,,,,,,33.0,0.634921,,,
3,岳西县狮形小学,2020-01-01 03:00:00,25,,30,,0.649,3,62,40,,,,,,33.0,0.645161,,,
4,岳西县狮形小学,2020-01-01 04:00:00,26,,28,,0.711,3,62,40,,,,,,31.0,0.645161,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
513418,岳西县狮形小学,2023-02-15 20:00:00,9,1,16,17,0.388,81,56,49,0.4,180,985,3.8,69,97.0,0.875000,-2.143592e-08,0.400000,0.941176
513419,岳西县狮形小学,2023-02-15 21:00:00,10,0,16,16,0.383,77,56,55,0.5,109.1,984.6,2.5,75,93.0,0.982143,-4.724745e-01,0.163609,1.000000
513420,岳西县狮形小学,2023-02-15 22:00:00,12,0,13,14,0.306,86,58,53,0.8,89.4,985.3,3.1,70,99.0,0.913793,-7.999561e-01,-0.008377,0.928571
513421,岳西县狮形小学,2023-02-15 23:00:00,26,0,17,17,0.324,56,58,52,0.5,230.9,985.3,1.3,81,73.0,0.896552,3.880232e-01,0.315338,1.000000


In [9]:
# Group the rows by station name.
grouped = data.groupby('站点')

# Write each station's rows to its own Excel workbook, named after the station.
# NOTE(review): absolute local output folder - adjust it when running elsewhere.
path = 'D:/Download/'
out_dir = Path(path)
# Create the target folder up front so to_excel does not fail on a missing directory.
out_dir.mkdir(parents=True, exist_ok=True)
for site, group in grouped:
    # index=False keeps the DataFrame index out of the exported sheet.
    group.to_excel(out_dir / f'{site}.xlsx', index=False)