# 安徽安庆市项目`WRF-CMAQ`模拟分析
## 观测结果预处理

---
*@author: Evan*\
*@date: 2023-03-28*

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

# Global matplotlib font configuration, applied to every figure drawn after
# this cell through rcParams.update().
from matplotlib import rcParams
config = {
    # Default font family for text elements.
    "font.family":'Times New Roman',
    # Render math text with the STIX font set.
    "mathtext.fontset":'stix',
    # NOTE(review): matplotlib consults "font.serif" when the generic 'serif'
    # family is requested; with "font.family" set to a concrete font name this
    # entry may not take effect. Presumably SimSun is meant as the font for
    # Chinese glyphs -- confirm that Chinese labels actually render.
    "font.serif": ['SimSun'],
}
rcParams.update(config)

import cartopy.crs as ccrs
import cartopy.feature as cfeat
from cartopy.io.shapereader import Reader

# Silence every warning for the rest of the session.
# NOTE(review): the blanket "ignore" filter also hides deprecation and
# future-behaviour warnings raised by pandas in the cells below; consider
# narrowing it to specific categories while debugging.
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the raw observation workbook; cells holding the '—' placeholder are
# read as missing values.
xls = pd.read_excel(
    'F:/Data/case_anqing/obs_202302_fromlzj/obs_local_Feb.xlsx',
    header=0,
    na_values='—',
)

# Combine the separate date and time columns into one 'datetime' column,
# drop the two source columns, and use the timestamp as the index.
xls = (
    xls.assign(
        datetime=pd.to_datetime(
            xls['日期'].astype(str) + ' ' + xls['时间'].astype(str)
        )
    )
    .drop(columns=['日期', '时间'])
    .set_index('datetime')
)

# Display the resulting table as the cell output.
xls

Unnamed: 0_level_0,区县,站点,SO2,NO2,NOx,NO,CO,O3,PM10,PM2.5,PM1,风速,风向,气压,气温,湿度,降水量,能见度
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2023-02-01 00:00:00,宜秀区,安庆大学,9,68,72,3,0.878,21,48,37,,0.7,36.5,1009.8,10.7,47,0.000(BB),
2023-02-01 01:00:00,宜秀区,安庆大学,9,51,53,1,1.111,21,48,48,,0.4,298.7,1010.1,9,54,0.000(BB),
2023-02-01 02:00:00,宜秀区,安庆大学,9,54,57,2,1.067,16,58,46,,0.4,306.8,1010.3,8.2,57,0.000(BB),
2023-02-01 03:00:00,宜秀区,安庆大学,8,52,56,2,0.964,16,56,46,,0.2,19.6,1010.3,7.8,58,0.000(BB),
2023-02-01 04:00:00,宜秀区,安庆大学,8,51,61,6,0.962,11,56,48,,0.5,346.9,1010.4,7,63,0.000(BB),
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-02-28 19:00:00,宜秀区,市人大,12,40,43,2,0.806,56,51,38,,0.4,267.4,1017.7,12.6,69,,
2023-02-28 20:00:00,宜秀区,市人大,22,57,61,2,0.924,31,65,51,,0.6,318.3,1018,12.3,71,,
2023-02-28 21:00:00,宜秀区,市人大,22,66,73,4,1.002,15,80,52,,0.2,188.3,1018.4,12.4,72,,
2023-02-28 22:00:00,宜秀区,市人大,16,64,68,4,1.071,15,84,54,,0.6,129.7,1018.4,11.9,74,,


In [3]:
# Set to NaN every value that was read as a string because it carries a
# parenthesised flag (for example '0.000(BB)' in the precipitation column).
# The non-greedy pattern r'\(.*?\)' matches any '(...)' group inside a string cell.
xls = xls.replace(to_replace=[r'\(.*?\)'], value=np.nan, regex=True)

In [11]:
# The pressure readings of the station '安庆政务中心' are one order of
# magnitude too small, so scale them by 10.
#
# The rows are selected with a boolean station mask and written back through
# .loc. A value-based Series.replace over the whole column would also rescale
# rows of OTHER stations whose pressure happens to equal one of the listed
# values, and a chained `xls['气压'].replace(..., inplace=True)` does not
# reliably write back into `xls` on recent pandas versions.
#
# NOTE: this cell is not idempotent -- running it again multiplies the same
# station's pressure by 10 a second time.
station_mask = xls['站点'].isin(['安庆政务中心'])
data_replace = xls.loc[station_mask, '气压']
xls.loc[station_mask, '气压'] = data_replace * 10

In [14]:
# Export one workbook per station.
grouped = xls.groupby('站点')
# Full hourly axis for February 2023. It carries the name 'datetime' so the
# index column keeps its header in the exported files after reindexing.
t_index = pd.date_range('2023-02-01T00', '2023-02-28T23', freq='h', name='datetime')

for group_name, group_df in grouped:
    # Pad missing timestamps with empty (NaN) placeholder rows.
    # DataFrame.reindex returns a NEW frame, so the result must be kept;
    # calling it without assignment leaves the gaps in place.
    filled_df = group_df.reindex(t_index)
    filled_df.to_excel(f'D:/Download/{group_name}.xlsx', index=True)

In [15]:
# Average all stations for each timestamp and export the result.
# numeric_only=True restricts the mean to numeric columns: the text columns
# ('区县', '站点') cannot be averaged, and pandas >= 2.0 raises a TypeError
# for them instead of silently dropping them.
xls.groupby(xls.index).mean(numeric_only=True).to_excel('D:/Download/allsite.xlsx', index=True)

In [16]:
# Split the stations into urban, rural and channel groups.
# The membership of each group is taken from the .xlsx file names found in the
# corresponding folder: the file name without its extension is matched against
# the '站点' column.
urban_path='F:/Data/case_anqing/obs_202302_fromlzj/urban/'
rural_path='F:/Data/case_anqing/obs_202302_fromlzj/rural/'
channel_path='F:/Data/case_anqing/obs_202302_fromlzj/channel/'


def _station_names(folder):
    """Return the extension-less names of the .xlsx files in *folder*."""
    return [
        os.path.splitext(filename)[0]
        for filename in os.listdir(folder)
        if filename.endswith('.xlsx')
    ]


urban_files = _station_names(urban_path)
rural_files = _station_names(rural_path)
channel_files = _station_names(channel_path)

urban = xls[xls['站点'].isin(urban_files)]
rural = xls[xls['站点'].isin(rural_files)]
channel = xls[xls['站点'].isin(channel_files)]

# Average each group per timestamp and export it.
# numeric_only=True restricts the mean to numeric columns: the text columns
# ('区县', '站点') cannot be averaged, and pandas >= 2.0 raises a TypeError
# for them instead of silently dropping them.
for class_name, class_df in (('urban', urban), ('rural', rural), ('channel', channel)):
    class_mean = class_df.groupby(class_df.index).mean(numeric_only=True)
    class_mean.to_excel(f'D:/Download/{class_name}.xlsx', index=True)