# 安徽安庆市项目
## 本地排放清单预处理`Local Emission Inventory`

---
*@author: Evan*\
*@date: 2023-09-25*

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import os

# silence the warning note
# (this suppresses every warning for the rest of the session)
import warnings
warnings.filterwarnings("ignore")

# Make the project-local modules under ../../src/ importable.
import sys
sys.path.append('../../src/')
# Star-import of the project namelist. The names progdir, emispoint and secmap
# used further down are not defined anywhere in this notebook, so they
# presumably come from here -- TODO confirm against namelist.py.
from namelist import *
# Project-local helper module; assign_values_to_grid is called from it below.
import findpoint as fp

创建网格变量

In [2]:
# Open the GRIDCRO2D file and slice out the 2-D latitude / longitude fields
# (first element along each of the two leading dimensions).
grid = xr.open_dataset(progdir+'GRIDCRO2D_2023141.nc')
lat = grid.LAT[0,0,:,:]
lon = grid.LON[0,0,:,:]

# Template dataset handed to the gridding step later in the notebook:
# a zero-filled placeholder variable plus 2-D latitude/longitude
# coordinates, all laid out on the (y, x) dimensions.
horizontal_dims = ['y','x']
gridfile = xr.Dataset(
    data_vars={
        'ShapeVar': (horizontal_dims, np.zeros_like(lat), {'long name':'not-used variable'}),
    },
    coords={
        'latitude': (horizontal_dims, lat.data),
        'longitude': (horizontal_dims, lon.data),
    },
)
gridfile

读取本地清单点源数据，对空值部分用0填充（注意：下方单元格中的 `fillna` 步骤目前已被注释掉，空值并未实际填充）

In [3]:
# Read the point-source sheet ('ps') of the local inventory workbook,
# skipping the first two rows of the sheet.
ef = pd.read_excel(emispoint,skiprows=2,sheet_name='ps')

# Pollutant columns carried forward (TSP and CO2 are not included).
species = ['SO2','NOx','CO','VOCs','NH3','PM10','PM25','BC','OC']
# Source coordinates and the two source-category labels, followed by the pollutants.
read_columns = ['中心经度','中心纬度','排放源大类名称','第一级排放源名称'] + species

# Keep only the needed columns and drop the first remaining row.
ef = ef.loc[:, read_columns].iloc[1:]
# NOTE: zero-filling of missing pollutant values is currently disabled:
# ef[species] = ef[species].fillna(np.float64(0),inplace=False)

In [4]:
# Sanity check: collect every pollutant column that pandas cannot parse as numeric.
# Missing values (NaN) convert without error, so this check does NOT detect empty cells.
non_numeric_items = []

for col in species:
    # Try to convert the whole column; the converted result itself is discarded.
    try:
        pd.to_numeric(ef[col])
    except (ValueError, TypeError):
        # ValueError: a string that cannot be parsed as a number.
        # TypeError: an unsupported object (e.g. a datetime cell read from Excel).
        # Either way the column contains a non-numeric item, so report it
        # instead of letting the check itself crash.
        non_numeric_items.append(col)

if non_numeric_items:
    print("以下列包含非数字项：")
    print(non_numeric_items)
else:
    print("所有列中的数据都是数字。")

所有列中的数据都是数字。


用嵌套字典存储多个dataframe，便于循环处理

In [5]:
# Two-level lookup: dfs[primary source category][first-level source name] -> DataFrame
dfs = {}
grouped = ef.groupby(['排放源大类名称', '第一级排放源名称'])
for (primary_name, secondary_name), rows in grouped:
    # Create the inner dict on first sight of a primary category, then store the group.
    dfs.setdefault(primary_name, {})[secondary_name] = pd.DataFrame(rows)

In [6]:
# Disabled helper: the code below is wrapped in a triple-quoted string, so
# nothing inside it runs; the cell merely evaluates (and echoes) the string.
# If re-enabled, it would list every distinct (primary category, first-level
# source) pair found in `ef` and export that list to an Excel file, as input
# for building the sector mapping.
'''
# 列出所有大类源和一级源的对应关系，用以制作对应的SectorMapping
dftest = ef[['排放源大类名称', '第一级排放源名称']]
gptest = dftest.groupby(['排放源大类名称', '第一级排放源名称'])
group_indices = gptest.groups.keys()
df_group_indices = pd.DataFrame(group_indices, columns=['排放源大类名称', '第一级排放源名称'])
df_group_indices.to_excel('D:/Download/indices.xlsx')
'''

"\n# 列出所有大类源和一级源的对应关系，用以制作对应的SectorMapping\ndftest = ef[['排放源大类名称', '第一级排放源名称']]\ngptest = dftest.groupby(['排放源大类名称', '第一级排放源名称'])\ngroup_indices = gptest.groups.keys()\ndf_group_indices = pd.DataFrame(group_indices, columns=['排放源大类名称', '第一级排放源名称'])\ndf_group_indices.to_excel('D:/Download/indices.xlsx')\n"

In [7]:
# Disabled helper: the code below is wrapped in a triple-quoted string, so
# nothing inside it runs; the cell merely evaluates (and echoes) the string.
# If re-enabled, it would read the 'ss' sheet of `emisfile1`, list every
# distinct (primary category, first-level source) pair and export the list
# to an Excel file.
# WARNING: re-enabling it would rebind `ef` and `species`, which the
# following cells rely on for the point-source data.
'''
ef = pd.read_excel(emisfile1,skiprows=1,sheet_name='ss')

species = ['SO2','NOx','CO','VOC','NH3','PM10','PM25','BC','OC'] #,'TSP','CO2']
species_columns = [f'{spec}排放量' for spec in species]
read_columns = ['排放源大类名称','第一级排放源名称']
read_columns.extend(species_columns)

ef = ef[read_columns].iloc[1:]

# 列出所有大类源和一级源的对应关系，用以制作对应的SectorMapping
dftest = ef[['排放源大类名称', '第一级排放源名称']]
gptest = dftest.groupby(['排放源大类名称', '第一级排放源名称'])
group_indices = gptest.groups.keys()
df_group_indices = pd.DataFrame(group_indices, columns=['排放源大类名称', '第一级排放源名称'])
df_group_indices.to_excel('D:/Download/ss_indices.xlsx')
'''

"\nef = pd.read_excel(emisfile1,skiprows=1,sheet_name='ss')\n\nspecies = ['SO2','NOx','CO','VOC','NH3','PM10','PM25','BC','OC'] #,'TSP','CO2']\nspecies_columns = [f'{spec}排放量' for spec in species]\nread_columns = ['排放源大类名称','第一级排放源名称']\nread_columns.extend(species_columns)\n\nef = ef[read_columns].iloc[1:]\n\n# 列出所有大类源和一级源的对应关系，用以制作对应的SectorMapping\ndftest = ef[['排放源大类名称', '第一级排放源名称']]\ngptest = dftest.groupby(['排放源大类名称', '第一级排放源名称'])\ngroup_indices = gptest.groups.keys()\ndf_group_indices = pd.DataFrame(group_indices, columns=['排放源大类名称', '第一级排放源名称'])\ndf_group_indices.to_excel('D:/Download/ss_indices.xlsx')\n"

设定本地与MEIC源分类的对应关系

In [8]:
# Load the sector-mapping table and keep only the rows whose SourceType is 'point'.
sector_map_by_type = pd.read_excel(secmap).groupby('SourceType')
smp = sector_map_by_type.get_group('point')
smp

Unnamed: 0,MEIC,LocalPrimarySource,LocalSecondarySource,SourceType
0,Transportation,移动源,民航飞机,point
1,Transportation,扬尘源,施工扬尘,point
2,Transportation,储存运输源,油气储运,point
14,Residential,化石燃料固定燃烧源,城市民用源,point
15,Residential,其它排放源,餐饮油烟,point
16,Residential,废弃物处理源,烟气脱硝,point
17,Residential,废弃物处理源,固废处理,point
18,Residential,废弃物处理源,废水处理,point
21,Power,化石燃料固定燃烧源,民用热力生产和供应,point
22,Power,化石燃料固定燃烧源,工业热力生产和供应,point


根据对应关系，将本地源分类映射到MEIC的五类中

In [12]:
# Group the point-source mapping by MEIC sector; `sections` holds the MEIC
# sector names that actually occur in that mapping.
smp_grouped = smp.groupby('MEIC')
sections = list(smp_grouped.groups.keys())

# For each MEIC sector, stack the inventory rows of every local
# (primary category, first-level source) pair mapped to it.
df_target = {}
for sec in sections:
    source_pairs = smp_grouped.get_group(sec)[['LocalPrimarySource','LocalSecondarySource']]
    # A pair missing from `dfs` raises KeyError here.
    sector_frames = [
        dfs[primary_name][secondary_name]
        for primary_name, secondary_name in source_pairs.itertuples(index=False, name=None)
    ]
    # Concatenate row-wise and renumber the rows from 0.
    df_target[sec] = pd.concat(sector_frames, axis=0).reset_index(drop=True)


将清单依照经纬度写入网格点，保存为nc文件

In [10]:
# Grid each sector's point sources by longitude/latitude and save one netCDF file per sector.
for sec in sections:
    gridded = fp.assign_values_to_grid(df_target[sec],gridfile,'中心经度','中心纬度',species)
    out_path = f'D:/Download/{sec}.nc'
    gridded.to_netcdf(out_path)
    print(f'{sec} finished!')

Complete SO2
Complete NOx
Complete CO
Complete VOCs
Complete NH3
Complete PM10
Complete PM25
Complete BC
Complete OC
Industry finished!
Complete SO2
Complete NOx
Complete CO
Complete VOCs
Complete NH3
Complete PM10
Complete PM25
Complete BC
Complete OC
Power finished!
Complete SO2
Complete NOx
Complete CO
Complete VOCs
Complete NH3
Complete PM10
Complete PM25
Complete BC
Complete OC
Residential finished!
Complete SO2
Complete NOx
Complete CO
Complete VOCs
Complete NH3
Complete PM10
Complete PM25
Complete BC
Complete OC
Transportation finished!
