# 安徽安庆市项目
## 本地排放清单预处理`Local Emission Inventory`

---
*@author: Evan*\
*@date: 2023-10-04*

In [1]:
import xarray as xr
import numpy as np
import pandas as pd
import os

# Suppress all Python warnings for the rest of the session (this also hides pandas deprecation warnings)
import warnings
warnings.filterwarnings("ignore")

import sys
sys.path.append('../../src/')
from namelist import *
import findpoint as fp

创建网格变量

In [2]:
# Open the grid file and take the [0, 0, :, :] slice of its LAT and LON
# variables, i.e. one 2-D field each.
grid = xr.open_dataset(progdir+'GRIDCRO2D_2023141.nc')
lat = grid.LAT[0,0,:,:]
lon = grid.LON[0,0,:,:]

# Build a dataset on the same 2-D (y, x) layout: an all-zero variable plus
# the latitude/longitude coordinates. It is passed to the gridding helper
# later in the notebook.
horizontal_dims = ['y','x']
placeholder = (horizontal_dims, np.zeros_like(lat), {'long name':'not-used variable'})
gridfile = xr.Dataset(
    data_vars={'ShapeVar': placeholder},
    coords={
        'latitude': (horizontal_dims, lat.data),
        'longitude': (horizontal_dims, lon.data),
    },
)
gridfile

## 将安庆的面源文件按二级分类写入一个 Excel 文件

In [3]:
# Split every sheet of the raw area-source workbook by the value of its
# '所属排放源' column and collect the pieces under '<sheet>-<group>' keys.
dfs = {}
# The context manager closes the input workbook once all sheets are parsed.
with pd.ExcelFile(emisatemp) as eftemp:
    for sheet in eftemp.sheet_names:
        primary = sheet
        # Blank cells are replaced by 0 before grouping.
        dfsheet = eftemp.parse(sheet).fillna(0)

        for secondary, df_temp in dfsheet.groupby('所属排放源'):
            # Unify header spellings across sheets ('x' -> 'X', 'VOC' -> 'VOCs');
            # rename() silently skips labels that are not present.
            df_temp = df_temp.rename(columns={'x': 'X', 'VOC': 'VOCs'})
            # The grouping column is constant within a group, so drop it.
            # A non-in-place drop avoids mutating a slice of the parent frame.
            df_temp = df_temp.drop(columns=['所属排放源'])
            dfs[f'{primary}-{secondary}'] = df_temp

# Write one sheet per '<primary>-<secondary>' key. ExcelWriter.save() was
# removed in pandas 2.0; the context manager saves and closes the file.
with pd.ExcelWriter(emisdir+'AQ_2020_area.xlsx') as writer:
    for name, df in dfs.items():
        df.to_excel(writer,sheet_name=name,index=False)

读取本地清单面源分类

In [3]:
# Open the area-source workbook at `emisarea` and list its sheet names.
# NOTE(review): presumably this is the 'AQ_2020_area.xlsx' file written by the
# splitting step (sheets named '<primary>-<secondary>') -- confirm in namelist.
ef = pd.ExcelFile(emisarea)
ef.sheet_names

['民用燃烧-民用燃烧',
 '道路移动源-道路移动源',
 '非道路移动源-农业机械',
 '非道路移动源-工程机械',
 '非道路移动源-船舶',
 '非道路移动源-铁路内燃机车',
 '农业源-人体粪便',
 '农业源-畜禽养殖源',
 '农业源-秸秆堆肥',
 '扬尘源-土壤扬尘',
 '扬尘源-道路扬尘',
 '储存运输源-油气运输源',
 '生物质燃烧源-生物质开放燃烧',
 '生物质燃烧源-生物质炉灶',
 '餐饮源-餐饮源',
 '溶剂使用源-其他溶剂',
 '溶剂使用源-农药使用',
 '溶剂使用源-表面涂层源']

检查是否有空值存在

In [4]:
def ifanynan(inputdf, name):
    """Print whether ``inputdf`` contains any missing values.

    Args:
        inputdf: DataFrame to inspect.
        name: Label used in the printed message.

    Returns:
        None. When missing values exist, the rows that contain them are
        printed; otherwise a single "no NaN" line is printed.
    """
    null_mask = inputdf.isnull()

    if not null_mask.values.any():
        print(f"There is no NaN in {name}")
        return

    # Show only the rows that contain at least one NaN
    print(f"NaN rows in {name}:")
    print(inputdf[null_mask.any(axis=1)])

In [5]:
# Run the NaN check on every sheet of the area-source workbook.
for sheet_name in ef.sheet_names:
    ifanynan(ef.parse(sheet_name), sheet_name)

There is no NaN in 民用燃烧-民用燃烧
There is no NaN in 道路移动源-道路移动源
There is no NaN in 非道路移动源-农业机械
There is no NaN in 非道路移动源-工程机械
There is no NaN in 非道路移动源-船舶
There is no NaN in 非道路移动源-铁路内燃机车
There is no NaN in 农业源-人体粪便
There is no NaN in 农业源-畜禽养殖源
There is no NaN in 农业源-秸秆堆肥
There is no NaN in 扬尘源-土壤扬尘
There is no NaN in 扬尘源-道路扬尘
There is no NaN in 储存运输源-油气运输源
There is no NaN in 生物质燃烧源-生物质开放燃烧
There is no NaN in 生物质燃烧源-生物质炉灶
There is no NaN in 餐饮源-餐饮源
There is no NaN in 溶剂使用源-其他溶剂
There is no NaN in 溶剂使用源-农药使用
There is no NaN in 溶剂使用源-表面涂层源


设定本地清单与MEIC源分类的对应关系

In [6]:
# Load the sector-mapping table and keep only the rows whose 'SourceType'
# is 'area' (get_group raises KeyError if there is no such group).
sector_map = pd.read_excel(secmap)
by_source_type = sector_map.groupby('SourceType')
sma = by_source_type.get_group('area')

根据对应关系，将本地源分类映射到LEAQ自定义的分类中

In [7]:
# Build one table per LEAQ category: for each category, collect the workbook
# sheets whose secondary name is listed under that category in the mapping
# table, add up sheets that share a primary name on matching (X, Y) rows,
# and stack the per-primary tables.
sma_grouped = sma.groupby('LEAQ')
sections = list(sma_grouped.groups.keys())
species = ['SO2','NOx','CO','PM10','PM25','VOCs','NH3','BC','OC']
coord_cols = ['X', 'Y']

df_target = {}
for sec in sections:
    # Source names mapped to this category; looked up once per category
    # instead of once per sheet.
    sec_sources = set(sma_grouped.get_group(sec)['SourceName'].values)

    dfs = {}
    for sheet_name in ef.sheet_names:
        # Sheet names have the form '<primary>-<secondary>'. Split on the
        # first '-' only, so a secondary name that itself contains a hyphen
        # is kept whole.
        primary, sep, rest = sheet_name.partition('-')
        secondary = rest if sep else None

        if secondary not in sec_sources:
            continue

        print(f'{primary}-{secondary} --> {sec}')
        current_df = ef.parse(sheet_name)

        if primary not in dfs:
            # First sheet for this primary name: take it as-is.
            dfs[primary] = current_df
            continue

        # Further sheets: outer-join on the coordinates so rows present in
        # only one table survive, treat the missing side as zero, then sum
        # each species across the two tables.
        merged_df = pd.merge(dfs[primary], current_df, on=coord_cols,
                             how='outer', suffixes=('_x', '_y'))
        merged_df.fillna(0,inplace=True)

        for specie in species:
            merged_df[specie] = merged_df[f'{specie}_x'] + merged_df[f'{specie}_y']
        # Remove the suffixed operand columns now that the sums exist.
        merged_df = merged_df.drop(
            columns=[f'{specie}{suffix}' for specie in species for suffix in ('_x', '_y')]
        )

        dfs[primary] = merged_df
        ifanynan(dfs[primary],primary)

    if not dfs:
        # pd.concat would raise an opaque "No objects to concatenate";
        # name the category that has no matching sheet instead.
        raise ValueError(f'No sheet in the area-source workbook maps to LEAQ category {sec!r}')

    df_target[sec] = pd.concat(dfs,axis=0).reset_index(drop=True)

生物质燃烧源-生物质开放燃烧 --> Biomass
生物质燃烧源-生物质炉灶 --> Biomass
There is no NaN in 生物质燃烧源
扬尘源-土壤扬尘 --> Dust
扬尘源-道路扬尘 --> Dust
There is no NaN in 扬尘源
非道路移动源-农业机械 --> NonRoad
非道路移动源-工程机械 --> NonRoad
There is no NaN in 非道路移动源
非道路移动源-船舶 --> NonRoad
There is no NaN in 非道路移动源
非道路移动源-铁路内燃机车 --> NonRoad
There is no NaN in 非道路移动源
民用燃烧-民用燃烧 --> Other
农业源-人体粪便 --> Other
农业源-畜禽养殖源 --> Other
There is no NaN in 农业源
农业源-秸秆堆肥 --> Other
There is no NaN in 农业源
储存运输源-油气运输源 --> Other
餐饮源-餐饮源 --> Other
道路移动源-道路移动源 --> Road
溶剂使用源-其他溶剂 --> Solvent
溶剂使用源-农药使用 --> Solvent
There is no NaN in 溶剂使用源
溶剂使用源-表面涂层源 --> Solvent
There is no NaN in 溶剂使用源


将清单依照经纬度写入网格点，保存为nc文件

In [11]:
# For every LEAQ category, pass its table to the project gridding helper
# (with 'X'/'Y' as the coordinate columns and the species list) and save the
# returned object as '<category>.nc'.
for sec in sections:
    out_path = datadir+f'step1_preliminary/leaq_category/area_source/{sec}.nc'
    gridded = fp.assign_values_to_grid(df_target[sec],gridfile,'X','Y',species)
    gridded.to_netcdf(out_path)
    print(f'{sec} finished!')

Complete SO2
Complete NOx
Complete CO
Complete PM10
Complete PM25
Complete VOCs
Complete NH3
Complete BC
Complete OC
Biomass finished!
Complete SO2
Complete NOx
Complete CO
Complete PM10
Complete PM25
Complete VOCs
Complete NH3
Complete BC
Complete OC
Dust finished!
Complete SO2
Complete NOx
Complete CO
Complete PM10
Complete PM25
Complete VOCs
Complete NH3
Complete BC
Complete OC
NonRoad finished!
Complete SO2
Complete NOx
Complete CO
Complete PM10
Complete PM25
Complete VOCs
Complete NH3
Complete BC
Complete OC
Other finished!
Complete SO2
Complete NOx
Complete CO
Complete PM10
Complete PM25
Complete VOCs
Complete NH3
Complete BC
Complete OC
Road finished!
Complete SO2
Complete NOx
Complete CO
Complete PM10
Complete PM25
Complete VOCs
Complete NH3
Complete BC
Complete OC
Solvent finished!
