# 毕业论文`GRAD`
## 由于原验证结果偏差较大，使用多种手段尝试降低偏差

---
*@author: Evan*\
*@date: 2024-03-30*

In [2]:
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd

import matplotlib.pyplot as plt
from matplotlib import rcParams
# Global matplotlib text settings, applied once to rcParams for all later figures.
# NOTE(review): matplotlib normally consults "font.serif" only when the font
# family resolves to the generic 'serif' family; with the family set to
# 'Times New Roman' the SimSun entry may never be used -- confirm the intent.
config = {}
config["font.family"] = 'Times New Roman'
config["mathtext.fontset"] = 'stix'
config["font.serif"] = ['SimSun']
rcParams.update(config)

import sys
sys.path.append('../../src/')
from namelist import *
from mask import polygon_to_mask
import ModelEvalLib as me

# Suppress every Python warning from this point on.
# NOTE(review): the blanket "ignore" filter also hides deprecation and
# numerical warnings raised by the libraries used below.
import warnings
warnings.filterwarnings("ignore")

In [10]:
# simvar: name of the variable read from the simulation dataset.
# obsvar: variable tag used in the observation workbook file name.
# Both are set to 'O3'.
simvar = obsvar = 'O3'

In [73]:
def _regional_hourly_series(year, month):
    """Load one simulated month and its observations as region-mean series.

    Shared by ``calculate_hour``, ``calculate_mda8`` and ``calculate_hmax`` so
    that the file handling and masking logic exists in one place only.

    Args:
        year: Year used to build the simulation and observation file names.
        month: Month label used in the file names (e.g. ``'Sep'``) and passed
            to ``get_obspath``.

    Returns:
        A ``(sim_hour, obs_hour)`` tuple. ``sim_hour`` is an
        ``xarray.DataArray`` holding the mean of ``simvar`` over the grid
        cells selected by the ``PRD_merge_adm`` polygon mask. ``obs_hour`` is
        a ``pandas.Series`` holding the row-wise mean of the observation
        workbook with gaps filled by linear interpolation.
    """
    sim_file = datadir + f'processed/{month}_{year}/{month}_{year}_chem.nc'

    # Open the dataset in a context manager so the file handle is released
    # after every call instead of staying open for the whole session.
    with xr.open_dataset(sim_file) as ds:
        # Index 0 on the second dimension -- presumably the lowest model
        # level; TODO confirm against the file layout.
        data_sim = ds[simvar][:,0,:,:]
        shp = gpd.read_file(shp_files['PRD_merge_adm'])
        lon = data_sim.longitude
        lat = data_sim.latitude
        mask    = polygon_to_mask(shp.geometry[0], lon, lat)
        mask_da = xr.DataArray(mask, dims=('y','x'))
        # Cells where the mask is false become NaN and are skipped by the mean.
        masked_sim = data_sim.where(mask_da)
        # Force the values into memory before the file is closed.
        sim_hour = masked_sim.mean(dim=('x','y'),skipna=True).load()

    obspath = get_obspath(month)
    df = pd.read_excel(obspath + f'site_{obsvar}_{year}.xlsx',index_col=0)

    # Mean across the workbook columns (presumably one column per site --
    # verify against the workbook), then fill missing rows linearly.
    obs = df.mean(axis=1,skipna=True)
    obs_hour = obs.interpolate(method='linear')

    return sim_hour, obs_hour


def calculate_hour(year,month):
    """Return the region-mean simulated and observed series at native resolution.

    Args:
        year: Year of the case to evaluate.
        month: Month label of the case to evaluate (e.g. ``'Sep'``).

    Returns:
        ``(sim_hour, obs_hour)`` as produced by ``_regional_hourly_series``.
    """
    return _regional_hourly_series(year, month)


def calculate_mda8(year,month):
    """Return the daily maximum of the 8-step rolling mean for sim and obs.

    The rolling mean uses the library defaults for both series, so each value
    averages the current step and the seven before it, and the first seven
    steps are NaN. The result is the MDA8 only if the series are hourly --
    TODO confirm the time step of both inputs.

    Args:
        year: Year of the case to evaluate.
        month: Month label of the case to evaluate (e.g. ``'Sep'``).

    Returns:
        ``(sim_mda8, obs_mda8)``: an ``xarray.DataArray`` and a
        ``pandas.Series`` resampled to one value per day.
    """
    sim_hour, obs_hour = _regional_hourly_series(year, month)

    sim_mda8 = sim_hour.rolling(time=8).mean().resample(time='D').max()
    # resample('D') assumes the workbook index is datetime-like -- verify.
    obs_mda8 = obs_hour.rolling(8).mean().resample('D').max()

    return sim_mda8, obs_mda8


def calculate_hmax(year,month):
    """Return the daily maximum of the region-mean series for sim and obs.

    Args:
        year: Year of the case to evaluate.
        month: Month label of the case to evaluate (e.g. ``'Sep'``).

    Returns:
        ``(sim_hmax, obs_hmax)``: an ``xarray.DataArray`` and a
        ``pandas.Series`` resampled to one value per day.
    """
    sim_hour, obs_hour = _regional_hourly_series(year, month)

    sim_hmax = sim_hour.resample(time='D').max()
    obs_hmax = obs_hour.resample('D').max()

    return sim_hmax, obs_hmax

In [74]:
def print_metrics(year,month,case):
    """Build a one-column table of evaluation metrics for one year/month case.

    Despite the name, nothing is printed; the table is returned.

    Args:
        year: Year of the case to evaluate.
        month: Month label of the case to evaluate (e.g. ``'Sep'``).
        case: Which series to compare: ``'hour'``, ``'mda8'`` or ``'hmax'``.

    Returns:
        A ``pandas.DataFrame`` indexed by ``MB``, ``MB90``, ``R``, ``RMSE``,
        ``IOA``, ``NMB`` and ``NME`` with a single column named
        ``f'{year}_{month}'``.

    Raises:
        ValueError: If ``case`` is not one of the three supported names.
    """
    if case == 'hour':
        sim, obs = calculate_hour(year,month)
    elif case == 'mda8':
        sim, obs = calculate_mda8(year,month)
    elif case == 'hmax':
        sim, obs = calculate_hmax(year,month)
    else:
        # Fail with a clear message here instead of an UnboundLocalError below.
        raise ValueError(
            f"unknown case {case!r}; expected 'hour', 'mda8' or 'hmax'"
        )

    metrics = me.CalculateMetrics(obs,sim)
    mb   = metrics.get_mb().values
    r    = metrics.get_r()
    rmse = metrics.get_rmse().values
    ioa  = metrics.get_ioa().values
    nmb  = metrics.get_nmb().values
    nme  = metrics.get_nme().values

    # MB90: difference between the 0.9 quantiles of the two series.
    sim90 = sim.quantile(0.9).values
    obs90 = obs.quantile(0.9)
    mb90 = sim90 - obs90

    dfout = pd.DataFrame(
        data=[mb,mb90,r,rmse,ioa,nmb,nme],
        index=['MB','MB90','R','RMSE','IOA','NMB','NME'],
        columns=[f'{year}_{month}'],
    )
    return dfout
 

In [75]:
def generate_df(case):
    """Collect the metric tables of every evaluated year/month into one table.

    Args:
        case: Series type forwarded to ``print_metrics`` (``'hour'``,
            ``'mda8'`` or ``'hmax'``).

    Returns:
        A ``pandas.DataFrame`` with one ``f'{year}_{month}'`` column per
        evaluated case: all September columns first, then the July columns.
    """
    years = [2014,2015,2016,2019,2021,2022]
    months = ['Sep','Jul']
    frames = []
    for month in months:
        for year in years:
            # July cases for 2016 and earlier are skipped (presumably no data
            # is available for them -- confirm).
            if year <= 2016 and month == 'Jul':
                continue
            frames.append(print_metrics(year,month,case))
    # Concatenate once after the loops instead of rebuilding the table on
    # every iteration.
    return pd.concat(frames,axis=1)


# 小时均值与第90百分位

In [71]:
# Build the metric table for the 'hour' case; the bare name on the last line
# displays it as the notebook cell output.
case = 'hour'

df_hour = generate_df(case)
df_hour

Unnamed: 0,2014_Sep,2015_Sep,2016_Sep,2019_Sep,2021_Sep,2022_Sep,2019_Jul,2021_Jul,2022_Jul
MB,30.182362,34.25584,42.370841,20.614152,27.318352,22.161045,34.41193,24.071802,19.027575
MB90,15.575411,25.323429,27.330338,-13.142573,4.110339,-3.533482,39.395989,19.365676,2.796104
R,0.867349,0.84284,0.745083,0.843353,0.888256,0.863924,0.80969,0.869383,0.879076
RMSE,37.878122,42.388411,52.453848,40.538801,36.268646,34.641335,40.200767,31.41342,29.143222
IOA,0.834223,0.804397,0.705315,0.834241,0.859449,0.859611,0.727719,0.846269,0.894856
NMB,48.699816,49.863448,64.328043,21.463778,39.118752,20.961145,70.986372,41.132562,30.683665
NME,61.117071,61.701373,79.636215,42.209634,51.935205,32.765695,82.927827,53.677513,46.99605


# MDA8与第90百分位

In [72]:
# Build the metric table for the 'mda8' case; the bare name on the last line
# displays it as the notebook cell output.
case = 'mda8'

df_mda8 = generate_df(case)
df_mda8

Unnamed: 0,2014_Sep,2015_Sep,2016_Sep,2019_Sep,2021_Sep,2022_Sep,2019_Jul,2021_Jul,2022_Jul
MB,25.319479,34.013233,43.390154,1.490213,17.17322,7.764042,37.212989,21.265106,21.610256
MB90,12.857344,16.384688,12.044491,-34.248558,-1.91693,-11.13902,29.393773,-10.734404,-11.394966
R,0.824115,0.841826,0.58751,0.87343,0.898479,0.850453,0.772161,0.823025,0.914302
RMSE,33.838942,41.243691,58.864366,31.394978,27.183943,22.099121,42.777997,30.676664,31.829231
IOA,0.754279,0.701282,0.564079,0.814343,0.821968,0.853754,0.607426,0.747967,0.856564
NMB,22.817493,30.144848,40.210085,0.963843,13.805228,4.795215,44.547936,21.266893,20.953122
NME,30.495091,36.552973,54.55019,20.305703,21.852659,13.648822,51.209846,30.679242,30.861354


# 每日最大值及第90百分位

In [77]:
# Build the metric table for the 'hmax' case; the bare name on the last line
# displays it as the notebook cell output.
case = 'hmax'

df_hmax = generate_df(case)
df_hmax

Unnamed: 0,2014_Sep,2015_Sep,2016_Sep,2019_Sep,2021_Sep,2022_Sep,2019_Jul,2021_Jul,2022_Jul
MB,18.379939,29.701748,39.545537,-6.850664,11.100092,5.538178,32.342907,16.695316,20.133051
MB90,-3.890023,11.510628,7.313839,-47.679633,-12.950283,-18.008861,18.588946,-19.21855,-21.534005
R,0.809715,0.812791,0.557766,0.89463,0.873648,0.836178,0.75511,0.824537,0.913207
RMSE,33.586895,41.095736,60.132021,32.137303,27.859457,25.727802,41.487847,31.127107,32.466822
IOA,0.791434,0.719912,0.564736,0.818454,0.820249,0.81976,0.645321,0.76445,0.864992
NMB,14.271422,23.033153,32.016097,-3.974032,7.857776,3.185056,33.174963,14.752609,17.506109
NME,26.079127,31.868979,48.68293,18.642672,19.721763,14.796291,42.555166,27.505083,28.230582


In [78]:
# Write each metric table to its own workbook in the evaluation folder,
# keeping the metric names as the first column.
path = 'D:/Academic/Project/GRAD/Annually/data/evaluation/'
for label, table in (('hour', df_hour), ('mda8', df_mda8), ('hmax', df_hmax)):
    table.to_excel(path + f'{label}.xlsx',index=True)