# 毕业论文`GRAD`
## 计算站点验证结果并平均

---
*@author: Evan*\
*@date: 2024-03-30*

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd

import matplotlib.pyplot as plt
from matplotlib import rcParams
# Global matplotlib text settings, applied once to the notebook's rcParams.
config = {
    "font.family": "Times New Roman",  # default family for all figure text
    "mathtext.fontset": "stix",        # font set used for math text
    # NOTE(review): matplotlib consults font.serif only when font.family
    # includes 'serif' -- confirm that SimSun is actually picked up here.
    "font.serif": ["SimSun"],
}
rcParams.update(config)

import sys
sys.path.append('../../../src/')
from namelist import *
from mask import polygon_to_mask
import ModelEvalLib as me
from findpoint import findpoint

# silence the warning note
import warnings
warnings.filterwarnings("ignore")

In [2]:
# simvar selects the variable in the model dataset (ds[simvar]); obsvar is
# inserted into the observation workbook name (site_{obsvar}_{year}.xlsx).
simvar, obsvar = 'O3', 'O3'

In [68]:
# Site table: index = station code (监测点编码), columns = longitude (经度)
# and latitude (纬度), in that order.
# usecols only filters columns -- pandas keeps the workbook's own column
# order -- so the index column and the (lon, lat) order are pinned by name
# here. Code further down unpacks `lon, lat = sites.loc[site]` positionally
# and would silently swap the coordinates if the workbook were reordered.
sites = (
    pd.read_excel(obs_dir + 'sitelocation.xlsx',
                  usecols=['监测点编码','经度','纬度'])
    .set_index('监测点编码')[['经度','纬度']]
)

In [87]:
# Periods to evaluate. Jul is only evaluated for years after 2016, so the
# result table has nine columns rather than len(years) * len(months).
years = [2014,2015,2016,2019,2021,2022]
months = ['Sep','Jul']
columns = ['2014_Sep','2015_Sep','2016_Sep',
           '2019_Sep','2021_Sep','2022_Sep',
           '2019_Jul','2021_Jul','2022_Jul']

# One row per site, one column per period; cells hold the mean bias (MB).
dfout = pd.DataFrame(index=sites.index,columns=columns)

for month in months:
    for year in years:
        if year <= 2016 and month == 'Jul':
            continue

        obspath = get_obspath(month)
        df = pd.read_excel(obspath + f'site_{obsvar}_{year}.xlsx',index_col=0)

        # Open the model output in a context manager so the file handle is
        # released once every site of this period has been processed.
        with xr.open_dataset(datadir + f'processed/{month}_{year}/{month}_{year}_chem.nc') as ds:
            for site in sites.index:
                lon,lat = sites.loc[site]
                xloc, yloc = findpoint(lon,lat,ds[simvar])
                # Index 0 on the second dimension -- presumably the lowest
                # model level; TODO confirm against the dataset layout.
                sim = ds[simvar][:,0,:,:].sel(x=xloc,y=yloc)
                # Fill gaps in the observations by linear interpolation.
                obs = df[site].interpolate(method='linear')

                metrics = me.CalculateMetrics(obs,sim)
                # .values yields an array, never None, so the former
                # `is not None` guard was dead code. NaN results are stored
                # as-is and skipped by the skipna mean below.
                dfout.loc[site,f'{year}_{month}'] = metrics.get_mb().values

# The frame was created without a dtype (object); cast before averaging.
dfout = dfout.astype(float)
# Mean bias averaged over all sites, per period.
dfmean = dfout.mean(axis=0,skipna=True)

# 计算MDA8、MDA1(Hmax)以及百分位

In [113]:
from functools import lru_cache


@lru_cache(maxsize=16)
def load_data(year,month):
    """Open the model output and read the observation workbook for one period.

    The calculate_* helpers call this once per site, so the result is
    memoised per ``(year, month)``; otherwise the same NetCDF file and Excel
    workbook would be re-read for every site. The cache size comfortably
    covers the nine periods evaluated in this notebook.

    Parameters
    ----------
    year, month
        Period identifiers used to build the file paths (for example
        ``2019`` and ``'Sep'``). Both must be hashable.

    Returns
    -------
    tuple
        ``(ds, df)``: the dataset opened with ``xr.open_dataset`` and the
        observation table read with ``pd.read_excel`` (first column as
        index).

    Notes
    -----
    The returned objects are shared between callers and must be treated as
    read-only. The cached datasets stay open; call
    ``load_data.cache_clear()`` to drop them or to force a re-read after the
    files on disk have changed.
    """
    ds = xr.open_dataset(datadir + f'processed/{month}_{year}/{month}_{year}_chem.nc')
    obspath = get_obspath(month)
    df = pd.read_excel(obspath + f'site_{obsvar}_{year}.xlsx',index_col=0)

    return ds,df

def calculate_hour(site,year,month):
    """Return the unaggregated simulated and observed series for one site.

    Parameters
    ----------
    site
        Site identifier: a row label of ``sites`` and a column of the
        observation table.
    year, month
        Period identifiers forwarded to ``load_data``.

    Returns
    -------
    tuple
        ``(sim, obs)``: the model series at the grid location returned by
        ``findpoint`` and the observed series with gaps filled by linear
        interpolation.
    """
    dataset, obs_table = load_data(year,month)
    site_lon, site_lat = sites.loc[site]
    field = dataset[simvar]
    ix, iy = findpoint(site_lon,site_lat,field)

    # Index 0 on the second dimension -- presumably the lowest model level;
    # TODO confirm against the dataset layout.
    surface = field[:,0,:,:]
    sim = surface.sel(x=ix,y=iy)
    obs = obs_table[site].interpolate(method='linear')

    return sim, obs

def calculate_mda8(site,year,month):
    """Return the daily maximum of the 8-sample rolling mean for one site.

    Both series are smoothed with an 8-sample rolling mean and then reduced
    to one value per day with a daily maximum. With hourly data (assumed,
    TODO confirm) this is the maximum daily 8-hour average.

    Parameters
    ----------
    site
        Site identifier: a row label of ``sites`` and a column of the
        observation table.
    year, month
        Period identifiers forwarded to ``load_data``.

    Returns
    -------
    tuple
        ``(sim_mda8, obs_mda8)``.
    """
    dataset, obs_table = load_data(year,month)
    site_lon, site_lat = sites.loc[site]
    field = dataset[simvar]
    ix, iy = findpoint(site_lon,site_lat,field)

    # Index 0 on the second dimension -- presumably the lowest model level;
    # TODO confirm against the dataset layout.
    sim = field[:,0,:,:].sel(x=ix,y=iy)
    sim_running = sim.rolling(time=8).mean()
    sim_mda8 = sim_running.resample(time='1D').max()

    # Observations: fill gaps linearly, then apply the same two reductions.
    obs = obs_table[site].interpolate(method='linear')
    obs_running = obs.rolling(window=8).mean()
    obs_mda8 = obs_running.resample('1D').max()

    return sim_mda8, obs_mda8

def calculate_mda1(site,year,month):
    """Return the daily maximum of the simulated and observed series.

    Parameters
    ----------
    site
        Site identifier: a row label of ``sites`` and a column of the
        observation table.
    year, month
        Period identifiers forwarded to ``load_data``.

    Returns
    -------
    tuple
        ``(sim_mda1, obs_mda1)``: one value per day, the maximum of the
        samples falling in that day.
    """
    dataset, obs_table = load_data(year,month)
    site_lon, site_lat = sites.loc[site]
    field = dataset[simvar]
    ix, iy = findpoint(site_lon,site_lat,field)

    # Index 0 on the second dimension -- presumably the lowest model level;
    # TODO confirm against the dataset layout.
    sim = field[:,0,:,:].sel(x=ix,y=iy)
    sim_mda1 = sim.resample(time='1D').max()

    # Observations: fill gaps linearly before taking the daily maximum.
    obs = obs_table[site].interpolate(method='linear')
    obs_mda1 = obs.resample('1D').max()

    return sim_mda1, obs_mda1

def print_metrics(sites,year,month,method):
    """Return the site-averaged evaluation metrics for one period.

    Despite its name this function prints nothing; it returns the result.

    Parameters
    ----------
    sites : pandas.DataFrame
        Site table; only its index (the site identifiers) is used here.
    year, month
        Period identifiers forwarded to the selected ``calculate_*`` helper.
    method : {'hour', 'mda8', 'mda1'}
        Which sim/obs pairing to evaluate: ``calculate_hour``,
        ``calculate_mda8`` or ``calculate_mda1``.

    Returns
    -------
    pandas.Series
        Mean over all sites of MB, MB90, R, RMSE, IOA, NMB and NME, indexed
        by metric name.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names. Previously an
        unknown method surfaced as an UnboundLocalError on ``sim``.
    """
    # Resolve the calculator once, up front, so a bad method name fails
    # immediately with a clear message instead of inside the site loop.
    if method == 'hour':
        calculate = calculate_hour
    elif method == 'mda8':
        calculate = calculate_mda8
    elif method == 'mda1':
        calculate = calculate_mda1
    else:
        raise ValueError(
            f"unknown method {method!r}; expected 'hour', 'mda8' or 'mda1'"
        )

    dfs = {}
    for site in sites.index:
        sim, obs = calculate(site,year,month)

        metrics = me.CalculateMetrics(obs,sim)
        mb   = metrics.get_mb().values
        r    = metrics.get_r()
        rmse = metrics.get_rmse().values
        ioa  = metrics.get_ioa().values
        nmb  = metrics.get_nmb().values
        nme  = metrics.get_nme().values

        # MB90: simulated minus observed 90th percentile.
        sim90 = sim.quantile(0.9).values
        obs90 = obs.quantile(0.9)
        mb90 = sim90 - obs90

        dfs[site] = pd.DataFrame(
            data=[mb,mb90,r,rmse,ioa,nmb,nme],
            index=['MB','MB90','R','RMSE','IOA','NMB','NME'],
            columns=[f'{site}'],
        )

    # One column per site, then average across sites for each metric row.
    dfout = pd.concat(dfs,axis=1)
    dfmean = dfout.mean(axis=1)
    return dfmean


## Hour

In [110]:
# Hourly evaluation: one column of site-averaged metrics per period.
dfs = {}
for month in months:
    for year in years:
        # Jul is only evaluated for years after 2016.
        if month == 'Jul' and year <= 2016:
            continue
        print(f'{year}_{month}')
        dfs[f'{year}_{month}'] = print_metrics(sites,year,month,'hour')
dfout = pd.concat(dfs,axis=1)


2014_Sep
2015_Sep
2016_Sep
2019_Sep
2021_Sep
2022_Sep
2019_Jul
2021_Jul
2022_Jul


In [112]:
dfout

Unnamed: 0,2014_Sep,2015_Sep,2016_Sep,2019_Sep,2021_Sep,2022_Sep,2019_Jul,2021_Jul,2022_Jul
MB,26.22178,32.332831,41.367709,24.541599,28.53113,24.774965,35.281851,23.401754,19.186655
MB90,39.36579,45.507876,55.773636,12.209614,37.744759,25.046872,60.29725,33.246893,32.141199
R,0.717521,0.729573,0.613565,0.714293,0.805256,0.753605,0.700149,0.773896,0.786111
RMSE,51.553613,55.869396,66.284234,54.805815,48.909325,49.28215,51.108659,41.866237,41.776723
IOA,0.798777,0.767719,0.685391,0.801045,0.837591,0.81674,0.703395,0.821003,0.841578
NMB,44.523377,51.982153,65.51072,26.137109,41.828023,24.39733,74.509288,40.538216,31.535104
NME,85.48323,86.953762,104.017058,57.779074,71.279097,47.468613,107.194173,72.358812,68.142228


## MDA8

In [114]:
# MDA8 evaluation: one column of site-averaged metrics per period.
dfs = {}
for month in months:
    for year in years:
        # Jul is only evaluated for years after 2016.
        if month == 'Jul' and year <= 2016:
            continue
        print(f'{year}_{month}')
        dfs[f'{year}_{month}'] = print_metrics(sites,year,month,'mda8')
dfout = pd.concat(dfs,axis=1)


2014_Sep
2015_Sep
2016_Sep
2019_Sep
2021_Sep
2022_Sep
2019_Jul
2021_Jul
2022_Jul


In [115]:
dfout

Unnamed: 0,2014_Sep,2015_Sep,2016_Sep,2019_Sep,2021_Sep,2022_Sep,2019_Jul,2021_Jul,2022_Jul
MB,37.677853,46.857116,61.358504,21.830316,37.210214,26.136395,50.067546,32.359783,36.076404
MB90,34.866066,43.709613,44.040846,17.756592,36.634679,28.19205,61.865974,17.59123,39.001173
R,0.749309,0.70876,0.417556,0.713189,0.788698,0.743059,0.700722,0.768748,0.833068
RMSE,54.004984,61.004118,80.214201,46.071204,48.149692,40.592094,59.406959,44.170962,47.588988
IOA,0.736657,0.667667,0.543783,0.770361,0.754285,0.772235,0.632367,0.763353,0.8103
NMB,36.717394,44.995397,57.158451,14.421916,30.013736,16.015914,60.598161,32.646315,34.941024
NME,50.64346,57.108154,74.195256,29.827004,38.925562,24.776919,71.405305,44.329059,46.3312


## MDA1

In [116]:
# MDA1 (daily maximum) evaluation: one column of site-averaged metrics per period.
dfs = {}
for month in months:
    for year in years:
        # Jul is only evaluated for years after 2016.
        if month == 'Jul' and year <= 2016:
            continue
        print(f'{year}_{month}')
        dfs[f'{year}_{month}'] = print_metrics(sites,year,month,'mda1')
dfout = pd.concat(dfs,axis=1)


2014_Sep
2015_Sep
2016_Sep
2019_Sep
2021_Sep
2022_Sep
2019_Jul
2021_Jul
2022_Jul


In [117]:
dfout

Unnamed: 0,2014_Sep,2015_Sep,2016_Sep,2019_Sep,2021_Sep,2022_Sep,2019_Jul,2021_Jul,2022_Jul
MB,38.340128,53.723039,69.296597,21.506634,41.325285,32.823805,54.494546,34.008981,42.906961
MB90,31.9247,51.126013,62.070678,17.995786,41.929422,39.424443,60.209205,21.96175,45.570678
R,0.706614,0.63771,0.413016,0.690699,0.750712,0.748447,0.626155,0.76077,0.802814
RMSE,67.139879,74.764602,93.923271,52.309253,57.147633,48.886786,70.468377,51.548548,58.120298
IOA,0.734939,0.64647,0.545986,0.769708,0.745942,0.767922,0.637795,0.782987,0.794221
NMB,31.502066,42.925642,53.208214,12.41713,27.424788,17.797201,53.603697,28.169087,35.228397
NME,50.450321,57.846965,71.404613,29.266867,37.892098,26.496535,68.023096,42.291237,47.983449
