# 毕业论文`GRAD`
## 计算站点验证结果并平均

---
*@author: Evan*\
*@date: 2024-03-30*

In [1]:
import numpy as np
import pandas as pd
import xarray as xr
import geopandas as gpd

import matplotlib.pyplot as plt
from matplotlib import rcParams
# Matplotlib font setup applied globally to every figure in this notebook.
# NOTE(review): matplotlib only consults the "font.serif" list when
# "font.family" is the generic 'serif' family; with a concrete family name
# as set here, the SimSun entry is presumably not used — confirm that CJK
# labels render as intended.
config = {
    "font.family":'Times New Roman',
    "mathtext.fontset":'stix',
    "font.serif": ['SimSun'],
}
rcParams.update(config)

import sys
sys.path.append('../../../src/')
from namelist import *
from mask import polygon_to_mask
import ModelEvalLib as me
from findpoint import findpoint

# silence the warning note
import warnings
warnings.filterwarnings("ignore")

In [2]:
# Species under evaluation: `simvar` selects the variable in the model
# dataset, `obsvar` is the tag used in the observation workbook file names.
simvar, obsvar = 'O3', 'O3'

In [68]:
# Station table: only the three listed columns are read, and the first of
# them becomes the index (presumably the station code 监测点编码 — confirm
# against the workbook, since pandas keeps the file's column order rather
# than the order given in `usecols`).
sites = pd.read_excel(
    obs_dir + 'sitelocation.xlsx',
    usecols=['监测点编码','经度','纬度'],
    index_col=0,
)

In [87]:
# Periods to evaluate. Every year is evaluated for September; July is
# skipped for 2014-2016 (see the guard in the loop), which matches the
# column list below.
years = [2014,2015,2016,2019,2021,2022]
months = ['Sep','Jul']
columns = ['2014_Sep','2015_Sep','2016_Sep',
           '2019_Sep','2021_Sep','2022_Sep',
           '2019_Jul','2021_Jul','2022_Jul']

# One row per station, one column per period; cells stay NaN until filled.
dfout = pd.DataFrame(index=sites.index,columns=columns)

for month in months:
    for year in years:
        if year <= 2016 and month == 'Jul':
            continue

        # The context manager closes the NetCDF file once this period has
        # been processed instead of leaking one open handle per period.
        with xr.open_dataset(datadir + f'processed/{month}_{year}/{month}_{year}_chem.nc') as ds:
            obspath = get_obspath(month)
            df = pd.read_excel(obspath + f'site_{obsvar}_{year}.xlsx',index_col=0)

            for site in sites.index:
                # NOTE(review): assumes each row of `sites` unpacks as
                # (longitude, latitude) — verify the workbook column order.
                lon,lat = sites.loc[site]
                xloc, yloc = findpoint(lon,lat,ds[simvar])
                # Index 0 on the second dimension — presumably the lowest
                # model level; TODO confirm against the dataset layout.
                sim = ds[simvar][:,0,:,:].sel(x=xloc,y=yloc)
                # Fill gaps in the observed series by linear interpolation.
                obs = df[site].interpolate(method='linear')

                metrics = me.CalculateMetrics(obs,sim)
                # Evaluated while the dataset is still open so that any
                # lazily loaded model data can be read.
                dfout.loc[site,f'{year}_{month}'] = metrics.get_mb().values

# Cells were filled as Python objects; convert before averaging.
dfout = dfout.astype(float)
# Average over stations for each period, ignoring stations without a value.
dfmean = dfout.mean(axis=0,skipna=True)

In [101]:
dfmean

2014_Sep    26.221780
2015_Sep    32.332831
2016_Sep    41.367709
2019_Sep    24.541599
2021_Sep    28.531130
2022_Sep    24.774965
2019_Jul    35.281851
2021_Jul    23.401754
2022_Jul    19.186655
dtype: float64

# 计算MDA8、Hmax以及百分位

In [102]:
def calculate_hour(sites,years,months,columns,obsvar,simvar):
    """Evaluate the simulation at every station for each period.

    For each (month, year) pair the processed model file and the matching
    observation workbook are read. For every station the model series at
    the grid point returned by ``findpoint`` is extracted and the value of
    ``me.CalculateMetrics(obs, sim).get_mb()`` (presumably the mean bias)
    is stored.

    Parameters
    ----------
    sites : pandas.DataFrame
        One row per station, indexed by the station identifier used as
        column name in the observation workbooks. Each row must unpack to
        ``(lon, lat)``.
    years : iterable of int
        Years to process.
    months : iterable of str
        Month tags used in the file names. ``'Jul'`` is skipped for years
        up to and including 2016.
    columns : list of str
        Column labels of the result table; values are written to the
        label ``f'{year}_{month}'``.
    obsvar : str
        Tag used in the observation workbook name
        ``site_{obsvar}_{year}.xlsx``.
    simvar : str
        Name of the variable read from the model dataset.

    Returns
    -------
    dfout : pandas.DataFrame
        Station-by-period table of the metric, as floats (NaN where no
        value was written).
    dfmean : pandas.Series
        Mean of ``dfout`` over stations for each period, ignoring NaN.

    Notes
    -----
    Relies on the module-level names ``datadir``, ``get_obspath``,
    ``findpoint``, ``me``, ``xr`` and ``pd``.
    """
    dfout = pd.DataFrame(index=sites.index,columns=columns)

    for month in months:
        for year in years:
            if year <= 2016 and month == 'Jul':
                continue

            # Close the NetCDF file after each period instead of leaking
            # one open handle per (month, year) combination.
            with xr.open_dataset(datadir + f'processed/{month}_{year}/{month}_{year}_chem.nc') as ds:
                obspath = get_obspath(month)
                df = pd.read_excel(obspath + f'site_{obsvar}_{year}.xlsx',index_col=0)

                for site in sites.index:
                    lon,lat = sites.loc[site]
                    xloc, yloc = findpoint(lon,lat,ds[simvar])
                    # Index 0 on the second dimension — presumably the
                    # lowest model level; TODO confirm.
                    sim = ds[simvar][:,0,:,:].sel(x=xloc,y=yloc)
                    # Fill gaps in the observations by linear interpolation.
                    obs = df[site].interpolate(method='linear')

                    metrics = me.CalculateMetrics(obs,sim)
                    # Evaluated while the dataset is still open so lazily
                    # loaded model data can be read.
                    dfout.loc[site,f'{year}_{month}'] = metrics.get_mb().values

    # Cells were filled as Python objects; convert before averaging.
    dfout = dfout.astype(float)
    dfmean = dfout.mean(axis=0,skipna=True)

    return dfout, dfmean

def calculate_mda8(sites,years,months,columns,obsvar,simvar):
    """Evaluate the simulation at every station for each period.

    NOTE(review): despite its name, this function performs no MDA8
    (maximum daily 8-hour average) aggregation. It passes the same
    un-aggregated series to ``me.CalculateMetrics`` as ``calculate_hour``
    does and therefore returns the same numbers. The 8-hour rolling mean
    and daily maximum still have to be applied to ``obs`` and ``sim``
    before the metric is computed — TODO implement once the time index of
    both series is confirmed.

    Parameters
    ----------
    sites : pandas.DataFrame
        One row per station, indexed by the station identifier used as
        column name in the observation workbooks. Each row must unpack to
        ``(lon, lat)``.
    years : iterable of int
        Years to process.
    months : iterable of str
        Month tags used in the file names. ``'Jul'`` is skipped for years
        up to and including 2016.
    columns : list of str
        Column labels of the result table; values are written to the
        label ``f'{year}_{month}'``.
    obsvar : str
        Tag used in the observation workbook name
        ``site_{obsvar}_{year}.xlsx``.
    simvar : str
        Name of the variable read from the model dataset.

    Returns
    -------
    dfout : pandas.DataFrame
        Station-by-period table of the value returned by ``get_mb()``
        (presumably the mean bias), as floats.
    dfmean : pandas.Series
        Mean of ``dfout`` over stations for each period, ignoring NaN.
    """
    dfout = pd.DataFrame(index=sites.index,columns=columns)

    for month in months:
        for year in years:
            if year <= 2016 and month == 'Jul':
                continue

            # Close the NetCDF file after each period instead of leaking
            # one open handle per (month, year) combination.
            with xr.open_dataset(datadir + f'processed/{month}_{year}/{month}_{year}_chem.nc') as ds:
                obspath = get_obspath(month)
                df = pd.read_excel(obspath + f'site_{obsvar}_{year}.xlsx',index_col=0)

                for site in sites.index:
                    lon,lat = sites.loc[site]
                    xloc, yloc = findpoint(lon,lat,ds[simvar])
                    # Index 0 on the second dimension — presumably the
                    # lowest model level; TODO confirm.
                    sim = ds[simvar][:,0,:,:].sel(x=xloc,y=yloc)

                    # Fill gaps in the observations by linear interpolation.
                    obs = df[site].interpolate(method='linear')

                    metrics = me.CalculateMetrics(obs,sim)
                    # Evaluated while the dataset is still open so lazily
                    # loaded model data can be read.
                    dfout.loc[site,f'{year}_{month}'] = metrics.get_mb().values

    # Cells were filled as Python objects; convert before averaging.
    dfout = dfout.astype(float)
    dfmean = dfout.mean(axis=0,skipna=True)

    return dfout, dfmean

def calculate_hmax(sites,years,months,columns,obsvar,simvar):
    """Evaluate the simulation at every station for each period.

    NOTE(review): despite its name, this function does not reduce the
    series to a daily maximum. It passes the same un-aggregated series to
    ``me.CalculateMetrics`` as ``calculate_hour`` does and therefore
    returns the same numbers. The daily-maximum step still has to be
    applied to ``obs`` and ``sim`` before the metric is computed — TODO
    implement once the time index of both series is confirmed.

    Parameters
    ----------
    sites : pandas.DataFrame
        One row per station, indexed by the station identifier used as
        column name in the observation workbooks. Each row must unpack to
        ``(lon, lat)``.
    years : iterable of int
        Years to process.
    months : iterable of str
        Month tags used in the file names. ``'Jul'`` is skipped for years
        up to and including 2016.
    columns : list of str
        Column labels of the result table; values are written to the
        label ``f'{year}_{month}'``.
    obsvar : str
        Tag used in the observation workbook name
        ``site_{obsvar}_{year}.xlsx``.
    simvar : str
        Name of the variable read from the model dataset.

    Returns
    -------
    dfout : pandas.DataFrame
        Station-by-period table of the value returned by ``get_mb()``
        (presumably the mean bias), as floats.
    dfmean : pandas.Series
        Mean of ``dfout`` over stations for each period, ignoring NaN.
    """
    dfout = pd.DataFrame(index=sites.index,columns=columns)

    for month in months:
        for year in years:
            if year <= 2016 and month == 'Jul':
                continue

            # Close the NetCDF file after each period instead of leaking
            # one open handle per (month, year) combination.
            with xr.open_dataset(datadir + f'processed/{month}_{year}/{month}_{year}_chem.nc') as ds:
                obspath = get_obspath(month)
                df = pd.read_excel(obspath + f'site_{obsvar}_{year}.xlsx',index_col=0)

                for site in sites.index:
                    lon,lat = sites.loc[site]
                    xloc, yloc = findpoint(lon,lat,ds[simvar])
                    # Index 0 on the second dimension — presumably the
                    # lowest model level; TODO confirm.
                    sim = ds[simvar][:,0,:,:].sel(x=xloc,y=yloc)
                    # Fill gaps in the observations by linear interpolation.
                    obs = df[site].interpolate(method='linear')

                    metrics = me.CalculateMetrics(obs,sim)
                    # Evaluated while the dataset is still open so lazily
                    # loaded model data can be read.
                    dfout.loc[site,f'{year}_{month}'] = metrics.get_mb().values

    # Cells were filled as Python objects; convert before averaging.
    dfout = dfout.astype(float)
    dfmean = dfout.mean(axis=0,skipna=True)

    return dfout, dfmean