In [1]:
import os
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
from ydata_profiling import ProfileReport,compare

import sys
sys.path.append('../../src/')
from namelist import *
from mask import polygon_to_mask

# silence the warning note
import warnings
warnings.filterwarnings("ignore")

## 将网格文件转换为表格

In [2]:
# --- Analysis configuration -------------------------------------------------
month = "Sep"

# The years are deliberately listed newest-first: the ordering is relied on
# later when the averaged DataArray is built, to pick up the time dimension
# (average_data copies dims/coords from the last file it reads).
low_years = np.array([2016, 2015, 2014])
high_years = np.array([2022, 2021, 2019])

# One "<month>_<year>_mcip.nc" file per year, inside a "<month>_<year>" folder
# under processed_dir (presumably supplied by `from namelist import *`).
low_files, high_files = (
    [os.path.join(processed_dir, f"{month}_{year}/{month}_{year}_mcip.nc") for year in years]
    for years in (low_years, high_years)
)

# Variables to tabulate and the level at which they are selected.
variables = [
    'SFC_TMP', 'AIR_TMP', 'RH', 'SOL_RAD',
    'WSPD10', 'WDIR10', 'PRES', 'CloudFRAC',
]
level = 1000  # units: hPa


In [3]:
# Read the region shapefile and open the grid file. Both path names
# (shp_PRD_adm, grid_d03) are presumably supplied by `from namelist import *`.
shp = gpd.read_file(shp_PRD_adm)
grid = xr.open_dataset(grid_d03)
# squeeze() drops any length-1 dimensions from the LON/LAT fields.
lon = grid.LON.squeeze()
lat = grid.LAT.squeeze()

# Mask built from the first geometry in the shapefile on the lon/lat grid.
# NOTE(review): presumably truthy for grid cells inside the polygon - confirm
# against polygon_to_mask in src/mask.py.
mask = polygon_to_mask(shp.geometry[0], lon, lat)
# Label the mask axes ('y', 'x') so it can be applied with DataArray.where().
mask_da = xr.DataArray(mask, dims=('y','x'))

In [4]:
def average_data(filelist, var, level):
    """Average one variable over several files at the level nearest ``level``.

    Each file is opened with ``xr.open_dataset``; ``var`` is selected at the
    ``level`` coordinate value closest to ``level`` (``method='nearest'``) and
    read into memory before the file is closed again. The raw arrays are then
    averaged element-wise with equal weight. Coordinate labels are ignored
    while averaging, so every file must yield an array of the same shape.

    Parameters
    ----------
    filelist : iterable of str
        Paths of the files to average. Any number of files (at least one)
        is accepted.
    var : str
        Name of the variable to read from each file.
    level : scalar
        Target value on the ``level`` coordinate.

    Returns
    -------
    xr.DataArray
        Element-wise mean, named ``var``. Dimensions and coordinates are
        copied from the LAST file in ``filelist``.

    Raises
    ------
    ValueError
        If ``filelist`` is empty.
    """
    total = None
    template = None
    count = 0
    for file in filelist:
        # Load inside the ``with`` block: open_dataset reads lazily, so the
        # values must be pulled into memory while the file is still open.
        # The context manager also closes the file if the selection fails.
        with xr.open_dataset(file) as ds:
            selected = ds[var].sel(level=level, method='nearest').load()

        # Sum plain arrays rather than DataArrays so that differing
        # coordinate labels between files do not trigger alignment.
        values = selected.values
        total = values if total is None else total + values
        template = selected
        count += 1

    if count == 0:
        raise ValueError("filelist must contain at least one file")

    # Divide by the actual number of files instead of a hard-coded 3.
    averaged = total / count

    return xr.DataArray(
        averaged,
        dims=template.dims,
        coords=template.coords,
        name=var,
    )

In [5]:
def nc_to_df(files,variables,level,mask_da):
    """Build a table of masked spatial means, one column per variable.

    For every name in ``variables`` the multi-file average returned by
    ``average_data(files, name, level)`` is masked with ``mask_da`` through
    ``DataArray.where``, averaged over the ``'x'`` and ``'y'`` dimensions with
    NaNs skipped, and converted to a DataFrame whose ``level`` column is
    dropped.

    Parameters
    ----------
    files : list of str
        Files handed on to ``average_data``.
    variables : iterable of str
        Names of the variables to tabulate.
    level : scalar
        Level handed on to ``average_data``.
    mask_da : xr.DataArray
        Condition passed to ``DataArray.where`` before the spatial mean.

    Returns
    -------
    pandas.DataFrame
        The per-variable frames concatenated side by side. The outer column
        level that ``pd.concat`` adds for the dict keys is removed, leaving
        only the inner column names.
    """
    def _regional_mean_table(name):
        # Multi-file average -> apply mask -> mean over the horizontal dims.
        field = average_data(files, name, level)
        regional = field.where(mask_da).mean(dim=('x','y'), skipna=True)
        return regional.to_dataframe().drop(columns=['level'])

    per_variable = {name: _regional_mean_table(name) for name in variables}

    table = pd.concat(per_variable, axis=1)
    # Concatenating a dict yields (key, column) MultiIndex columns; keep only
    # the inner column names.
    table.columns = table.columns.droplevel(0)
    return table

## `ProfileReport`

In [6]:
# Masked regional-mean tables for the two groups of years, built from
# low_files and high_files with the same variables, level and mask.
df_low  = nc_to_df(low_files,variables,level,mask_da)
df_high = nc_to_df(high_files,variables,level,mask_da)

# Profile each table separately; the two reports are compared in the next cell.
reports_low = ProfileReport(df_low, title="Low")
reports_high = ProfileReport(df_high, title="High")


In [7]:
# Combine the "Low" and "High" profiling reports into one comparison report.
comparison = compare([reports_low,reports_high])

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

In [8]:
# Write the comparison report to an HTML file.
# NOTE(review): absolute, machine-specific path - consider building it from a
# configurable output directory so the notebook runs on other machines.
comparison.to_file('D:/Academic/Project/GRAD/Annually/data/profile_reports/Report_avgYear.html')

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]

## 850hPa

In [6]:
# 850 hPa comparison.
# Only the settings that differ from the 1000 hPa run are set here. The file
# lists (low_files / high_files) and the region mask (mask_da) are reused from
# the configuration and mask cells earlier in the notebook, which this cell
# previously copy-pasted verbatim (re-reading the shapefile and re-opening the
# grid file without closing the first handle).
# NOTE: this rebinds `variables`, `level`, `df_low`, `df_high`, `reports_low`,
# `reports_high` and `comparison` from the 1000 hPa section.
variables = ['AIR_TMP','RH','SOL_RAD','PRES']
level = 850 # units: hPa

# Masked regional-mean tables for the two groups of years at 850 hPa.
df_low  = nc_to_df(low_files,variables,level,mask_da)
df_high = nc_to_df(high_files,variables,level,mask_da)

# Profile each table, then combine the two reports into one comparison.
reports_low = ProfileReport(df_low, title="Low")
reports_high = ProfileReport(df_high, title="High")

comparison = compare([reports_low,reports_high])
# NOTE(review): absolute, machine-specific path - consider building it from a
# configurable output directory so the notebook runs on other machines.
comparison.to_file('D:/Academic/Project/GRAD/Annually/data/profile_reports/Report_avgYear_850.html')


Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Summarize dataset:   0%|          | 0/5 [00:00<?, ?it/s]

Generate report structure:   0%|          | 0/1 [00:00<?, ?it/s]

Render HTML:   0%|          | 0/1 [00:00<?, ?it/s]

Export report to file:   0%|          | 0/1 [00:00<?, ?it/s]