# Data comparison by plotting

`line 3` *target_var_list* 입력

`line 7` *max_time_step* 입력

`line 9, 10` *dataset_1, dataset_2* 입력

`line 17` domain level 설정

`line 26` norm 스케일 조정

`line 28` region 설정 

# Header

In [31]:
import xarray as xr
import numpy as np
import lib.his_utils as his_utils
import lib.his_plot as his_plot
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
from matplotlib.colors import TwoSlopeNorm
from multiprocessing import Pool


# Data load

In [27]:

# Paths of the two NetCDF files that are compared against each other.
dataset_1_path = "/data/GC_output/percent2/GC_11111111111_100_30_long.nc"
dataset_2_path = '/data/GC_output/2021-06-21/GC_output_15.nc'

# Optional preprocessing (currently disabled) for either dataset:
#   ds = ds.resample(time='1d').mean()
#   ds = his_utils.convert_scale(ds).resample(time='1d').mean()
dataset_1 = xr.open_dataset(dataset_1_path)
dataset_2 = xr.open_dataset(dataset_2_path)


# Main routine

### stat calculation
dataset 하나만 출력하고 싶으면 dataset_2를 주석처리

In [None]:
# Plot the difference between the two datasets for every selected variable
# so the fields can be compared visually.
#
# Variables available in the datasets:
# ['10m_u_component_of_wind',
#  '10m_v_component_of_wind',
#  '2m_temperature',
#  'geopotential_at_surface',
#  'land_sea_mask',
#  'mean_sea_level_pressure',
#  'total_precipitation_6hr',
#  'geopotential',
#  'specific_humidity',
#  'temperature',
#  'u_component_of_wind',
#  'v_component_of_wind',
#  'vertical_velocity',
#  'toa_incident_solar_radiation']

target_var_list = [
    '2m_temperature',
]

# Number of frames plotted per variable (and per level for level variables).
max_time_step = 13

# Label used in figure titles and output file names.
title = "test"

for var in target_var_list:
    diff = dataset_1[var] - dataset_2[var]
    has_level = 'level' in dataset_1[var].dims
    has_time = 'time' in dataset_1[var].dims

    #####################################################
    # print mean and std of each variable
    #####################################################
    print("====================================")
    print(f"{var} std: {dataset_1[var].std().values}")
    print(f"{var} mean: {dataset_1[var].mean().values}")
    print("------------------------------------")
    print(f"{var} std: {dataset_2[var].std().values}")
    print(f"{var} mean: {dataset_2[var].mean().values}")
    print("------------------------------------")

    #####################################################
    # statistics of the difference, used for the colour scale
    #####################################################
    if has_level:
        # Keep the 'level' axis: every level gets its own colour-scale centre.
        mean_by_level = diff.mean(('lat', 'lon', 'time'))
        std_by_level = diff.std(('lat', 'lon', 'time'))
        # One common half-width for all levels: the largest per-level std.
        std_val = float(std_by_level.max())

        # Any axis left besides 'level' is reduced to its first entry so
        # that the statistics can be indexed by level alone.
        leftover = {dim: 0 for dim in mean_by_level.dims if dim != 'level'}
        mean_by_level = mean_by_level.isel(leftover)
        std_by_level = std_by_level.isel(leftover)

        # Plain floats keep the norm objects independent of xarray.
        level_norms = [
            TwoSlopeNorm(vmin=level_mean - std_val,
                         vcenter=level_mean,
                         vmax=level_mean + std_val)
            for level_mean in (float(m) for m in mean_by_level.values)
        ]
    else:
        print("not pressure level variables")
        # Overall mean; avoids naming 'time', which time-independent
        # variables do not have.
        mean_val = float(diff.mean())
        std_val = (diff.std(('lat', 'lon')).max()).values.item()
        # NOTE: TwoSlopeNorm requires vmin < vcenter < vmax, so this raises
        # ValueError when the interval mean +/- std does not straddle zero.
        norm = TwoSlopeNorm(vmin=mean_val - std_val,
                            vcenter=0,
                            vmax=mean_val + std_val)

    #####################################################
    # argument list for multiprocessing pool
    #####################################################

    # level variables: one figure per (time step, level)
    if has_level:
        arg_list = [(diff.isel(time=time_index, level=level_index),
                     var,
                     "platecarree",
                     "RdBu_r",
                     f'{var} {title} at t:{time_index}, level:{level_index}\n mean:{mean_by_level[level_index].values}, std:{std_by_level[level_index].values}',
                     f'figure/{title} {var}_{time_index}_{level_index}.png',
                     level_norm)
                    for time_index in range(max_time_step)
                    for level_index, level_norm in enumerate(level_norms)]

    # time-independent variables: a single figure
    elif not has_time:
        arg_list = [(diff,
                     var,
                     "platecarree",
                     "RdBu_r",
                     f'{var} {title}',
                     f'figure/{title} {var}.png',
                     norm)]

    # surface variables: every 4th time step
    else:
        arg_list = [(diff.isel(time=time_index * 4),
                     var,
                     "platecarree",
                     "RdBu_r",
                     f'{var} {title} at t:{time_index * 4}',
                     f'figure/{title} {var}_{time_index * 4}.png',
                     norm)
                    for time_index in range(max_time_step)]

    #####################################################
    # multiprocessing pool
    #####################################################
    with Pool() as pool:
        pool.map(his_plot.plot, arg_list)

# 움짤 만들기

In [None]:
from PIL import Image
from pathlib import Path
import os

# Single-level (surface) variables.
surface = [
    '10m_u_component_of_wind',
    '10m_v_component_of_wind',
    '2m_temperature',
    'mean_sea_level_pressure',
    'total_precipitation_6hr',
]

# Variables defined on pressure levels.
pressure = [
    'geopotential',
    'specific_humidity',
    'temperature',
    'u_component_of_wind',
    'v_component_of_wind',
    'vertical_velocity',
]

# title = "wiped GC - control"

def process_var(args):
    """Assemble the saved frames of one variable into an animated GIF.

    Args:
        args: Tuple ``(var, duration, level)``. ``var`` is the variable
            name, ``duration`` is forwarded to ``his_plot.save_gif`` and
            ``level`` is a level index, or ``None`` for a variable without
            a level axis.

    Frames are looked up in the ``figure`` directory under the names
    ``"{title} {var}_{time_index}.png"`` or, when ``level`` is given,
    ``"{title} {var}_{time_index}_{level}.png"``. ``title`` is read from the
    module-level variable of that name. Every matching file is used, ordered
    by its numeric time index, so the frame count is not tied to a fixed
    range of indices. A message is printed when no frame is found.
    """
    import re

    var, duration, level = args

    prefix = f'{title} {var}_'
    suffix = '.png' if level is None else f'_{level}.png'
    # The time index is the only variable part of a frame's file name.
    frame_pattern = re.compile(re.escape(prefix) + r'(\d+)' + re.escape(suffix))

    frame_dir = Path("figure")
    indexed_paths = []
    if frame_dir.is_dir():
        for candidate in frame_dir.iterdir():
            match = frame_pattern.fullmatch(candidate.name)
            if match:
                indexed_paths.append((int(match.group(1)), candidate))
    # Numeric sort so that e.g. t=8 comes before t=12.
    indexed_paths.sort(key=lambda item: item[0])

    image_frames = []
    for _, file_path in indexed_paths:
        with Image.open(file_path) as img:
            # Copy so the frame outlives the file handle.
            image_frames.append(img.copy())

    if not image_frames:
        print(f"No images found for variable: {var}")
        return

    if level is not None:
        output_filename = f'{title}_{var}_{level}.gif'
    else:
        output_filename = f'{title}_{var}.gif'
    his_plot.save_gif(image_frames, output_filename, duration=duration)

# Value forwarded to his_plot.save_gif as ``duration`` for every GIF.
duration = 600

# One task per surface variable; level=None marks "no level axis".
surface_args = [(name, duration, None) for name in surface]

with Pool() as pool:
    pool.map(process_var, surface_args)

    # Pressure-level variables (disabled):
    # pressure_args = [(var, duration, level)
    #                  for level in range(37)
    #                  for var in pressure]
    # pool.map(process_var, pressure_args)
