In [2]:
import xarray as xr
import numpy as np
import his_utils
import pandas as pd
import os
from concurrent.futures import ProcessPoolExecutor, as_completed


# Input dataset read from the 2021-06-26 test-data directory; later cells read
# its '2m_temperature' variable and write perturbed copies next to it.
input_data = xr.open_dataset('testdata/2021-06-26/ERA5_input.nc')

In [2]:
# Perturbation scales passed to his_utils.add_perturbation; presumably multiples
# of the standard deviation, judging by the "..._{scale}std.nc" output names.
perturbations = [1.5, 2, 3, 4, 5]

# Reference field without the 'batch' dimension. Every perturbation starts from
# this same array, so the scales do not accumulate.
original = input_data['2m_temperature'].squeeze('batch')

for scale in perturbations:
    perturbed = his_utils.add_perturbation(original, ["2m_temperature"], scale)

    # Build a new dataset instead of assigning into `input_data`: otherwise the
    # shared `input_data` would silently hold the last (largest) perturbation
    # for every cell that runs after this one.
    perturbed_input = input_data.assign(
        {'2m_temperature': perturbed.expand_dims(dim='batch', axis=0)}
    )
    perturbed_input.to_netcdf(f'testdata/2021-06-26/ERA5_input_perturbed_{scale}std.nc')

### STD

각 시간대(00, 06, 12, 18시)별 표준편차(std)를 따로 구하기 위해, 연도별 자료를 시간대별 파일로 나누는 과정

In [None]:
# Settings
start_year, end_year = 1979, 2017        # first and last year processed (both inclusive)
input_dir = '/camdata2/ERA5/hourly/v_'   # joined with "<year>.nc" to locate each yearly file
output_dir = 'testdata/stats'            # destination of the combined per-hour files
num_workers = 35                         # max_workers of the process pool

# Chunking along "time" handed to xr.open_dataset for each yearly file.
chunks = dict(time=876)

def process_year(year):
    """Open one yearly file and split it by hour of day.

    Parameters
    ----------
    year : int
        Year whose file ``<input_dir>/<year>.nc`` should be read.

    Returns
    -------
    dict or None
        Mapping from the hour labels ``'00'``, ``'06'``, ``'12'`` and ``'18'``
        to the subset of the dataset whose time stamps fall on that hour.
        ``None`` (after printing a warning) when the file does not exist.
    """
    filepath = os.path.join(input_dir, f"{year}.nc")

    # Missing years are reported and skipped rather than treated as an error.
    if not os.path.exists(filepath):
        print(f"Warning: File for year {year} not found.")
        return None

    ds = xr.open_dataset(filepath, chunks=chunks)
    hour_of_day = ds.time.dt.hour

    # Boolean mask on the time axis picks the samples of one hour at a time.
    return {
        label: ds.sel(time=hour_of_day == int(label))
        for label in ('00', '06', '12', '18')
    }

def try_this():
    """Split every yearly file by hour and write one combined file per hour.

    ``process_year`` is submitted to a process pool for each year from
    ``start_year`` to ``end_year`` (inclusive). The per-hour pieces are then
    concatenated along ``time`` in ascending year order and saved as
    ``<output_dir>/40yr_<hour>h.nc``.

    Years whose file is missing (``process_year`` returned nothing) or whose
    worker raised are reported and left out. An hour for which no data was
    collected is skipped with a warning instead of failing in ``xr.concat``.
    """
    hours = ['00', '06', '12', '18']
    results_by_year = {}

    with ProcessPoolExecutor(max_workers=num_workers) as executor:
        future_to_year = {
            executor.submit(process_year, year): year
            for year in range(start_year, end_year + 1)
        }

        for future in as_completed(future_to_year):
            year = future_to_year[future]
            try:
                data = future.result()
            except Exception as exc:
                print(f'Year {year} generated an exception: {exc}')
                continue
            if data:
                results_by_year[year] = data

    # as_completed yields futures in completion order, which is arbitrary.
    # Concatenating in that order would scramble the years along the time
    # axis, so the pieces are put back into ascending year order first.
    ordered_years = sorted(results_by_year)

    # Make sure the destination exists before the (potentially long) writes.
    os.makedirs(output_dir, exist_ok=True)

    for hour in hours:
        pieces = [results_by_year[year][hour] for year in ordered_years]
        if not pieces:
            # xr.concat raises on an empty sequence; nothing to write here.
            print(f"Warning: no data collected for {hour}:00, skipping.")
            continue

        print(f"Combining data for {hour}:00")
        combined_data = xr.concat(pieces, dim='time')
        output_file = os.path.join(output_dir, f'40yr_{hour}h.nc')
        print(f"Saving {output_file}")
        combined_data.to_netcdf(output_file)

    print("Processing complete.")

try_this()

# Short variable / coordinate names found in the statistics files, mapped to
# the descriptive names used elsewhere in this notebook (e.g. '2m_temperature',
# 'lat', 'lon').
name_mapping = {
    't2m': '2m_temperature',
    't': 'temperature',
    'u': 'u_wind',
    'v': 'v_wind',
    'z': 'geopotential',
    'q': 'specific_humidity',
    'longitude': 'lon',
    'latitude': 'lat'
}

# Rename the contents of each per-hour std file in place.
# NOTE(review): the "*_std_daily.nc" files are not produced by any code visible
# in this notebook; they are assumed to exist already.
for hour in ["00", "06", "12", "18"]:
    stats_path = f'testdata/stats/40yr_{hour}h_std_daily.nc'
    tmp_path = f'{stats_path}.tmp'

    # xr.open_dataset reads lazily and keeps the file open, so writing straight
    # back to the same path can fail or damage the file. Write the renamed copy
    # to a temporary file while the source is open, then swap it in once the
    # source has been closed.
    with xr.open_dataset(stats_path) as hxx:
        # Only names actually present are renamed, which keeps the cell
        # re-runnable after the files have already been converted.
        present = {k: v for k, v in name_mapping.items()
                   if k in hxx.variables or k in hxx.coords}
        hxx.rename(present).to_netcdf(tmp_path)

    os.replace(tmp_path, stats_path)

# Scratch Code

In [None]:
import glob

# Per-hour std files, put into name order so the position along the new
# "hour" dimension is predictable.
files = glob.glob('testdata/stats/40yr_*h_std_daily.nc')
files.sort()
print(files)

# Stack the files along a new "hour" dimension, then materialise the result
# and store it as a single file.
std = xr.open_mfdataset(files, concat_dim='hour', combine='nested')
std.compute().to_netcdf('testdata/stats/40yr_std_daily.nc')
std

In [None]:
std = xr.open_dataset('testdata/stats/40yr_std_daily.nc')

from matplotlib.colors import TwoSlopeNorm
import his_plot
import matplotlib.pyplot as plt

# 2 m temperature std at the first position along "hour".
t2m_std_first_hour = std['2m_temperature'].isel(hour=0)

# Summary numbers of that field; not used further in the code shown here.
mean_val = t2m_std_first_hour.mean().values
std_val = t2m_std_first_hour.std().values

# Quick-look plot using xarray's default plotting.
plot = t2m_std_first_hour.plot()
plt.show()

In [None]:

def _on_toy_grid(values, name):
    """Wrap a 3x3 array as a DataArray on a 3x3 latitude/longitude grid.

    Latitude runs from 35 to 39 and longitude from 125 to 129, three points
    each; ``name`` becomes the DataArray's name.
    """
    return xr.DataArray(
        values,
        dims=("latitude", "longitude"),
        coords={
            "latitude": np.linspace(35, 39, 3),
            "longitude": np.linspace(125, 129, 3),
        },
        name=name,
    )


# Deterministic 3x3 field holding 0..8.
data = np.arange(9).reshape(3, 3)
da = _on_toy_grid(data, "example_data")

# First random 3x3 field: uniform draws in [0, 10) rounded to whole numbers.
data = np.round(np.random.uniform(low=0, high=10, size=(3, 3)), decimals=0)
da2 = _on_toy_grid(data, "temperature")

# Second random 3x3 field, drawn the same way.
data = np.round(np.random.uniform(low=0, high=10, size=(3, 3)), decimals=0)
da3 = _on_toy_grid(data, "temperature")

for field in (da, da2, da3):
    print(field)

# Element-wise arithmetic on the three fields; shown as the cell output.
da + 0.1 * (da2 * da3)

In [None]:
# Re-open the unmodified input so this cell does not depend on earlier cells.
input_data = xr.open_dataset('testdata/2021-06-26/ERA5_input.nc')

original = input_data['2m_temperature'].squeeze('batch')

# Take the noise shape from the data's own lat/lon axes instead of hard-coding
# 721 x 1440, so the cell keeps working if the input grid changes.
grid_shape = (original.sizes['lat'], original.sizes['lon'])

# Standard-normal noise (mean 0, std 1) on the same lat/lon grid as `original`.
# No seed is set, so every run draws a different field.
normal_dist = xr.DataArray(
    data=np.random.normal(loc=0, scale=1, size=grid_shape),
    dims=('lat', 'lon'),
    coords={
        'lat': original.lat,
        'lon': original.lon
    },
    attrs={'long_name': '2m_temperature'}
)
normal_dist