# Compute Principal Components from MPAS Experiments

## Import packages

In [19]:
import numpy as np
import joblib as jl
import netCDF4 as nc
from scipy.interpolate import interp1d

## functions

### load data

In [20]:
def load_data(path, var, lim):
    """Read one variable from ``{path}{var}.nc`` and remove its axis-0 mean.

    Parameters
    ----------
    path : str
        Directory prefix; it is concatenated directly with ``var`` and
        ``.nc``, so it must already end with a path separator.
    var : str
        Used both as the file stem and as the variable name inside the file.
    lim : index array or slice
        Selection applied to the third axis of the variable.

    Returns
    -------
    array
        The selected data with the mean over the first axis subtracted.
        The caller unpacks the shape as (time, lev, lat, lon).
    """
    filename = f'{path}{var}.nc'

    # NOTE(review): `mmap` is not among the documented netCDF4.Dataset
    # options -- confirm whether it has any effect here.
    with nc.Dataset(filename, 'r', mmap=True) as dataset:
        field = dataset.variables[var][:, :, lim, :]

    # Subtract the mean over the first axis in place to obtain anomalies.
    axis0_mean = field.mean(axis=0)
    field -= axis0_mean

    return field

### normal equation

In [21]:
def normal_equation(eof, data):
    """Least-squares coefficients of ``data`` on the columns of ``eof``.

    Solves the normal equations ``(eof.T @ eof) @ x = eof.T @ data`` for
    ``x``.

    Parameters
    ----------
    eof : ndarray, shape (n, k)
        Basis vectors stored as columns.
    data : ndarray, shape (n,) or (n, m)
        Samples to project; each column is one sample.

    Returns
    -------
    ndarray, shape (k,) or (k, m)
        Coefficients of every sample with respect to each basis column.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``eof.T @ eof`` is singular.
    """
    gram = eof.T @ eof
    rhs = eof.T @ data

    # Solve the linear system directly instead of forming the explicit
    # inverse and multiplying: same result, fewer operations, and less
    # round-off when the Gram matrix is poorly conditioned.
    return np.linalg.solve(gram, rhs)

## load data


In [22]:
# Experiment identifier; it selects the input directory here and is reused
# in the output file name when the results are saved.
case = 'NSC'

# Directory holding one NetCDF file per variable for this experiment.
path  = f'/work/b11209013/2024_Research/MPAS/merged_data/{case}/'

# Variables to read (one file each).
var_list = ['q1', 'theta', 'qv']

# Coordinates are taken from the q1 file.
with nc.Dataset(f'{path}q1.nc', 'r', mmap=True) as f:
    lon, lat, lev, time = (
        f.variables[key][:] for key in ('lon', 'lat', 'lev', 'time')
    )

# Keep only latitudes within [-5, 5] (inclusive).
in_band = (lat >= -5) & (lat <= 5)
lat_lim = np.where(in_band)[0]
lat = lat[lat_lim]

# Read the variables in parallel, one worker per variable, then key the
# results by variable name (Parallel returns them in input order).
loaded = jl.Parallel(n_jobs=3)(
    jl.delayed(load_data)(path, name, lat_lim)
    for name in var_list
)
data = dict(zip(var_list, loaded))

# Axis lengths, in the order the fields are stored.
ltime, llev, llat, llon = data['q1'].shape

# Pre-computed EOF basis; columns are indexed as modes below.
eof = jl.load('/work/b11209013/2024_Research/MPAS/PC/CNTL_EOF.joblib')

eof1, eof2 = eof[:, 0], eof[:, 1]

### convert unit

In [23]:
# Level-dependent factor broadcast along axis 1 of the 4-D fields.
# Presumably this converts potential temperature to temperature with lev
# in hPa and kappa = 0.286 -- TODO confirm.
level_factor = (1000 / lev[None, :, None, None]) ** (-0.286)

temperature = data['theta'] * level_factor
# Presumably J/kg/s -> K/day (86400 s per day, cp = 1004.5) -- TODO confirm.
heating = data['q1'] * 86400 / 1004.5
# Presumably kg/kg -> g/kg -- TODO confirm.
moisture = data['qv'] * 1000

# Note the key change: 'theta' is replaced by 't'.
data = {'t': temperature, 'q1': heating, 'qv': moisture}

var_list = data.keys()

## Process data

### permute and reshape

In [24]:
# Swap the first two axes so that the level axis comes first.
data_pm = {
    name: data[name].transpose(1, 0, 2, 3)
    for name in var_list
}

# Collapse the remaining axes so each column is one vertical profile.
data_rs = {
    name: permuted.reshape(llev, -1)
    for name, permuted in data_pm.items()
}

### interpolate

In [25]:
# Target vertical grid: 18 evenly spaced levels from 150 to 1000 (step 50).
lev_itp = np.linspace(150, 1000, 18)

# The level axis and the profiles are both reversed before interpolation.
lev_reversed = lev[::-1]

data_itp = {
    name: interp1d(lev_reversed, profiles[::-1], axis=0)(lev_itp)
    for name, profiles in data_rs.items()
}

## Project data onto the EOFs to obtain principal components

In [26]:
# Shape used to unflatten each coefficient row.
field_shape = (ltime, llat, llon)

# Least-squares coefficients of every profile on the EOF basis.
data_eof = {
    name: normal_equation(eof, profiles)
    for name, profiles in data_itp.items()
}

# Rows 0 and 1 hold the coefficients of the first and second EOF column.
pc1 = {
    name: coeffs[0].reshape(field_shape)
    for name, coeffs in data_eof.items()
}

pc2 = {
    name: coeffs[1].reshape(field_shape)
    for name, coeffs in data_eof.items()
}

print(pc1.keys())

dict_keys(['t', 'q1', 'qv'])


## save file

In [27]:
# Bundle the coordinates (lat already restricted to the selected band)
# with the two sets of principal components.
output_dict = dict(lon=lon, lat=lat, time=time, pc1=pc1, pc2=pc2)

# Write one file per experiment, named after `case`.
jl.dump(output_dict, f'/work/b11209013/2024_Research/MPAS/PC/{case}_PC.joblib')

['/work/b11209013/2024_Research/MPAS/PC/NSC_PC.joblib']

### 