# Import

In [1]:
# System
import os
import time
import json

# The thread limits below must be set before importing any compute-heavy
# module (e.g. numpy); they are applied here, ahead of the numpy import.
# Processor Setting
# Each entry is equivalent to `export NAME=VALUE` in the shell.
os.environ.update({
    "OMP_NUM_THREADS": "4",
    "OPENBLAS_NUM_THREADS": "4",
    "MKL_NUM_THREADS": "6",
    "VECLIB_MAXIMUM_THREADS": "4",
    "NUMEXPR_NUM_THREADS": "6",
})

# Data processing
import numpy as np

# Config

In [2]:
# Config path
# NOTE(review): absolute, machine-specific location; the notebook only runs
# where this volume is mounted.
root = '/Volumes/Expansion/User_Backup/b08209033/111-2_IVT_analysis/'
file = 'config.json'
config_path = os.path.join(root, file)

# Import config
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(config_path) as infile:
    config = json.load(infile)

# Update config
# IVT variables: "Flag_IVT_filtered" selects whether IVT_seasonal is subtracted
# when IVT is read; "Fname_IVT_svd" is the file name the SVD result is saved to.
config.update({
    "Flag_IVT_filtered": True,
    "Fname_IVT_svd": "IVT_SVD.npz",
})

# Export config
# Overwrites the same file the config was read from, with keys sorted.
with open(config_path, 'w') as outfile:
    json.dump(config, outfile, sort_keys=True)

# Read IVT

In [3]:
# Switch to the calculation directory named in the config, then pull both
# arrays out of the archive while it is open.
os.chdir(config["Path_IVT_calculation"])
with np.load(config["Fname_IVT"]) as dataset:
    IVT_original, IVT_seasonal = dataset['IVT_original'], dataset['IVT_seasonal']

if not config["Flag_IVT_filtered"]:
    # Unfiltered: work on an independent copy of the original field.
    IVT = np.copy(IVT_original)
else:
    # Filtered: subtract IVT_seasonal, with a new axis inserted at position 1
    # so it broadcasts against IVT_original along that axis.
    IVT = IVT_original - IVT_seasonal[:, np.newaxis, :, :, :]

# Drop the source arrays; only IVT is used from here on.
del IVT_original, IVT_seasonal

# Compute SVD of IVT

In [4]:
# Variable dimension
# The first five entries of the configured structure give, in order, the sizes
# used below as DIM, YEAR, DAY, LAT and LON.
IVT_structure = config["Var_Data_structure_IVT"]
TOTAL_DIM, TOTAL_YEAR, TOTAL_DAY, TOTAL_LAT, TOTAL_LON = (
    IVT_structure[position] for position in range(5)
)

In [5]:
# Calculate SVD
count = time.time()  # wall-clock start, used for the elapsed-time report below

# Flatten (year, day) into one axis and (lat, lon) into another.
flattened = IVT.reshape(TOTAL_DIM, TOTAL_YEAR * TOTAL_DAY, TOTAL_LAT * TOTAL_LON)

# Put the first two entries of the leading axis side by side along the last
# axis, then transpose so that the flattened (year, day) axis becomes the columns.
data = np.concatenate((flattened[0], flattened[1]), axis=-1).T
del flattened

# Reduced SVD: the left vectors are stored as SPACE, the singular values as SV,
# and the right vectors as TIME.
SPACE, SV, TIME = np.linalg.svd(data, full_matrices=False)

print(f"Takes {(time.time()-count):.3f} sec")

Takes 180.896 sec


In [6]:
# Save SVD
# Written into the calculation directory under the file name from the config;
# each array is stored under the keyword name given here.
os.chdir(config["Path_IVT_calculation"])
svd_arrays = {
    "space": SPACE,
    "time": TIME,
    "singular_value": SV,
}
np.savez(config["Fname_IVT_svd"], **svd_arrays)

# SVD Explainability

In [7]:
# Defined threshold: the cumulative fraction of total variance (squared
# singular values, normalised by their sum) that the leading SVD components
# must reach; compared with `>=` in the explainability calculation.
threshold = 0.9

In [8]:
# Calculate explainability
# Find the smallest number of leading SVD components whose cumulative share of
# the total variance reaches `threshold`.
variance = np.square(SV)       # squared singular value per component
total_var = np.sum(variance)

# Running fraction of the total variance covered by the first k components.
cumulative_fraction = np.cumsum(variance / total_var)
reached = np.flatnonzero(cumulative_fraction >= threshold)

if reached.size > 0:
    feature_num = int(reached[0]) + 1
    ith_var = cumulative_fraction[feature_num - 1]
    print(f"First {feature_num} components explain {ith_var*100:.2f}% variance.")
else:
    # The threshold was never reached (e.g. threshold > 1, or rounding keeps the
    # cumulative sum just below it). Keep every component instead of leaving
    # feature_num as None, which would make a later int(feature_num) fail.
    feature_num = int(variance.size)
    ith_var = cumulative_fraction[-1] if feature_num else 0.0
    print(f"Threshold {threshold} not reached; keeping all {feature_num} "
          f"components ({ith_var*100:.2f}% variance).")

First 38 components explain 90.15% variance.


In [9]:
# Save explainability and feature_num
# Fail with a clear message, before the config file is opened for writing
# (which truncates it), if the component count was never determined.
if feature_num is None:
    raise ValueError(
        f"feature_num is unset: the explained variance never reached threshold={threshold}"
    )

config.update({
    "Var_Explainability_SVD": threshold,
    "Var_Feature_num_SVD": int(feature_num),  # plain int so it is JSON-serialisable
})

# The `with` block closes the file on exit, so no explicit close() is needed.
with open(config_path, 'w') as outfile:
    json.dump(config, outfile, sort_keys=True)