In [1]:
# Python packages
import numpy                  as np
from pathlib              import Path

# Utils
from   utils.constants    import SPCAM_Vars
from   utils.constants    import DATA_FOLDER, ANCIL_FILE, tau_min, tau_max, significance, experiment
import utils.utils            as utils
import utils.links            as links
import utils.pcmci_algorithm  as algorithm

## Variables to be processed

In [2]:
'''
This will be given via a wrapper (bash) or config file
'''
# Alternative (larger) selection, currently disabled:
# spcam_parents      = ['tbp','qbp','vbp','ps','solin','shflx','lhflx']
# spcam_children     = ['tphystnd','prect', 'fsns', 'flns']

# Active selection of parent and child variable labels.
spcam_parents = ['tbp', 'ps']
spcam_children = ['flns']

# Labels of every variable to keep: parents first, then children.
spcam_vars_include = [*spcam_parents, *spcam_children]

In [3]:
# Keep only the variables whose label appears in `spcam_vars_include`.
# Note: this rebinds the imported `SPCAM_Vars` name to the filtered list, so
# the unfiltered list is no longer reachable under that name afterwards.
SPCAM_Vars = list(filter(lambda v: v.label in spcam_vars_include, SPCAM_Vars))

# Partition the selection by its `dimensions` attribute (3 or 2) ...
spcam_3d_vars = [v for v in SPCAM_Vars if v.dimensions == 3]
spcam_2d_vars = [v for v in SPCAM_Vars if v.dimensions == 2]

# ... and by its `type` attribute ('in' or 'out').
input_vars = [v for v in SPCAM_Vars if v.type == 'in']
output_vars = [v for v in SPCAM_Vars if v.type == 'out']

# Main program

## Options
These options should ideally be read from a configuration file.

In [8]:
# Options
#experiment = '002_train_1_year'  # "experiment" to be used

# Region of interest as [[lat1, lat2], [lon1, lon2]].
# Set `region = None` to leave `gridpoints` as None (meaning: all gridpoints).
region = [[5, 7], [120, 120]]
gridpoints = utils.get_gridpoints(region) if region is not None else None
# Alternative: list the gridpoints explicitly.
# gridpoints = [
#         [5, 120],             # "Tropical" region
#         [6, 120],
#         [7, 120]
# ]

# Target levels (nearest level, hPa). None means all levels.
# NOTE(review): because `target_levels` is set explicitly here, the
# `pressures` branch below is never taken. The stored output of the next cell
# shows four levels between ~691 and ~860 hPa, which looks like it came from
# the `pressures` branch -- confirm which of the two selections is intended.
target_levels = [1000, 700, 300, 80]
pressures = [850, 700]
if target_levels is None and pressures is not None:
    target_levels = utils.get_levels(pressures)

#pc_alpha      = None
# TODO
# Check these values with a few columns, use selection for the rest.
# Which alpha gives the right physics?
list_pc_alpha = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2]

verbosity = 2

output_folder = "./test_outputs"

In [9]:
# Display the resolved gridpoints and target levels as the cell output.
gridpoints, target_levels

([[4.185920533189154, 120.9375], [6.976533553948636, 120.9375]],
 masked_array(data=[691.38943031, 763.40448111, 820.85836865, 859.53476653],
              mask=False,
        fill_value=1e+20))

In [10]:
# Read the level / latitude / longitude coordinates from the ancillary file.
levels, latitudes, longitudes = utils.read_ancilaries(Path(DATA_FOLDER, ANCIL_FILE))

# idx_levs format: [target level, index of closest value]
if target_levels is None:
    # No selection: take every level, labelled by its value rounded to 2 decimals.
    idx_levs = [[round(level_value, 2), level_idx]
                for level_idx, level_value in enumerate(levels)]
else:
    idx_levs = [[target, utils.find_closest_value(levels, target)]
                for target in target_levels]

# #print(levels[idx_levs])
# for target, idx in idx_levs:
#    print(f"target = {target},\tindex = {idx},\tlevel = {levels[idx]}")

if gridpoints is None:
    # No selection: enumerate the full grid with latitude as the outer loop,
    # so idx_lats[k] / idx_lons[k] together address the k-th gridpoint.
    idx_lats = [lat_idx for lat_idx in range(len(latitudes))
                        for _ in range(len(longitudes))]
    idx_lons = [lon_idx for _ in range(len(latitudes))
                        for lon_idx in range(len(longitudes))]
else:
    # Each gridpoint is indexed as [lat, lon]; map both to grid indices.
    idx_lats = [utils.find_closest_value(latitudes, point[0]) for point in gridpoints]
    idx_lons = [utils.find_closest_longitude(longitudes, point[1]) for point in gridpoints]

In [11]:
# Print the latitude indices, longitude indices and [target level, index]
# pairs, one per line.
print(idx_lats, idx_lons, idx_levs, sep="\n")

[33, 34]
[43, 43]
[[691.3894303143024, 20], [763.404481112957, 21], [820.8583686500788, 22], [859.5347665250301, 23]]


In [12]:
import time
import datetime

# Main loop: for every selected gridpoint, load the parent (input) variables
# once, then for every child (output) variable and level run the causal link
# search and save the results.
var_list = SPCAM_Vars

var_parents = [var for var in var_list if var.type == "in"]
var_children = [var for var in var_list if var.type == "out"]

# Load data
# Iterate over the precomputed index pairs instead of over `gridpoints`:
# `gridpoints` is None when all gridpoints are requested, in which case
# `idx_lats` / `idx_lons` already enumerate the full grid.
len_grid = len(idx_lats)
t_start = time.time()
for i_grid, (idx_lat, idx_lon) in enumerate(zip(idx_lats, idx_lons)):

    # Coordinates handed to save_results: the requested gridpoint when an
    # explicit selection exists, otherwise the grid coordinates themselves.
    if gridpoints is not None:
        i_lat, i_lon = gridpoints[i_grid]
    else:
        i_lat, i_lon = latitudes[idx_lat], longitudes[idx_lon]

    print(f"Gridpoint {i_grid+1}/{len_grid}: lat={latitudes[idx_lat]}"
          f" ({idx_lat}), lon={longitudes[idx_lon]} ({idx_lon})")
    print("")

    print("Load Parents (state fields)...")
    t_before_load_parents = time.time()
    data_parents = utils.load_data(var_parents,
                                   experiment,
                                   DATA_FOLDER,
                                   idx_levs[:],  # Filter for testing purposes
                                   idx_lat,
                                   idx_lon)
    time_load_parents = datetime.timedelta(seconds=time.time() - t_before_load_parents)
    print(f"All parents loaded. Time: {time_load_parents}")
    print("")

    print("PCMCI for each child (parameterization):")
    for child in var_children:

        print(f"Variable: {child.name}")

        # Children with `dimensions == 2` are processed for the first level
        # only; all other children are processed for every selected level.
        child_levels = idx_levs[:1] if child.dimensions == 2 else idx_levs
        for level in child_levels:
            data_child = utils.load_data([child],  # Expects iterable
                                         experiment,
                                         DATA_FOLDER,
                                         [level],  # Expects iterable
                                         idx_lat,
                                         idx_lon)
            data = [*data_parents, *data_child]

            print(f"... find causal links for {child.name} at level: {level[0]} hPa [{level[1]+1}]")
            t_before_find_links = time.time()

            # Find links
            # NOTE(review): the third argument is a literal 0, while the
            # `verbosity` option defined earlier is not used in this cell --
            # confirm whether it was meant to be passed here.
            results = algorithm.find_links(data, list_pc_alpha, 0)
            time_links = datetime.timedelta(seconds=time.time() - t_before_find_links)
            print(f"Links found. Time: {time_links}")

            utils.save_results(results, child, level[1], i_lat, i_lon,
                               experiment, output_folder)
        print("")

    # Per-gridpoint time, measured from the start of the parent loading.
    time_point = datetime.timedelta(seconds=time.time() - t_before_load_parents)
    print(f"PCMCI in gridpoint finished. Time: {time_point}")
    print("")
    print("")

print("")
total_time = datetime.timedelta(seconds=time.time() - t_start)
print(f"Execution complete. Total time: {total_time}")

Gridpoint 1/2: lat=4.185920533189154 (33), lon=120.9375 (43)

Load Parents (state fields)...
Loading (tbp, 3, in), tbp, level: 21
Loading (tbp, 3, in), tbp, level: 22
Loading (tbp, 3, in), tbp, level: 23
Loading (tbp, 3, in), tbp, level: 24
Loading (ps, 2, in), ps, level: 21
All parents loaded. Time: 0:00:25.494189

PCMCI for each child (parameterization):
Variable: flns
Loading (flns, 2, out), flns, level: 21
... find causal links for flns at level: 691.3894303143024 hPa [21]
Links found. Time: 0:00:00.664080
Saved results into "test_outputs/flns_21_lat4.185920533189154_lon120.9375_002_train_1_year.obj"

PCMCI in gridpoint finished. Time: 0:00:31.717507


Gridpoint 2/2: lat=6.976533553948636 (34), lon=120.9375 (43)

Load Parents (state fields)...
Loading (tbp, 3, in), tbp, level: 21
Loading (tbp, 3, in), tbp, level: 22
Loading (tbp, 3, in), tbp, level: 23
Loading (tbp, 3, in), tbp, level: 24
Loading (ps, 2, in), ps, level: 21
All parents loaded. Time: 0:00:09.313223

PCMCI for each ch

In [None]:
# print(results.keys())
# #print(results[str(list_pc_alpha[0])])
# print(results[str(list_pc_alpha[0])])

In [14]:
# import datetime
# import time
# before = time.time()
# time.sleep(10)

In [15]:
# print(t := time.time() - before)

10.019274234771729
