# ***DESCRIPTION*** 
## ***Run Tigramite (PCMCI) for SPCAM data with specified settings:***
### Fixed:
- PC-stable (i.e., MCI component not run)
- tau_min/tau_max = -1
- Significance: analytic
- experiments: '002_train_1_year'
- links: parents (state fields) -> children (parameterizations)
### Options:
- region: lat/lon limits (gridpoints to be used)
- levels: children's levels to be explored
- pc_alpha: list of value(s)

In [14]:
# Python packages
import sys, getopt
import numpy                  as np
from pathlib              import Path

# Utils
from   utils.constants    import SPCAM_Vars
from   utils.constants    import DATA_FOLDER, ANCIL_FILE, tau_min, tau_max, significance, experiment
import utils.utils            as utils
import utils.links            as links
import utils.pcmci_algorithm  as algorithm

## Variables to be processed

In [None]:
# Default settings; each one can be overridden from the command line.
spcam_parents  = ['tbp','qbp','vbp','ps','solin','shflx','lhflx']
spcam_children = ['tphystnd','prect', 'fsns', 'flns']
region         = None
lim_levels     = None
target_levels  = None
list_pc_alpha  = [0.001, 0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2]

USAGE = ('pipeline.py -p [parents] -c [children] -r [region] '
         '-l [lim_levels] -t [target_levels] -a [pc-alpha]')


def _parse_labels(text):
    """Turn 'tbp,ps' or "['tbp', 'ps']" into ['tbp', 'ps']."""
    stripped = text.strip().strip('[]()')
    labels = (item.strip().strip('\'"') for item in stripped.split(','))
    return [label for label in labels if label]


def _parse_values(text):
    """Evaluate a Python literal such as '850,700' or '[[4,4],[120,120]]'.

    Tuples are converted to lists (recursively), a single scalar is wrapped
    in a one-element list and the literal 'None' is returned as None.

    Raises:
        ValueError, SyntaxError: if `text` is not a valid Python literal.
    """
    import ast  # local import: only needed when a value has to be parsed

    def as_list(value):
        if isinstance(value, (list, tuple)):
            return [as_list(item) for item in value]
        return value

    value = as_list(ast.literal_eval(text.strip()))
    if value is None or isinstance(value, list):
        return value
    return [value]


def parse_cli_options(argv):
    """Parse the command-line arguments into a dict of setting overrides.

    Args:
        argv: argument strings without the program name (e.g. sys.argv[1:]).

    Returns:
        dict mapping the name of each setting given on the command line
        ('spcam_parents', 'spcam_children', 'region', 'lim_levels',
        'target_levels', 'list_pc_alpha') to its parsed value. Settings
        that were not given are absent.

    Raises:
        getopt.GetoptError: unknown option or missing option argument.
        ValueError, SyntaxError: an option value is not a valid literal.
        SystemExit: after printing the usage text for '-h'.
    """
    # Every value-taking option needs ':' (short) / '=' (long); without
    # them getopt treats the option as a flag and never passes its value.
    opts, _ = getopt.getopt(argv, "hp:c:r:l:t:a:",
                            ["parents=", "children=", "region=",
                             "lim_levels=", "target_levels=", "pc-alpha="])
    overrides = {}
    for opt, arg in opts:
        print(opt, arg)
        if opt == '-h':
            print(USAGE)
            sys.exit()
        elif opt in ("-p", "--parents"):
            overrides['spcam_parents'] = _parse_labels(arg)
        elif opt in ("-c", "--children"):
            overrides['spcam_children'] = _parse_labels(arg)
        elif opt in ("-r", "--region"):
            overrides['region'] = _parse_values(arg)
        elif opt in ("-l", "--lim_levels"):
            overrides['lim_levels'] = _parse_values(arg)
        elif opt in ("-t", "--target_levels"):
            overrides['target_levels'] = _parse_values(arg)
        elif opt in ("-a", "--pc-alpha"):
            overrides['list_pc_alpha'] = _parse_values(arg)
    return overrides


# Only parse sys.argv when executed as the entry point.
# NOTE: inside a Jupyter kernel sys.argv holds the kernel's own arguments
# (e.g. "-f <connection file>"), which are rejected here as unknown options;
# this cell is meant for the exported pipeline.py script.
if __name__ == "__main__":
    try:
        _overrides = parse_cli_options(sys.argv[1:])
    except (getopt.GetoptError, ValueError, SyntaxError):
        print(USAGE)
        sys.exit(2)
    spcam_parents  = _overrides.get('spcam_parents', spcam_parents)
    spcam_children = _overrides.get('spcam_children', spcam_children)
    region         = _overrides.get('region', region)
    lim_levels     = _overrides.get('lim_levels', lim_levels)
    target_levels  = _overrides.get('target_levels', target_levels)
    list_pc_alpha  = _overrides.get('list_pc_alpha', list_pc_alpha)

print ('Parents are: ', spcam_parents)
print ('Children are: ', spcam_children)
print ('Region is: ', region)
print ('lim_levels are: ', lim_levels)
print ('target_levels are: ', target_levels)
print ('pc-alpha is: ', list_pc_alpha)

In [15]:
# Temporary hard-coded selection: this will be given via a wrapper (bash)
# or a config file.
# Full parent set:   ['tbp','qbp','vbp','ps','solin','shflx','lhflx']
# Full children set: ['tphystnd','prect', 'fsns', 'flns']
spcam_parents = ['tbp', 'ps']
spcam_children = ['flns']

In [16]:
# Labels of every variable (parents and children) taking part in this run.
spcam_vars_include = spcam_parents + spcam_children

# Restrict the variable catalogue to the selected labels (SPCAM_Vars is
# rebound to the filtered list), then derive sub-lists by dimensionality
# and by type ('in' / 'out').
SPCAM_Vars    = list(filter(lambda v: v.label in spcam_vars_include, SPCAM_Vars))
spcam_3d_vars = list(filter(lambda v: v.dimensions == 3, SPCAM_Vars))
spcam_2d_vars = list(filter(lambda v: v.dimensions == 2, SPCAM_Vars))
input_vars    = list(filter(lambda v: v.type == 'in', SPCAM_Vars))
output_vars   = list(filter(lambda v: v.type == 'out', SPCAM_Vars))

## Options
This should ideally be read from a configuration file

In [17]:
## Region / Gridpoints
# Format: [[lat1, lat2], [lon1, lon2]]. Set region to None to use all
# gridpoints (the fallback below).
region = [[4, 4], [120, 120]]
if region is None:
    region = [[-90, 90], [0, -.5]]  # All
gridpoints = utils.get_gridpoints(region)

## Children levels (parents include all levels)
# target_levels: None means all; otherwise e.g. [1000, 700, 300, 80],
# matched to the nearest level (hPa).
# lim_levels is only used to derive target_levels when none are given.
target_levels = None
lim_levels = [850, 700]
if target_levels is None and lim_levels is not None:
    target_levels = utils.get_levels(lim_levels)

## PC-Alpha(s)
list_pc_alpha = [
    0.001, 0.002, 0.005,
    0.01, 0.02, 0.05,
    0.1, 0.2,
]

verbosity = 2
output_folder = "./test_outputs"

In [8]:
# Read the level / latitude / longitude arrays from the ancillary file.
ancil_path = Path(DATA_FOLDER, ANCIL_FILE)
levels, latitudes, longitudes = utils.read_ancilaries(ancil_path)

# Latitude / longitude indexes: closest grid index for each (lat, lon) gridpoint.
idx_lats = []
for point in gridpoints:
    idx_lats.append(utils.find_closest_value(latitudes, point[0]))
idx_lons = []
for point in gridpoints:
    idx_lons.append(utils.find_closest_longitude(longitudes, point[1]))

# Level indexes as [level value, level index] pairs.
# Parents always use every level.
parents_idx_levs = []
for lev_index, lev_value in enumerate(levels):
    parents_idx_levs.append([round(lev_value, 2), lev_index])

# Children use the requested target levels, or every level when none are given.
if target_levels is None:
    children_idx_levs = parents_idx_levs
else:
    children_idx_levs = [
        [lev, utils.find_closest_value(levels, lev)] for lev in target_levels
    ]

In [13]:
import time
import datetime


def _elapsed_since(t_ref):
    """Return the wall-clock time elapsed since `t_ref` as a timedelta."""
    return datetime.timedelta(seconds=time.time() - t_ref)


# Split the selected variables by type: 'in' -> parents, 'out' -> children.
var_list = SPCAM_Vars
var_parents = []
var_children = []
for var in var_list:
    if var.type == "in":
        var_parents.append(var)
    if var.type == "out":
        var_children.append(var)

# For every gridpoint: load the parents once, then run the link search for
# each child (and each child level) and save the results.
len_grid = len(gridpoints)
t_start = time.time()
for i_grid, ((i_lat, i_lon), idx_lat, idx_lon) in enumerate(
        zip(gridpoints, idx_lats, idx_lons)):

    print(f"Gridpoint {i_grid+1}/{len_grid}: lat={latitudes[idx_lat]}"
          f" ({idx_lat}), lon={longitudes[idx_lon]} ({idx_lon})")
    print("")

    print("Load Parents (state fields)...")
    t_before_load_parents = time.time()
    data_parents = utils.load_data(
        var_parents, experiment, DATA_FOLDER, parents_idx_levs, idx_lat, idx_lon
    )
    time_load_parents = _elapsed_since(t_before_load_parents)
    print(f"All parents loaded. Time: {time_load_parents}")
    print("")

    print("PCMCI for each child (parameterization):")
    for child in var_children:

        print(f"Variable: {child.name}")

        # A 2D child is processed for one level only (the first one listed).
        if child.dimensions == 2:
            child_levels = children_idx_levs[:1]
        else:
            child_levels = children_idx_levs

        for level in child_levels:
            if child.dimensions == 3:
                print(f"... find causal links for {child.name} at level: {level[0]} hPa [{level[1]+1}]")
                i_lev = level[1]
            else:
                print(f"... find causal links for {child.name}")
                i_lev = 0

            data_child = utils.load_data(
                [child], experiment, DATA_FOLDER, [level], idx_lat, idx_lon
            )
            # Parents first, child last.
            data = [*data_parents, *data_child]

            # Find links
            t_before_find_links = time.time()
            results = algorithm.find_links(data, list_pc_alpha, 0)
            time_links = _elapsed_since(t_before_find_links)
            print(f"Links found. Time: {time_links}")

            utils.save_results(results, child, i_lev, i_lat, i_lon, experiment, output_folder)
        print("")

    # Per-gridpoint time, measured from the start of the parent load.
    time_point = _elapsed_since(t_before_load_parents)
    print(f"PCMCI in gridpoint finished. Time: {time_point}")
    print("")
    print("")

print("")
total_time = _elapsed_since(t_start)
print(f"Execution complete. Total time: {total_time}")

Gridpoint 1/1: lat=4.185920533189154 (33), lon=120.9375 (43)

Load Parents (state fields)...
All parents loaded. Time: 0:02:53.924325

PCMCI for each child (parameterization):
Variable: flns
... find causal links for flns
Links found. Time: 0:00:15.905123
Saved results into "test_outputs/flns_2_lat-4_lon-120_002_train_1_year.obj"

PCMCI in gridpoint finished. Time: 0:03:15.491240



Execution complete. Total time: 0:03:15.558574


In [None]:
# print(results.keys())
# #print(results[str(list_pc_alpha[0])])
# print(results[str(list_pc_alpha[0])])

In [14]:
# import datetime
# import time
# before = time.time()
# time.sleep(10)

In [15]:
# print(t := time.time() - before)

10.019274234771729
