# ***DESCRIPTION*** 
## ***Run Tigramite (PCMCI) for SPCAM data with specified settings:***
### Fixed:
- PC-stable (i.e., MCI component not run)
- tau_min/tau_max = -1
- Significance: analytic
- experiments: '002_train_1_year'
- links: parents (state fields) -> children (parameterizations)
### Options:
- children (parameterizations)
- region: lat/lon limits (gridpoints to be used)
- levels: children's levels to be explored
- pc_alphas: list of value(s)

In [1]:
# Python packages
import sys, getopt, yaml, time, datetime
import numpy                  as np
from pathlib              import Path

# Utils
from   utils.constants    import SPCAM_Vars
from   utils.constants    import DATA_FOLDER, ANCIL_FILE, tau_min, tau_max, significance, experiment
import utils.utils            as utils
import utils.links            as links
import utils.pcmci_algorithm  as algorithm

## Specifications

In [2]:
# Command-line arguments. In an interactive session, set them by hand instead,
# e.g.: argv = ['-c', 'cfg_pipeline.yml']
argv           = sys.argv[1:]

# Start undefined so that a missing -c/--cfg_file is reported as a usage error
# rather than surfacing later as a NameError.
yml_cfgFilenm  = None

# NOTE(review): the short option "a" takes no argument while the long option
# "add=" requires one; both are currently ignored -- confirm which is intended.
try:
    opts, args = getopt.getopt(argv,"hc:a",["cfg_file=","add="])
except getopt.GetoptError:
    print ('pipeline.py -c [cfg_file] -a [add]')
    sys.exit(2)
for opt, arg in opts:
    if opt == '-h':
        print ('pipeline.py -c [cfg_file]')
        sys.exit()
    elif opt in ("-c", "--cfg_file"):
        yml_cfgFilenm = arg
    elif opt in ("-a", "--add"):
        # Accepted but unused.
        pass

if yml_cfgFilenm is None:
    # No configuration file given: nothing can be loaded, so print the usage
    # line and stop with the same exit code as a malformed command line.
    print ('pipeline.py -c [cfg_file] -a [add]')
    sys.exit(2)

# YAML config file
# The context manager closes the file handle as soon as parsing is done.
# NOTE(review): yaml.FullLoader can build more than plain data types; if the
# config file may come from an untrusted source, consider yaml.safe_load.
with open(yml_cfgFilenm) as yml_cfgFile:
    yml_cfg       = yaml.load(yml_cfgFile, Loader=yaml.FullLoader)

# Load specifications (every key below must be present in the config file)
spcam_parents     = yml_cfg['spcam_parents']
spcam_children    = yml_cfg['spcam_children']
pc_alphas         = yml_cfg['pc_alphas']
region            = yml_cfg['region']
lim_levels        = yml_cfg['lim_levels']
target_levels     = yml_cfg['target_levels']
verbosity         = yml_cfg['verbosity']
output_folder     = yml_cfg['output_folder']

In [3]:
## Region / Gridpoints
# A configured value of False selects the default limits below ("All").
if region is False:
    lat_limits = [-90, 90]
    lon_limits = [0, -.5]
    region     = [lat_limits, lon_limits]  # All
gridpoints = utils.get_gridpoints(region)

## Children levels (parents includes all)
# Derive the target levels from the level limits only when limits were given
# and no explicit target levels were configured.
if target_levels is False and lim_levels is not False:
    target_levels = utils.get_levels(lim_levels)

In [4]:
## Model's grid
ancil_path = Path(DATA_FOLDER, ANCIL_FILE)
levels, latitudes, longitudes = utils.read_ancilaries(ancil_path)

## Latitude / Longitude indexes
# One index per gridpoint, in the same order as `gridpoints`.
idx_lats = []
for point in gridpoints:
    idx_lats.append(utils.find_closest_value(latitudes, point[0]))

idx_lons = []
for point in gridpoints:
    idx_lons.append(utils.find_closest_longitude(longitudes, point[1]))

## Level indexes (children & parents)
# Each entry is a [level value, level index] pair; parents use every level.
parents_idx_levs = []
for lev_index, lev_value in enumerate(levels):
    parents_idx_levs.append([round(lev_value, 2), lev_index])  # All

if target_levels is False:
    # No explicit targets: children share the parents' list of levels.
    children_idx_levs = parents_idx_levs
else:
    children_idx_levs = [
        [target, utils.find_closest_value(levels, target)]
        for target in target_levels
    ]

In [5]:
## Variables
# Keep only the variables whose label is named in the config, then split them
# by their `type` attribute into parents ("in") and children ("out").
spcam_vars_include = spcam_parents + spcam_children

selected_vars = []
for spcam_var in SPCAM_Vars:
    if spcam_var.label in spcam_vars_include:
        selected_vars.append(spcam_var)
SPCAM_Vars = selected_vars

var_parents  = list(filter(lambda v: v.type == "in", SPCAM_Vars))
var_children = list(filter(lambda v: v.type == "out", SPCAM_Vars))

### Processing

In [6]:
# For every gridpoint: load all parents once, then pair them with each child
# (and each child level) to search for links and save the results.
len_grid = len(gridpoints)
t_start = time.time()

for grid_number, ((i_lat, i_lon), idx_lat, idx_lon) in enumerate(
        zip(gridpoints, idx_lats, idx_lons), start=1):

    print(f"Gridpoint {grid_number}/{len_grid}: lat={latitudes[idx_lat]} ({idx_lat}), "
          f"lon={longitudes[idx_lon]} ({idx_lon})")
    print()

    # Parents are loaded once per gridpoint and reused for every child.
    print("Load Parents (state fields)...")
    t_before_load_parents = time.time()
    data_parents = utils.load_data(
        var_parents, experiment, DATA_FOLDER, parents_idx_levs, idx_lat, idx_lon
    )
    elapsed_parents = time.time() - t_before_load_parents
    time_load_parents = datetime.timedelta(seconds=elapsed_parents)
    print(f"All parents loaded. Time: {time_load_parents}")
    print()

    print("PCMCI for each child (parameterization):")
    for child in var_children:

        print(f"Variable: {child.name}")

        # A 2D child is processed for the first listed level only.
        if child.dimensions == 2:
            levels_to_run = children_idx_levs[:1]
        else:
            levels_to_run = children_idx_levs

        for level in levels_to_run:
            if child.dimensions == 3:
                i_lev = level[1]
                print(f"... find causal links for {child.name} at level: {level[0]} hPa [{i_lev+1}]")
            else:
                i_lev = 0
                print(f"... find causal links for {child.name}")

            data_child = utils.load_data(
                [child], experiment, DATA_FOLDER, [level], idx_lat, idx_lon
            )
            # Parents first, child last.
            data = list(data_parents)
            data.extend(data_child)

            # Find links
            # NOTE(review): the third argument is hard-coded to 0 while
            # `verbosity` is read from the config but not used here -- confirm
            # whether it should be passed instead.
            t_before_find_links = time.time()
            results = algorithm.find_links(data, pc_alphas, 0)
            elapsed_links = time.time() - t_before_find_links
            time_links = datetime.timedelta(seconds=elapsed_links)
            print(f"Links found. Time: {time_links}")

            utils.save_results(results, child, i_lev, i_lat, i_lon, experiment, output_folder)
        print()

    # Per-gridpoint time, measured from just before the parents were loaded.
    time_point = datetime.timedelta(seconds=time.time() - t_before_load_parents)
    print(f"PCMCI in gridpoint finished. Time: {time_point}")
    print("\n")

print()
total_time = datetime.timedelta(seconds=time.time() - t_start)
print(f"Execution complete. Total time: {total_time}")

Gridpoint 1/1: lat=4.185920533189154 (33), lon=120.9375 (43)

Load Parents (state fields)...
All parents loaded. Time: 0:03:09.175463

PCMCI for each child (parameterization):
Variable: flns
... find causal links for flns
Links found. Time: 0:00:25.129881
Saved results into "test_outputs/flns_1_lat-4_lon-120_002_train_1_year.obj"

PCMCI in gridpoint finished. Time: 0:03:41.033048



Execution complete. Total time: 0:03:41.074463
