In [1]:
%load_ext autoreload
%matplotlib inline

In [5]:
import os
import glob
import sys
import yaml

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import re
import datetime as dt

from tempest import casestudy
from tempest import grid
from tempest import joint_distrib
from tempest import handler

# Settings file (YAML) that is passed to handler.Handler further down.
# The path is relative, so it is resolved against the kernel's current
# working directory.
settings_path = 'settings/tropics.yaml'

In [4]:
# Build the analysis objects in dependency order: the Handler is created from
# the settings path, the CaseStudy from the handler, and the Grid from the
# case study. Per the original note, creating the CaseStudy is expected to
# also create the appropriate directories.
hdlr = handler.Handler(settings_path)
cs = casestudy.CaseStudy(hdlr, overwrite=False, verbose=False)
gr = grid.Grid(
    cs,
    fast=True,
    overwrite=False,
    verbose_steps=False,
    verbose=False,
)

# Disabled follow-up steps, kept for reference only:
#   jd = joint_distrib.JointDistribution(gr)
#   gr.compute_funcs_for_var_id()


Data loaded from /homedata/mcarenso/tempest/DYAMOND_SAM_Tropics/var_id_days_i_t.json
Found grid attributes file , so loading /homedata/mcarenso/tempest/DYAMOND_SAM_Tropics/grid_attributes.pkl instead of computing


In [None]:
def _load_var_id_in_data_in(self):
    """
    this functions loads the data from your DIR_DATA_IN settings

    :param dir: The path to your DIR_DATA_IN in .yaml

    :return
        var_id: list of variables found
        days_i_t_per_var_id: a dictionnary that contains the days and correspong indexes per var_id    
                        days_i_t_per_var_id[var_id] = dict with keys the dates and values the indexes
    """
    dir = self.settings['DIR_DATA_IN']
    variables_id = []
    days_i_t_per_var_id = {}

    def get_day_and_i_t(filename):
            # Define a regex pattern to extract the timestamp from your file path
            timestamp_pattern = r'(\d{10})\.\w+\.2D\.nc'

            # starting date
            date_ref = dt.datetime(year=2016, month=8, day=1)
            
            def get_datetime_and_i_t(filename):
                # Extract the timestamp from the file path
                match = re.search(timestamp_pattern, filename)
                if match:
                    timestamp = int(match.group(1))
                    # Calculate the delta in seconds
                    delta_t = dt.timedelta(seconds=timestamp * 7.5)
                    # Calculate the current date
                    date_current = date_ref + delta_t

                    i_t = int(timestamp / 240) 
                    return date_current, i_t
                else:
                    return None  # Handle cases where the timestamp couldn't be extracted

            # time dimension
            
            date_time, i_t = get_datetime_and_i_t(filename)
            days_i_t = {} ## maybe here work on storing the full datetimes with corresponding i_t. 
            day = date_time.strftime("%y-%m-%d")
            return day, i_t

    # Define a regular expression pattern to extract variable names from filenames.
    variable_pattern = re.compile(r'\.([A-Za-z0-9]+)\.2D\.nc$')

    for root, _, files in os.walk(dir):
        for filename in sorted(files):
            match = variable_pattern.search(filename)
            if match:
                var_id = match.group(1)
                if var_id not in variables_id : 
                    variables_id.append(var_id)
                    days_i_t_per_var_id[var_id] = {}
                    
                day, i_t = get_day_and_i_t(filename)
                if day not in list(days_i_t_per_var_id[var_id].keys()):
                    days_i_t_per_var_id[var_id][day] = [i_t]
                else :
                    days_i_t_per_var_id[var_id][day].append(i_t)
                    
    return variables_id, days_i_t_per_var_id