In [1]:
# Base names (file stem without the trailing "_<index>") of the continuous
# series recognised by this notebook. Each Gaussian width appears once per
# baseline estimator; the random-PDF entries are listed explicitly.
continuous_series_names = [
    f'uncorrelated_gaussian_centered_sigma_{sigma}_{estimator}'
    for estimator in ('KDE', 'histogram')
    for sigma in ('0.1', '0.3', '0.5', '0.8')
] + [
    'uncorrelated_random_PDF_l_0.1_KDE',
    'uncorrelated_random_PDF_l_0.1_histogram',
    'uncorrelated_random_PDF_l_0.4_histogram',
    'uncorrelated_random_PDF_l_0.4_KDE',
    'uncorrelated_random_PDF_l_0.02_KDE',
    'uncorrelated_random_PDF_l_0.02_histogram',
]

# Base names of the Markov-chain series.
markov_chain_names = ['markov_chain']

import numpy as np

### Set up directory
# parent_dir is the parent of the current working directory, so its value
# depends on where the notebook kernel was started. The data folders used
# below ('processed_series', 'generated_series') are resolved relative to it.
import sys
import os
from pathlib import Path
parent_dir = os.path.dirname(os.getcwd())
# Put parent_dir on the module search path so modules located there can be
# imported by later cells.
sys.path.append(parent_dir)

from tqdm import tqdm
import pickle
import torch
import matplotlib.pyplot as plt

# Directory that receives the processed series. mkdir(..., exist_ok=True) is
# idempotent and avoids the check-then-create race of testing for existence
# first; parents=True mirrors os.makedirs by creating missing ancestors.
save_path = Path(parent_dir) / 'processed_series'
save_path.mkdir(parents=True, exist_ok=True)

# Define the directory where the generated series are stored
generated_series_dir = Path(parent_dir) / 'generated_series'

In [2]:
# Collect the generated series that still need processing, keyed by file name.
# Continuous series and Markov chains are kept in separate dictionaries.
continuous_series_task = {}
markov_chain_task = {}

# Loop through each file in the directory
for file in generated_series_dir.iterdir():
    # A file of the same name in save_path means the series is already processed
    if (save_path / file.name).exists():
        continue

    # The series name is the file stem with the trailing "_<suffix>" removed
    series_name = file.stem.rsplit('_', 1)[0]
    if series_name in continuous_series_names:
        target = continuous_series_task
    elif series_name in markov_chain_names:
        target = markov_chain_task
    else:
        # Files with an unrecognized series name are deliberately ignored
        continue

    # Load inside a context manager so the file handle is closed promptly.
    # NOTE: pickle.load can execute arbitrary code; only load trusted files.
    with file.open('rb') as f:
        target[file.name] = pickle.load(f)
        


In [3]:
# Show which files were queued in each task dictionary
# (continuous series first, then Markov chains).
for task in (continuous_series_task, markov_chain_task):
    print(task.keys())

dict_keys(['uncorrelated_random_PDF_l_0.4_KDE_1.pkl', 'uncorrelated_random_PDF_l_0.02_KDE_0.pkl', 'uncorrelated_random_PDF_l_0.4_histogram_1.pkl', 'uncorrelated_random_PDF_l_0.4_histogram_0.pkl', 'uncorrelated_random_PDF_l_0.4_KDE_0.pkl'])
dict_keys([])


### Analyze multi-digit series

In [4]:
# Import the module object itself so it can be reloaded: re-running this cell
# picks up edits made to baseline_models since the kernel first imported it.
import importlib
import baseline_models
importlib.reload(baseline_models)

# Star-import after the reload so the notebook namespace holds the refreshed
# definitions.
# NOTE(review): the wildcard hides which names come from baseline_models;
# KDE_for_series and histogram_for_series used below presumably come from
# here -- confirm, and consider importing them explicitly.
from baseline_models import *

In [5]:
# Estimate a PDF list for every queued continuous series and cache the
# augmented record in save_path under the original file name.
for series_name, series_dict in sorted(continuous_series_task.items()):
    # In these records the 'llama_size' field selects the baseline estimator
    # ('KDE' or 'histogram').
    llama_size = series_dict['llama_size']
    prec = series_dict['prec']
    kernel = series_dict['kernel']
    rescaled_full_series = series_dict['rescaled_full_series']

    if llama_size == 'KDE':
        print("Processing ", series_name)
        PDF_list = KDE_for_series(rescaled_full_series, kernel, prec)
    elif llama_size == 'histogram':
        print("Processing ", series_name)
        PDF_list = histogram_for_series(rescaled_full_series, prec)
    else:
        # Any other value is left untouched, as before
        continue

    series_dict['PDF_list'] = PDF_list

    # Write to a temporary file and rename it into place. The loading cell
    # treats any file present in save_path as "already processed", so an
    # interrupted dump must never leave a truncated pickle under the final name.
    save_name = os.path.join(save_path, series_name)
    tmp_name = save_name + '.tmp'
    with open(tmp_name, 'wb') as f:
        pickle.dump(series_dict, f)
    os.replace(tmp_name, save_name)


Processing  uncorrelated_random_PDF_l_0.02_KDE_0.pkl
Processing  uncorrelated_random_PDF_l_0.4_KDE_0.pkl


Processing  uncorrelated_random_PDF_l_0.4_KDE_1.pkl
Processing  uncorrelated_random_PDF_l_0.4_histogram_0.pkl
Processing  uncorrelated_random_PDF_l_0.4_histogram_1.pkl
