In [1]:
import os
from tqdm import tqdm
import pandas as pd
import multiprocessing_notebook_helpers
import numpy as np
import tensorflow as tf
import random 
import multiprocessing

In [2]:
def set_random(seed_value):
    """Pin every random source used by this notebook to ``seed_value``.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds Python's
    built-in ``random`` module, NumPy's global generator and TensorFlow's
    global generator.

    Args:
        seed_value: Seed handed to each generator; its string form is stored
            in ``PYTHONHASHSEED``.
    """
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    # Same seed for each library-level generator: python, numpy, tensorflow.
    for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed all generators once, up front, so the random draws made in later cells
# start from a fixed state.
set_random(12345)

In [3]:
def get_files_and_names(data_dir="./data/"):
    """Collect every ``.csv`` file below ``data_dir`` and derive a name for each.

    Args:
        data_dir: Root directory that is searched recursively. Defaults to
            ``"./data/"``.

    Returns:
        tuple[list[str], list[str]]: Two index-aligned lists. The first holds
        the file paths; the second holds names of the form
        ``<containing directory>_<file name with "." replaced by "_">``.
        Entries are ordered deterministically (directories and files are
        visited in sorted order).
    """
    all_files = []
    all_files_name = []
    for root, dirs, files in os.walk(data_dir):
        # os.walk yields entries in arbitrary, filesystem-dependent order.
        # Sorting `dirs` in place also fixes the order of descent, so the
        # returned lists are identical from run to run.
        dirs.sort()
        # basename equals root.split("/")[-1] on POSIX and additionally
        # understands the native separator on Windows.
        parent = os.path.basename(root)
        for file in sorted(files):
            if file.endswith(".csv"):
                all_files.append(os.path.join(root, file))
                all_files_name.append(parent + "_" + file.replace(".", "_"))
    return all_files, all_files_name

def get_anomaly_data_and_names():
    """Load every discovered CSV whose path does not contain ``'anomaly-free'``.

    Files are read with ``;`` as the separator and the ``datetime`` column
    parsed as the index.

    Returns:
        tuple[list[pandas.DataFrame], list[str]]: Index-aligned lists of the
        loaded frames and the names reported for them by
        ``get_files_and_names``.
    """
    all_files, all_files_name = get_files_and_names()
    # Decide once per (path, name) pair so the two outputs cannot drift apart.
    # Filtering paths and names independently misaligns them whenever
    # 'anomaly-free' occurs in a path but not in the derived name (e.g. a
    # file nested one directory below an 'anomaly-free' folder).
    selected = [
        (path, name)
        for path, name in zip(all_files, all_files_name)
        if 'anomaly-free' not in path
    ]
    list_of_df = [
        pd.read_csv(path, sep=';', index_col='datetime', parse_dates=True)
        for path, _ in selected
    ]
    list_of_names = [name for _, name in selected]
    return list_of_df, list_of_names

# Discover every CSV under ./data/ together with its derived name.
all_files, all_names = get_files_and_names()
# Load the datasets, skipping files whose path contains 'anomaly-free'.
# Note: this call repeats the directory walk internally.
list_of_df, list_of_names = get_anomaly_data_and_names()

In [4]:
def driver_func(runnable_pipe_func, N_ESTIMATORS, N_CORES, list_of_df, random_seeds):
    """Run ``runnable_pipe_func`` once per seed in a process pool and gather the results.

    Task ``i`` is submitted as
    ``runnable_pipe_func(f"task_model_{i:03d}", list_of_df, random_seeds[i])``.

    Args:
        runnable_pipe_func: Callable executed in the worker processes; it is
            invoked with ``(task_name, list_of_df, seed)``.
        N_ESTIMATORS: Upper bound on the number of tasks. Tasks are paired
            with seeds via ``zip``, so at most ``len(random_seeds)`` tasks run.
        N_CORES: Number of worker processes in the pool.
        list_of_df: Object forwarded unchanged to every task.
        random_seeds: Iterable of seeds, one per task.

    Returns:
        list: The value returned by each task, in task-index order.

    Raises:
        Exception: ``AsyncResult.get`` re-raises any exception raised inside
            a worker.
    """
    with multiprocessing.Pool(N_CORES) as pool:
        # Submit everything first so the pool can keep all workers busy.
        pending = []
        for task_ind, seed in zip(range(N_ESTIMATORS), random_seeds):
            task_name = f"task_model_{str(task_ind).rjust(3,'0')}"
            handle = pool.apply_async(runnable_pipe_func, (task_name, list_of_df, seed))
            pending.append((task_name, handle))

        results = []
        progress = tqdm(pending)
        for task_name, handle in progress:
            # Update the label per task; a description passed to tqdm() is
            # evaluated only once and would stay at "task_model_000".
            progress.set_description(task_name)
            results.append(handle.get())

    return results

In [5]:
# Number of pipeline tasks to launch (each one gets its own random seed).
N_ESTIMATORS = 51
# Number of worker processes in the multiprocessing pool.
N_CORES = 10

In [6]:
# Draw N_ESTIMATORS distinct integer seeds from [0, 9999) using NumPy's global
# generator, which was seeded by set_random(12345) above.
random_seeds = np.random.choice(9999, N_ESTIMATORS, replace=False).tolist()
print(random_seeds)

[342, 3245, 4856, 6866, 3227, 7118, 6362, 7493, 6260, 7099, 9746, 5795, 7514, 8485, 1859, 4960, 680, 4831, 7679, 3022, 4618, 2698, 9141, 7000, 9082, 7857, 8371, 9497, 6504, 8686, 1052, 9523, 7695, 4409, 5850, 532, 7187, 686, 1493, 6968, 4968, 5276, 8612, 6338, 8518, 576, 2517, 2374, 7312, 2659, 8652]


In [7]:
# Run run_pipeline_conv_ae once per seed across the worker pool; `res` holds
# the per-task return values in task order.
res = driver_func(multiprocessing_notebook_helpers.run_pipeline_conv_ae, N_ESTIMATORS, N_CORES, list_of_df, random_seeds)

task_model_000: 100%|██████████| 51/51 [1:11:55<00:00, 84.61s/it] 
