In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm
tqdm.pandas(desc='Processing')

from sbatch_pred.queuetime_prediction.system_state import (get_partition_nodes,
                                                           get_partition_node_group_proportions,
                                                           get_node_groups,
                                                           calculate_queue_state_features,
                                                           calculate_system_utilization_features)

from sbatch_pred.queuetime_prediction.model_training import prepare_model_data

In [None]:
# Load the four input tables in one pass; the paths are relative to the
# notebook's working directory and are read in the order listed.
slurm_df, nodes_df, partition_df, rt_pred_df = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        '../../data/slurm_data.parquet',
        '../../data/nodes.parquet',
        '../../data/partitions.parquet',
        '../../data/predicted_runtime.parquet',
    )
)

In [None]:
# Keep a single runtime prediction per job (first occurrence wins) so the left
# merge below cannot fan out and duplicate rows of the SLURM job table.
rt_pred_df = rt_pred_df.drop_duplicates(subset=['job_array_id'])

# Merge runtime predictions with SLURM data.
# - Any 'wallclock_pred' column already on slurm_df is dropped first: without
#   this, re-running the cell would make pandas emit 'wallclock_pred_x' /
#   'wallclock_pred_y' and later lookups of 'wallclock_pred' would fail.
# - how='left' keeps every job; jobs without a prediction get NaN here.
# - validate='many_to_one' raises MergeError if 'job_array_id' is ever not
#   unique on the prediction side, instead of silently duplicating jobs.
slurm_df = (
    slurm_df
    .drop(columns=['wallclock_pred'], errors='ignore')
    .merge(
        rt_pred_df[['job_array_id', 'wallclock_pred']],
        on='job_array_id',
        how='left',
        validate='many_to_one',
    )
)

In [None]:
# The earliest jobs in the dataset have no runtime prediction, because there
# are no earlier jobs available to train a model on. For those rows, fall back
# to the wallclock the job requested.
slurm_df['wallclock_pred'] = slurm_df['wallclock_pred'].fillna(value=slurm_df['wallclock_req'])

# Job size (predicted wallclock multiplied by requested node count), which is
# used for calculating system state features.
slurm_df['node_secs_pred'] = slurm_df['wallclock_pred'].mul(slurm_df['nodes_req'])

In [None]:
# Get a dictionary with lists of nodes in each partition.
# The result is reused below for the node-group proportions and later for the
# system utilization features.
# NOTE(review): the exact key/value layout is defined by get_partition_nodes — confirm there.
partition_nodes = get_partition_nodes(partition_df, slurm_df)

# For each partition, get the proportion of nodes in each node group.
# This is later handed to calculate_queue_state_features.
partition_node_group_proportions = get_partition_node_group_proportions(partition_nodes, nodes_df)

# Get the set of node groups containing nodes available to the partition the job was submitted to.
# Stored as a new 'node_groups' column on slurm_df.
# NOTE(review): presumably consumed by the feature-calculation helpers — verify against their source.
slurm_df['node_groups'] = get_node_groups(nodes_df, slurm_df)

In [None]:
# Feature-set configurations to generate.
# Only the 'cluster' knowledge depth is enabled; the wider list that was
# previously used here is ['node', 'partition', 'cluster'].
knowledge_depths = ['cluster']
wallclock_knowledges = ['user', 'pred', 'perfect']

# Visit every (wallclock knowledge, knowledge depth) pair, with wallclock
# knowledge varying slowest. Each pass runs three stages in a fixed order.
# NOTE(review): none of the helpers' return values are captured, so they are
# presumably relied on for side effects (mutating slurm_df and/or writing
# files) — confirm in sbatch_pred before reordering or parallelizing.
for wallclock_knowledge, knowledge_depth in (
    (wallclock_option, depth_option)
    for wallclock_option in wallclock_knowledges
    for depth_option in knowledge_depths
):
    print(f'Calculating features for knowledge depth ({knowledge_depth}) and wallclock knowledge ({wallclock_knowledge}).')

    # Stage 1: queue state features.
    print('Calculating queue state features.')
    calculate_queue_state_features(
        slurm_df,
        nodes_df,
        partition_node_group_proportions,
        knowledge_depth,
        wallclock_knowledge,
        verbose=True,
    )

    # Stage 2: system utilization features.
    print('Calculating system utilization features.')
    calculate_system_utilization_features(
        partition_nodes,
        slurm_df,
        wallclock_knowledge,
        knowledge_depth,
    )

    # Stage 3: assemble the modeling dataset for this configuration.
    print('Preparing data for ML modeling.')
    prepare_model_data(knowledge_depth, wallclock_knowledge)