# Inference Time

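Plot the time taken to make predictions for each deployment, estimated as the gap between the timestamps of the first and last output CSV files in that deployment's directory.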

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def get_deployment_times(inference_dir):
    """Summarise the span of CSV file timestamps for each deployment.

    Every sub-directory of ``inference_dir`` whose name starts with ``'dep'``
    is treated as one deployment. For each deployment that contains at least
    one ``.csv`` file, the earliest and latest file timestamps are recorded
    together with their difference and the number of CSV files.

    Args:
        inference_dir: Path of the directory holding the deployment folders.

    Returns:
        A ``pandas.DataFrame`` with one row per deployment and the columns
        ``dep``, ``time_diff``, ``start_date``, ``end_date`` and ``n_files``.
        The columns are always present, even when no deployment qualifies,
        so downstream code can rely on them.
    """
    columns = ['dep', 'time_diff', 'start_date', 'end_date', 'n_files']

    # Keep only directories (not files) whose name starts with 'dep'.
    deps2 = [
        dep for dep in os.listdir(inference_dir)
        if dep.startswith('dep') and os.path.isdir(os.path.join(inference_dir, dep))
    ]

    print(f'Deployments: {", ".join(deps2)}')

    # Collect plain dict rows and build the frame once at the end; growing a
    # DataFrame with concat inside the loop is quadratic and degrades dtypes.
    rows = []
    for dep in deps2:
        dep_dir = os.path.join(inference_dir, dep)
        csv_files = [file for file in os.listdir(dep_dir) if file.endswith('.csv')]

        # NOTE: os.path.getctime is the creation time on Windows but the last
        # metadata-change time on Unix, so on Unix this is only an
        # approximation of when each file was written.
        dates = [
            pd.to_datetime(os.path.getctime(os.path.join(dep_dir, file)), unit='s')
            for file in csv_files
        ]
        print(len(dates))

        # Deployments without any CSV output contribute no row.
        if not dates:
            continue

        start_date, end_date = min(dates), max(dates)
        rows.append({
            'dep': dep,
            'time_diff': end_date - start_date,
            'start_date': start_date,
            'end_date': end_date,
            'n_files': len(dates),
        })

    return pd.DataFrame(rows, columns=columns)

def plot_inference_times(inference_df, remove_outliers=True, max_hours=3 * 24):
    """Scatter-plot time taken against number of files, one point per deployment.

    Args:
        inference_df: DataFrame with the columns ``dep``, ``time_diff``
            (timedeltas) and ``n_files``. A ``time_taken`` column holding
            ``time_diff`` in hours is added to this frame in place.
        remove_outliers: When true, rows whose ``time_taken`` is not below
            ``max_hours`` are left out of the plot.
        max_hours: Outlier cutoff in hours; defaults to three days.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Written onto the caller's frame (existing behaviour, kept so code that
    # reads `time_taken` afterwards keeps working). to_timedelta also copes
    # with an object-dtype column of Timedelta values.
    inference_df['time_taken'] = (
        pd.to_timedelta(inference_df['time_diff']).dt.total_seconds() / 3600
    )

    if remove_outliers:
        inference_df = inference_df[inference_df['time_taken'] < max_hours]

    # Always work on a clean positional index so the plot does not depend on
    # whatever index the caller's frame happens to carry.
    inference_df = inference_df.reset_index(drop=True)

    plt.figure(figsize=(10, 5))
    sns.scatterplot(x='n_files', y='time_taken', data=inference_df)

    # Label every point with its deployment name.
    for n_files, time_taken, dep in zip(
        inference_df['n_files'], inference_df['time_taken'], inference_df['dep']
    ):
        plt.text(n_files, time_taken, dep)

    plt.xticks(rotation=45)
    # The x axis carries the file count and the y axis the hours.
    plt.xlabel('Number of files')
    plt.ylabel('Time taken (hours)')
    plt.title('Time difference between first and last file in each deployment')
    plt.show()


In [None]:
# Collect the per-deployment timing table for the Singapore data directory.
inference_times = get_deployment_times('/home/users/katriona/amber-inferences/data/singapore')

In [None]:
# Bare expression: in a notebook cell this displays the timing table.
inference_times

In [None]:
# Plot with the default remove_outliers=True.
plot_inference_times(inference_times)