## Client Generation

In [None]:
import os

import matplotlib.pyplot as plt
import pandas as pd
from neumai.Client.NeumClient import NeumClient

# Build the Neum API client used by every query below. The key is read from the
# NEUM_API_KEY environment variable; os.environ[...] raises KeyError if it is unset.
client = NeumClient(api_key=os.environ['NEUM_API_KEY'])

## Helper functions

In [None]:
from datetime import datetime, timezone

def _format_time_ago(given_time):
    """Describe how long ago ``given_time`` was, relative to the current UTC time.

    Args:
        given_time: Timezone-aware ``datetime`` to compare against now.

    Returns:
        ``"<n> secs ago"`` when under a minute has passed, ``"<n> mins ago"``
        under an hour, ``"<n> hours ago"`` under a day, ``"<n> days ago"``
        under 15 days, and otherwise the ``YYYY-MM-DD`` date of ``given_time``.
        A timestamp in the future falls into the first branch and yields a
        negative seconds count.
    """
    elapsed = datetime.now(timezone.utc) - given_time
    secs = elapsed.total_seconds()
    # Each unit is floored from the previous one so labels show whole units.
    mins = secs // 60
    hours = mins // 60
    days = hours // 24

    if secs < 60:
        return f"{int(secs)} secs ago"
    if mins < 60:
        return f"{int(mins)} mins ago"
    if hours < 24:
        return f"{int(hours)} hours ago"
    if days < 15:
        return f"{int(days)} days ago"
    return given_time.strftime('%Y-%m-%d')


def calculate_time_differences_unix(unix_timestamp):
    """Format a Unix timestamp as a relative "time ago" string.

    Args:
        unix_timestamp: Seconds since the Unix epoch (int or float).

    Returns:
        A relative label such as ``"5 mins ago"``, or the UTC date as
        ``YYYY-MM-DD`` once the timestamp is 15 or more days old.
    """
    given_time = datetime.fromtimestamp(unix_timestamp, timezone.utc)
    return _format_time_ago(given_time)


def calculate_time_differences_iso(iso_timestamp):
    """Format an ISO 8601 timestamp string as a relative "time ago" string.

    Args:
        iso_timestamp: ISO 8601 date-time string. A value without a UTC offset
            is interpreted as UTC; a value with an offset (or a trailing
            ``Z``) is converted to UTC.

    Returns:
        A relative label such as ``"3 hours ago"``, or the UTC date as
        ``YYYY-MM-DD`` once the timestamp is 15 or more days old.

    Raises:
        ValueError: If ``iso_timestamp`` cannot be parsed by
            ``datetime.fromisoformat``.
    """
    # datetime.fromisoformat only accepts the "Z" suffix from Python 3.11 on,
    # so normalise it to an explicit offset first.
    if iso_timestamp.endswith("Z"):
        iso_timestamp = iso_timestamp[:-1] + "+00:00"

    given_time = datetime.fromisoformat(iso_timestamp)
    if given_time.tzinfo is None:
        # Naive value: keep the wall-clock time and label it as UTC.
        given_time = given_time.replace(tzinfo=timezone.utc)
    else:
        # Aware value: convert, rather than overwrite, so the offset is honoured.
        given_time = given_time.astimezone(timezone.utc)
    return _format_time_ago(given_time)

## Basic Auditing - Pipelines, Files and Users

Searches of a pipeline

In [None]:
# Identifier of the pipeline being audited; later cells reuse it for the
# file-level and user-level queries.
pipeline_id = "c8306336-1705-4a78-b26b-24920add9780"
# Fetch the retrievals recorded for this pipeline. The next cell loads the
# result into a DataFrame and groups it by its `requested_by` column.
retrievals = client.get_retrievals_by_pipeline_id(pipeline_id = pipeline_id)

In [None]:
# Count how many retrievals each requesting user issued against the pipeline.
df_retrievals_user = pd.DataFrame(retrievals)
df_retrievals_user_grouped = df_retrievals_user.groupby('requested_by').size()

# Draw on an explicit Axes instead of pyplot's implicit "current figure" so the
# cell does not depend on whatever figure state earlier cells left behind.
fig, ax = plt.subplots(figsize=(10, 6))
df_retrievals_user_grouped.plot(kind='bar', ax=ax, rot=45)
ax.set_xlabel('User')
ax.set_ylabel('Retrievals')
ax.set_title(f'Retrievals for pipeline {pipeline_id} per user')
plt.show()

Searches by file

In [None]:
# The file identifier is the pipeline id, an underscore, then the file's path.
file_id = f"{pipeline_id}_Fabulous Frozen Delights FAQ/Fabulous Frozen Delights - General FAQ.docx"
# Fetch the retrievals recorded for that file within the pipeline.
file_retrievals = client.get_retrievals_by_file_id(pipeline_id=pipeline_id, file_id=file_id)

In [None]:
# Tabulate the file's retrievals: drop the bulky/internal columns and replace
# the Unix `timestamp` with a human-readable "time ago" label.
df_file_retrievals = (
    pd.DataFrame(file_retrievals)
    .drop(columns=["pipeline_id", "results", "status", "files"])
    .assign(timestamp=lambda frame: frame['timestamp'].apply(calculate_time_differences_unix))
)
display(df_file_retrievals)

Searches of a file by users

In [None]:
# Count how many times each requesting user retrieved the file.
df_file_retrievals_user = df_file_retrievals.groupby('requested_by').size()

# Draw on an explicit Axes instead of pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
df_file_retrievals_user.plot(kind='bar', ax=ax, rot=45)
ax.set_xlabel('User')
ax.set_ylabel('Retrievals')
ax.set_title(f'Retrievals for file {file_id}')
plt.show()

Searches by user

In [None]:
# Identifier of the user being audited; later cells reuse it for the
# per-pipeline and per-file queries.
user_id = "0.17424683841483146"
# Fetch the retrievals recorded for this user.
retrievals_by_user = client.get_retrievals_by_user_id(user_id=user_id)

In [None]:
# Tabulate the user's retrievals: drop the bulky/internal columns and replace
# the Unix `timestamp` with a human-readable "time ago" label.
df_user_retrievals = (
    pd.DataFrame(retrievals_by_user)
    .drop(columns=["pipeline_id", "results", "status", "files"])
    .assign(timestamp=lambda frame: frame['timestamp'].apply(calculate_time_differences_unix))
)
display(df_user_retrievals)

In [None]:
# Count the user's retrievals per time bucket. `timestamp` already holds the
# "time ago" strings produced in the previous cell, so the bars are keyed by
# those labels and ordered by groupby's default (lexicographic) key sort.
df_user_retrievals_graph = df_user_retrievals.groupby('timestamp').size()

# Draw on an explicit Axes instead of pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
df_user_retrievals_graph.plot(kind='bar', ax=ax, rot=45)
# The x-axis shows time buckets, not users (the whole plot is for one user).
ax.set_xlabel('Time of retrieval')
ax.set_ylabel('Retrievals')
ax.set_title(f'Retrievals for user {user_id}')
plt.show()

Searches of a user for a pipeline

In [None]:
# Fetch the retrievals this user made against this pipeline, reusing the
# `user_id` and `pipeline_id` set in earlier cells.
retrievals_by_user_in_pipeline = client.get_retrievals_by_pipeline_id_user_id(user_id=user_id, pipeline_id=pipeline_id)

In [None]:
# Tabulate the user's retrievals in this pipeline: drop the bulky/internal
# columns and replace the Unix `timestamp` with a "time ago" label.
df_user_pipeline_retrievals = (
    pd.DataFrame(retrievals_by_user_in_pipeline)
    .drop(columns=["pipeline_id", "results", "status", "files"])
    .assign(timestamp=lambda frame: frame['timestamp'].apply(calculate_time_differences_unix))
)
display(df_user_pipeline_retrievals)

In [None]:
# Count the user's retrievals in this pipeline per time bucket. `timestamp`
# already holds the "time ago" strings produced in the previous cell.
df_user_pipeline_retrievals_graph = df_user_pipeline_retrievals.groupby('timestamp').size()

# Draw on an explicit Axes instead of pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
df_user_pipeline_retrievals_graph.plot(kind='bar', ax=ax, rot=45)
# The x-axis shows time buckets, not users (the whole plot is for one user).
ax.set_xlabel('Time of retrieval')
ax.set_ylabel('Retrievals')
ax.set_title(f'Retrievals for user {user_id} in pipeline {pipeline_id}')
plt.show()

Searches by user on a file

In [None]:
# Fetch the retrievals this user made of this file, reusing the `user_id`,
# `file_id` and `pipeline_id` set in earlier cells.
retrievals_of_file_by_user = client.get_retrievals_by_file_id_user_id(user_id=user_id, file_id=file_id, pipeline_id=pipeline_id)

In [None]:
# Tabulate the user's retrievals of this file: drop the bulky/internal columns
# and replace the Unix `timestamp` with a "time ago" label.
df_user_file_retrievals = (
    pd.DataFrame(retrievals_of_file_by_user)
    .drop(columns=["pipeline_id", "results", "status", "files"])
    .assign(timestamp=lambda frame: frame['timestamp'].apply(calculate_time_differences_unix))
)
display(df_user_file_retrievals)

In [None]:
# Count the user's retrievals of this file per time bucket. `timestamp`
# already holds the "time ago" strings produced in the previous cell.
df_user_file_retrievals_graph = df_user_file_retrievals.groupby('timestamp').size()

# Draw on an explicit Axes instead of pyplot's implicit "current figure".
fig, ax = plt.subplots(figsize=(10, 6))
df_user_file_retrievals_graph.plot(kind='bar', ax=ax, rot=45)
# The x-axis shows time buckets, not users (the whole plot is for one user).
ax.set_xlabel('Time of retrieval')
ax.set_ylabel('Retrievals')
ax.set_title(f'Retrievals for user {user_id} of file {file_id}')
plt.show()