# High-Level Batching Post-analysis

##### Imports

In [1]:
from neo4j import GraphDatabase
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime
import pandas as pd
import plotly.express as px
from dotenv import load_dotenv
import os


In [2]:
# Load variables from a .env file into the environment, then read the three
# Neo4j connection settings in one pass (each is None when the variable is unset).
load_dotenv()
uri, username, password = (
    os.getenv(env_key)
    for env_key in ('NEO4J_URI', 'NEO4J_USER', 'NEO4J_PASSWORD')
)

In [3]:
# Module-level Neo4j driver handle.
# NOTE(review): none of the query helpers visible in this notebook use this
# object (each one opens its own driver), and it is never closed in the visible
# cells — confirm whether a later cell relies on it.
driver = GraphDatabase.driver(uri, auth=(username, password))

##### Number of Batches

In [4]:
# Cypher: count the distinct nodes labelled HighLevelBatch.
cypher_total_batch_number = """
    MATCH (n:HighLevelBatch)
    RETURN count (distinct n)
"""


def total_batches(uri, username, password, cypher_total_batch_number):
    """Run the batch-count query against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_total_batch_number: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # A fresh driver is created on every call, so it must be closed here;
    # the context managers release the driver and session even if the
    # query raises.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_total_batch_number).data()
    
# Execute the count query and print the raw result rows.
results_total_batches = total_batches(uri, username, password, cypher_total_batch_number)
print(results_total_batches) 

[{'count (distinct n)': 22023}]


In [5]:
# Cypher: over all HighLevelBatch nodes, count how many have exactly one entry
# in corr_batch_numbers (frequency_1) and how many have more than one
# (frequency_joint_batches).
cypher_batches_in_high_distribution = """
    MATCH (n:HighLevelBatch)
WITH 
    CASE WHEN size(n.corr_batch_numbers) = 1 THEN 1 ELSE 0 END AS number_batches_1, 
    CASE WHEN size(n.corr_batch_numbers) > 1 THEN 1 ELSE 0 END AS joint_batches
RETURN 
    sum(number_batches_1) AS frequency_1, 
    sum(joint_batches) AS frequency_joint_batches

"""
def batch_in_high_distribution(uri, username, password, cypher_batches_in_high_distribution):
    """Run the single-vs-joint batch query against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_batches_in_high_distribution: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # A fresh driver is created on every call, so it must be closed here;
    # the context managers release the driver and session even if the
    # query raises.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_batches_in_high_distribution).data()
    
# Execute the single-vs-joint query and print the raw result rows.
results_batches_in_high_distribution = batch_in_high_distribution(uri, username, password, cypher_batches_in_high_distribution)
print(results_batches_in_high_distribution) 


[{'frequency_1': 18613, 'frequency_joint_batches': 3410}]


##### Average, MIN and MAX Amount of Kits in a Batch

In [6]:
# Cypher: group HighLevelBatch nodes by size(corr_batch_numbers) and count the
# nodes in each group. Rebinds the query name used by the previous cell.
cypher_batches_in_high_distribution = """
    MATCH (n:HighLevelBatch)
    WITH size(n.corr_batch_numbers) AS number_batches
    RETURN number_batches, count(number_batches) AS frequency 
"""
def batch_in_high_distribution(uri, username, password, cypher_batches_in_high_distribution):
    """Run the batch-size distribution query against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_batches_in_high_distribution: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # A fresh driver is created on every call, so it must be closed here;
    # the context managers release the driver and session even if the
    # query raises.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_batches_in_high_distribution).data()
    
# Fetch the per-size counts and split the records into two parallel columns.
results_batches_in_high_distribution = batch_in_high_distribution(
    uri,
    username,
    password,
    cypher_batches_in_high_distribution,
)
number_batches = [row['number_batches'] for row in results_batches_in_high_distribution]
frequency = [row['frequency'] for row in results_batches_in_high_distribution]

# One row per distinct size: how many HighLevelBatch nodes have that size.
df = pd.DataFrame(data={'Number of Batches': number_batches, 'Frequency': frequency})

# Bar chart of the distribution, with each bar annotated by its y value.
fig = px.bar(
    data_frame=df,
    x='Number of Batches',
    y='Frequency',
    opacity=0.6,
    title='Distribution of Batches over High_Level Batches',
    labels={'Number of Batches': 'Number of Batches', 'Frequency': 'Frequency'},
)
fig.update_traces(textfont_size=8, textposition='outside', texttemplate='%{y}')

fig.show()

##### Batch Frequency per Activity

In [7]:
# Cypher: count HighLevelBatch nodes per activity_name value, largest first.
cypher_batches_per_activity = """
    MATCH (n:HighLevelBatch)
    RETURN n.activity_name as activity, count(n) AS batch_count
    ORDER BY batch_count DESC
"""

def batches_per_activity(uri, username, password, cypher_query):
    """Run ``cypher_query`` against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_query: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # Using the driver as a context manager closes it even when the query
    # raises; a trailing driver.close() would be skipped in that case.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_query).data()

# Execute the per-activity count query.
results_batches_per_activity = batches_per_activity(
    uri, username, password, cypher_batches_per_activity
)

# The activity value is stringified before being used as the x-axis label.
activities = [str(row['activity']) for row in results_batches_per_activity]
batch_counts = [row['batch_count'] for row in results_batches_per_activity]

df = pd.DataFrame(data={'Activity': activities, 'Batch Count': batch_counts})

# Bars are both coloured and annotated by their batch count.
fig = px.bar(
    data_frame=df,
    x='Activity',
    y='Batch Count',
    color='Batch Count',
    text='Batch Count',
    title='Batches per Activity',
    labels={'Activity': 'Activity', 'Batch Count': 'High Batch Count'},
)

fig.update_traces(textposition='outside', texttemplate='%{text}')
fig.update_layout(
    width=1200,
    height=600,
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    xaxis_tickangle=-45,
    xaxis_tickfont_size=8,
    xaxis_title_font_size=8,
    yaxis_title_font_size=8,
)

fig.show()

In [8]:
# Cypher: group HighLevelBatch nodes by size(n.activity_name) and count the
# nodes in each group, largest first.
# NOTE(review): presumably activity_name is a list property, so size() is the
# number of activities; on a string it would be the character count — confirm.
cypher_batches_per_activity = """
    MATCH (n:HighLevelBatch)
    RETURN size(n.activity_name) as activity_number, count(n) AS batch_count
    ORDER BY batch_count DESC
"""

def batches_per_activity(uri, username, password, cypher_query):
    """Run ``cypher_query`` against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_query: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # Using the driver as a context manager closes it even when the query
    # raises; a trailing driver.close() would be skipped in that case.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_query).data()

# Execute the activity-count query.
results_batches_per_activity = batches_per_activity(
    uri, username, password, cypher_batches_per_activity
)

# The activity count is stringified before being used as the x-axis label.
activity_number = [str(row['activity_number']) for row in results_batches_per_activity]
batch_counts = [row['batch_count'] for row in results_batches_per_activity]

df = pd.DataFrame(data={'Activity Number': activity_number, 'Batch Count': batch_counts})

# Bars are both coloured and annotated by their batch count.
fig = px.bar(
    data_frame=df,
    x='Activity Number',
    y='Batch Count',
    color='Batch Count',
    text='Batch Count',
    title='Activities per High-Level Batches',
    labels={'Activity Number': 'Activity Number', 'Batch Count': 'High Batch Count'},
)

fig.update_traces(textposition='outside', texttemplate='%{text}')
fig.update_layout(
    width=800,
    height=600,
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    xaxis_tickangle=-45,
    xaxis_tickfont_size=8,
    xaxis_title_font_size=8,
    yaxis_title_font_size=8,
)

fig.show()

In [9]:
# Cypher: count HighLevelBatch nodes per (activity_name, size(corr_batch_numbers))
# pair, largest count first.
cypher_batches_per_activity = """
    MATCH (n:HighLevelBatch)
    RETURN n.activity_name as activity, size(n.corr_batch_numbers) AS number_batches, count(n) AS batch_count
    ORDER BY batch_count DESC
"""

def batches_per_activity(uri, username, password, cypher_query):
    """Run ``cypher_query`` against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_query: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # Using the driver as a context manager closes it even when the query
    # raises; a trailing driver.close() would be skipped in that case.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_query).data()

# Execute the per-(activity, batch size) count query.
results_batches_per_activity = batches_per_activity(
    uri, username, password, cypher_batches_per_activity
)

# Split the records into parallel columns; the activity value is stringified
# before being used as the x-axis label.
activities = [str(row['activity']) for row in results_batches_per_activity]
batch_counts = [row['batch_count'] for row in results_batches_per_activity]
batch_intstances = [row['number_batches'] for row in results_batches_per_activity]

df = pd.DataFrame(
    data={
        'Activity': activities,
        'Batch Count': batch_counts,
        'Batch Instances': batch_intstances,
    }
)

# One bar segment per (activity, batch size) row, coloured by the batch size
# and annotated with its count.
fig = px.bar(
    data_frame=df,
    x='Activity',
    y='Batch Count',
    color='Batch Instances',
    text='Batch Count',
    title='Batches per Activity Considering Correlated Batch Instances',
    labels={'Activity': 'Activity', 'Batch Count': 'High Batch Count'},
)

fig.update_traces(textposition='outside', texttemplate='%{text}')
fig.update_layout(
    width=1200,
    height=600,
    uniformtext_minsize=8,
    uniformtext_mode='hide',
    xaxis_tickangle=-45,
    xaxis_tickfont_size=8,
    xaxis_title_font_size=8,
    yaxis_title_font_size=8,
)

fig.show()

In [10]:
# Cypher: for every Resource linked by a CORR relationship to a HighLevelBatch,
# count the batches per (resource sysId, size(corr_batch_numbers)) pair,
# ordered by user and then batch size.
cypher_batches_size_distribution = """
MATCH (u:Resource)-[:CORR]->(n:HighLevelBatch)
WITH u.sysId AS user, size(n.corr_batch_numbers) AS number_batches
RETURN user, number_batches, count(number_batches) AS frequency
ORDER BY user, number_batches
"""

def batches_size_distribution(uri, username, password, cypher_query):
    """Run ``cypher_query`` against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_query: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # A fresh driver is created on every call, so it must be closed here;
    # the context managers release the driver and session even if the
    # query raises.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_query).data()

# Execute the per-user batch-size query and split the records into columns.
results_batches_size_distribution = batches_size_distribution(uri, username, password, cypher_batches_size_distribution)

user = [record['user'] for record in results_batches_size_distribution]
number_batches = [record['number_batches'] for record in results_batches_size_distribution]
frequency = [record['frequency'] for record in results_batches_size_distribution]


df = pd.DataFrame({
    'User': user,
    'Number of Batches': number_batches,
    'Frequency': frequency
})

# Plotly Express maps a numeric `color` column to a continuous colour scale and
# emits a single trace, so barmode='group' has nothing to place side by side.
# Plotting string labels instead yields one trace per batch size. `df` itself
# keeps the numeric column; only the frame handed to px.bar is relabelled.
size_labels = [str(size) for size in number_batches]
# Legend/bar order follows ascending numeric size rather than first appearance.
size_order = [str(size) for size in sorted({s for s in number_batches if s is not None})]

fig = px.bar(df.assign(**{'Number of Batches': size_labels}),
             x='User', y='Frequency', color='Number of Batches', barmode='group',
             title='Distribution of HighLevelBatch Sizes for Each User',
             labels={'User': 'User', 'Frequency': 'Frequency', 'Number of Batches': 'Batch Size'},
             category_orders={'Number of Batches': size_order})

fig.update_layout(
    xaxis_tickangle=-45,
    xaxis_tickfont_size=8,
    xaxis_title_font_size=10,
    # Extra bottom margin leaves room for the rotated user labels.
    margin=dict(l=40, r=40, t=40, b=120),
    height=600
)

fig.show()

In [11]:
# Cypher: for every Resource linked by a CORR relationship to a HighLevelBatch,
# count the batches per (resource sysId, activity_name) pair, ordered by user
# and then ascending count. Rebinds the query name used by the previous cell.
cypher_batches_size_distribution = """
MATCH (u:Resource)-[:CORR]->(n:HighLevelBatch)
WITH u.sysId AS user, n.activity_name AS activity
RETURN user, activity, count(activity) AS frequency
ORDER BY user, frequency
"""

def batches_size_distribution(uri, username, password, cypher_query):
    """Run ``cypher_query`` against Neo4j and return its rows.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name used for authentication.
        password: Password used for authentication.
        cypher_query: Cypher query text to execute.

    Returns:
        The value of ``Result.data()``: one dict per result row.
    """
    # A fresh driver is created on every call, so it must be closed here;
    # the context managers release the driver and session even if the
    # query raises.
    with GraphDatabase.driver(uri, auth=(username, password)) as driver:
        with driver.session() as session:
            return session.run(cypher_query).data()


# Execute the per-user activity query.
results_batches_size_distribution = batches_size_distribution(
    uri, username, password, cypher_batches_size_distribution
)

# Split the records into parallel columns; the activity value is stringified
# before being used as the colour category.
user = [row['user'] for row in results_batches_size_distribution]
activity = [str(row['activity']) for row in results_batches_size_distribution]
frequency = [row['frequency'] for row in results_batches_size_distribution]

df = pd.DataFrame(
    data={
        'User': user,
        'Activity': activity,
        'Frequency': frequency,
    }
)

# One bar per user, split into segments coloured by activity.
fig = px.bar(
    data_frame=df,
    x='User',
    y='Frequency',
    color='Activity',
    title='Distribution of HighLevelBatch Activities for Each User',
    labels={'User': 'User', 'Frequency': 'Frequency', 'Activity': 'activity'},
)

fig.update_layout(
    width=1500,
    height=800,
    margin=dict(l=40, r=40, t=40, b=120),
    xaxis_tickangle=-45,
    xaxis_tickfont_size=8,
    xaxis_title_font_size=10,
)

fig.show()



