# Extended Resource Exploration Using Implemented EKG

In [2]:
from neo4j import GraphDatabase
import pandas as pd
from datetime import datetime, date
import plotly.express as px
import os
from dotenv import load_dotenv


In [3]:
# Load variables from a local .env file (if present) into the process environment,
# then read the three Neo4j connection settings. Each value is None when its
# environment variable is not set.
load_dotenv()
uri, username, password = (
    os.environ.get(env_key)
    for env_key in ('NEO4J_URI', 'NEO4J_USER', 'NEO4J_PASSWORD')
)

In [4]:
# Open a Neo4j driver with the connection settings read from the environment.
# NOTE(review): first() creates its own driver, so the visible cells never use
# this handle — confirm whether later cells rely on it, and close it when done.
driver = GraphDatabase.driver(uri, auth=(username, password))

# Shift Analysis

In [5]:
# Cypher query for the shift analysis: for every Event that has a CORR relationship
# to both a Resource and a Kit, return the resource's sysId, the event timestamp
# (once as a datetime, once as a date), the activity, and the kit's kitId and runId.
# Rows are ordered by event timestamp, then by user id.
cypher_first = """
MATCH (u:Resource)<-[r:CORR]-(e:Event)-[r1:CORR]->(k:Kit)


RETURN 
  u.sysId AS user_id, 
  datetime(e.timestamp) AS event_timestamp,
  date(e.timestamp) AS event_date, 
  e.activity as activity,
  k.kitId as Kit,
  k.runId as Run


ORDER BY 
  event_timestamp, user_id ASC 

 """

def first(uri, username, password, cypher_first):
    """Run a Cypher query against Neo4j and return every record as a dict.

    A dedicated driver is opened for the call and is always closed again,
    including when the query raises.

    Args:
        uri: Connection URI passed to ``GraphDatabase.driver``.
        username: User name for basic authentication.
        password: Password for basic authentication.
        cypher_first: Cypher query text to execute.

    Returns:
        The value of ``result.data()`` for the query (one dict per record).
    """
    driver = GraphDatabase.driver(uri, auth=(username, password))
    try:
        with driver.session() as session:
            # Materialise the records while the session is still open.
            return session.run(cypher_first).data()
    finally:
        # The driver is created per call, so it must be released per call;
        # previously it was left open, leaking its connections.
        driver.close()
    
# Execute the shift-analysis query once and keep the raw records it returns.
results_first = first(
    uri=uri,
    username=username,
    password=password,
    cypher_first=cypher_first,
)

In [6]:
def convert_neo4j_date_to_python(neo4j_date):
    """Build a standard-library ``datetime.date`` from a date-like value.

    Only the ``year``, ``month`` and ``day`` attributes of ``neo4j_date`` are
    read (intended for ``neo4j.time.Date``).
    """
    year, month, day = neo4j_date.year, neo4j_date.month, neo4j_date.day
    return date(year, month, day)

def convert_neo4j_datetime_to_python(neo4j_datetime):
    """Build a standard-library ``datetime.datetime`` from a datetime-like value.

    Reads ``year`` through ``second``, ``nanosecond`` and ``tzinfo`` from
    ``neo4j_datetime`` (intended for ``neo4j.time.DateTime``). The time zone
    object is passed through unchanged.
    """
    source = neo4j_datetime
    # datetime.datetime resolves to microseconds, so anything below one
    # microsecond is dropped by the floor division.
    microsecond = source.nanosecond // 1000
    return datetime(
        source.year, source.month, source.day,
        source.hour, source.minute, source.second,
        microsecond,
        source.tzinfo,
    )

# One row per record returned by the query.
df_first = pd.DataFrame(results_first)

# event_date: Neo4j date -> datetime.date -> 'YYYY-MM-DD' text, in a single pass.
df_first['event_date'] = df_first['event_date'].apply(
    lambda neo4j_day: convert_neo4j_date_to_python(neo4j_day).strftime('%Y-%m-%d')
)

# event_timestamp: Neo4j datetime -> datetime.datetime.
df_first['event_timestamp'] = df_first['event_timestamp'].apply(
    convert_neo4j_datetime_to_python
)

# Resource ids are handled as strings from here on.
df_first['user_id'] = df_first['user_id'].astype(str)

In [7]:
# Make sure the timestamp column is a pandas datetime column.
event_ts = pd.to_datetime(df_first['event_timestamp'])
df_first['event_timestamp'] = event_ts

# Re-derive the calendar day from the timestamp and store it as a datetime
# (midnight of that day), so the .dt accessor works on it.
df_first['event_date'] = pd.to_datetime(event_ts.dt.date)

In [8]:
# Calendar features of the event day; day_of_week counts Monday as 0 and Sunday as 6.
event_day = df_first['event_date']
df_first['month'] = event_day.dt.month
df_first['day_of_week'] = event_day.dt.weekday

In [9]:
# For every (resource, day) pair keep the first and the last event timestamp,
# then order the rows by day and by first event within the day.
users_work_schedule = (
    df_first
    .groupby(['user_id', 'event_date'])
    .agg(
        earliest_timestamp=('event_timestamp', 'min'),
        latest_timestamp=('event_timestamp', 'max'),
    )
    .reset_index()
    .sort_values(by=['event_date', 'earliest_timestamp'])
)

In [10]:
# Sorted copy: rows ordered by day, then by each resource's first event that day.
users_work_schedule_sorted = users_work_schedule.sort_values(
    by=['event_date', 'earliest_timestamp']
)

# Within each day, the gap between a resource's first event and the first event
# of the resource that started just before it (NaT for the day's first starter).
first_events_by_day = users_work_schedule_sorted.groupby('event_date')['earliest_timestamp']
users_work_schedule_sorted['time_passed'] = first_events_by_day.diff()

In [11]:
# Gap to the previous starter of the same day, expressed in minutes.
#
# Timedelta `.dt.seconds` is the whole sub-day part of the duration in seconds,
# so it already contains the hours. The earlier version divided it by 60 and
# then added `components['hours'] * 60` on top, which counted every hour twice
# (a 1h30m gap became 150 minutes). `total_seconds()` converts the full duration
# exactly once; it also keeps fractions of a second and leaves NaT gaps as NaN.
users_work_schedule_sorted['time_passed_minutes'] = (
    users_work_schedule_sorted['time_passed'].dt.total_seconds() / 60
)

In [12]:
# Gaps between consecutive starters of a day, in minutes; rows without a gap
# (the first starter of each day) are dropped before plotting.
observed_gaps = users_work_schedule_sorted['time_passed'].dropna()
time_passed_minutes = observed_gaps.dt.total_seconds() / 60

fig = px.histogram(
    time_passed_minutes,
    nbins=100,
    labels={'value': 'Time Passed (minutes)', 'count': 'Frequency'},
    title='Distribution of Time Passed between Starting Work Resources (Excluding NaN)',
)
fig.update_layout(bargap=0.1)
fig.show()


In [13]:
# Scatter of each resource's first event per day: time of day on x, date on y.
start_ts = pd.to_datetime(users_work_schedule['earliest_timestamp'])
users_work_schedule['earliest_timestamp'] = start_ts

# Calendar day of the first event, plus its clock time as text for the hover box.
users_work_schedule['event_date'] = start_ts.dt.date
users_work_schedule['start_time'] = start_ts.dt.strftime('%H:%M:%S')

# Seconds since midnight give a continuous x-axis.
users_work_schedule['time_seconds'] = (
    start_ts.dt.hour * 3600 + start_ts.dt.minute * 60 + start_ts.dt.second
)

fig = px.scatter(
    users_work_schedule,
    x='time_seconds',
    y='event_date',
    color='user_id',
    title='Earliest Timestamps of Users',
    labels={'time_seconds': 'Time of Day (HH:MM)', 'event_date': 'Event Date'},
    hover_data={'user_id': True, 'start_time': True},
)

# One labelled tick per full hour; the axis spans the whole day in seconds.
hours_of_day = range(24)
fig.update_layout(
    xaxis={
        'tickmode': 'array',
        'tickvals': [hour * 3600 for hour in hours_of_day],
        'ticktext': [f'{hour:02d}:00' for hour in hours_of_day],
        'title': 'Time of Day (HH:MM)',
        'range': [0, 86400],
    },
    yaxis={'title': 'Event Date'},
    showlegend=False,
)

fig.show()





In [14]:
start_ts = pd.to_datetime(users_work_schedule['earliest_timestamp'])
users_work_schedule['earliest_timestamp'] = start_ts

# Clock time (without the date) of each resource's first event of the day.
users_work_schedule['time'] = start_ts.dt.time
start_clock = users_work_schedule['time']

# Window boundaries as plain time-of-day values.
midnight = pd.to_datetime('00:00').time()
eight_am = pd.to_datetime('08:00').time()
nine_am = pd.to_datetime('09:00').time()

# Rows whose first event falls in [00:00, 08:00) and in [08:00, 09:00).
early_morning_users = users_work_schedule[(start_clock >= midnight) & (start_clock < eight_am)]
late_morning_users = users_work_schedule[(start_clock >= eight_am) & (start_clock < nine_am)]

# Distinct resources per calendar day inside each window.
early_days = early_morning_users['earliest_timestamp'].dt.date
late_days = late_morning_users['earliest_timestamp'].dt.date
early_morning_user_counts = early_morning_users.groupby(early_days)['user_id'].nunique()
late_morning_user_counts = late_morning_users.groupby(late_days)['user_id'].nunique()

# Mean of the daily counts (only days with at least one starter in the window appear).
avg_early_morning_users = early_morning_user_counts.mean()
avg_late_morning_users = late_morning_user_counts.mean()

# int() drops the fractional part of each mean before printing.
print(f"Average number of users between 00:00 and 08:00: {int(avg_early_morning_users)}")
print(f"Average number of users between 08:00 and 09:00: {int(avg_late_morning_users)}")

Average number of users between 00:00 and 08:00: 2
Average number of users between 08:00 and 09:00: 1


In [15]:
# Scatter of each resource's last event per day: time of day on x, date on y.
end_ts = pd.to_datetime(users_work_schedule['latest_timestamp'])
users_work_schedule['latest_timestamp'] = end_ts

# Calendar day of the last event, plus its clock time as text for the hover box.
# Note: this overwrites the event_date and time_seconds columns of the frame.
users_work_schedule['event_date'] = end_ts.dt.date
users_work_schedule['end_time'] = end_ts.dt.strftime('%H:%M:%S')

# Seconds since midnight give a continuous x-axis.
users_work_schedule['time_seconds'] = (
    end_ts.dt.hour * 3600 + end_ts.dt.minute * 60 + end_ts.dt.second
)

fig = px.scatter(
    users_work_schedule,
    x='time_seconds',
    y='event_date',
    color='user_id',
    title='Latest Timestamps of Users',
    labels={'time_seconds': 'Time of Day (HH:MM)', 'event_date': 'Event Date'},
    hover_data={'user_id': True, 'end_time': True},
)

# One labelled tick per full hour; the axis spans the whole day in seconds.
hours_of_day = range(24)
fig.update_layout(
    xaxis={
        'tickmode': 'array',
        'tickvals': [hour * 3600 for hour in hours_of_day],
        'ticktext': [f'{hour:02d}:00' for hour in hours_of_day],
        'title': 'Time of Day (HH:MM)',
        'range': [0, 86400],
    },
    yaxis={'title': 'Event Date'},
    showlegend=False,
)

fig.show()





# Shifts Definition

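### Shift 1: from 00:00 to 09:30
### Shift 2: from 09:30 to 11:30
### Shift 3: from 11:30 to 14:00
### Shift 4: from 14:00 to 23:59 (end of day)

Shifts 1–3 include their start time and exclude their end time. A resource's shift for a given day is determined by the time of its first event on that day.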

# Separation to Shifts

In [16]:
# Make sure the first-event column is datetime-typed, since assign_shift calls
# .time() on every value of this column.
users_work_schedule['earliest_timestamp'] = pd.to_datetime(users_work_schedule['earliest_timestamp'])

def assign_shift(timestamp):
    """Return the shift label ('1' to '4') for the time of day of ``timestamp``.

    Only ``timestamp.time()`` is used, so the date part is ignored. Windows
    include their start and exclude their end:

    * '1': 00:00 up to 09:30
    * '2': 09:30 up to 11:30
    * '3': 11:30 up to 14:00
    * '4': 14:00 onwards
    """
    clock = timestamp.time()
    # (exclusive upper bound, label) in chronological order: the first bound
    # that lies after the clock time decides the shift.
    upper_bounds = (
        ('09:30:00', '1'),
        ('11:30:00', '2'),
        ('14:00:00', '3'),
    )
    for bound, label in upper_bounds:
        if clock < pd.Timestamp(bound).time():
            return label
    return '4'

# Label every (resource, day) row with the shift its first event falls into.
users_work_schedule['shift'] = users_work_schedule['earliest_timestamp'].map(assign_shift)

In [17]:
# Distinct resources seen on each calendar day, split by day type.
# day_of_week counts Monday as 0, so values 0-4 are weekdays and 5-6 the weekend.
weekday_events = df_first[df_first['day_of_week'] < 5]
weekend_events = df_first[df_first['day_of_week'] >= 5]
weekday_working_user_counts = weekday_events.groupby('event_date')['user_id'].nunique()
weekend_working_user_counts = weekend_events.groupby('event_date')['user_id'].nunique()

# Weekdays: highest, lowest and mean daily head-count, plus the days of the extremes.
max_users_weekday, min_users_weekday, avg_users_weekday = (
    weekday_working_user_counts.max(),
    weekday_working_user_counts.min(),
    weekday_working_user_counts.mean(),
)
max_weekday_date, min_weekday_date = (
    weekday_working_user_counts.idxmax(),
    weekday_working_user_counts.idxmin(),
)

# Weekends: same statistics.
max_users_weekend, min_users_weekend, avg_users_weekend = (
    weekend_working_user_counts.max(),
    weekend_working_user_counts.min(),
    weekend_working_user_counts.mean(),
)
max_weekend_date, min_weekend_date = (
    weekend_working_user_counts.idxmax(),
    weekend_working_user_counts.idxmin(),
)

# The averages are printed with their fractional part dropped by int().
print("Max number of unique users for weekdays:", max_users_weekday, "on", max_weekday_date.strftime('%Y-%m-%d'))
print("Min number of unique users for weekdays:", min_users_weekday, "on", min_weekday_date.strftime('%Y-%m-%d'))
print("Average number of unique users for weekdays:", int(avg_users_weekday))

print("Max number of unique users for weekends:", max_users_weekend, "on", max_weekend_date.strftime('%Y-%m-%d'))
print("Min number of unique users for weekends:", min_users_weekend, "on", min_weekend_date.strftime('%Y-%m-%d'))
print("Average number of unique users for weekends:", int(avg_users_weekend))

Max number of unique users for weekdays: 15 on 2022-01-10
Min number of unique users for weekdays: 2 on 2022-01-06
Average number of unique users for weekdays: 13
Max number of unique users for weekends: 3 on 2022-03-26
Min number of unique users for weekends: 2 on 2022-01-01
Average number of unique users for weekends: 2
