In [7]:
import pandas as pd
import numpy as np
from apyori import apriori

In [15]:
# Read the data into a pandas dataframe
data = pd.read_csv('time_logs.csv')

In [16]:
data

Unnamed: 0,timestamp,action,cluster,user,cluster_id
0,2023-05-11T06:55:58.870Z,userExitedCluster,HallWay,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,0
1,2023-05-11T06:55:58.949Z,userEnteredCluster,Virtual-Chat,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,1
2,2023-05-11T06:56:00.732Z,userExitedCluster,Virtual-Chat,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,1
3,2023-05-11T06:56:00.781Z,userEnteredCluster,HallWay,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,0
4,2023-05-11T06:56:04.898Z,userExitedCluster,HallWay,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,0
5,2023-05-11T06:56:05.016Z,userEnteredCluster,STAIRWAY,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,5
6,2023-05-11T06:56:07.217Z,userExitedCluster,STAIRWAY,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,5
7,2023-05-11T06:56:07.294Z,userEnteredCluster,HallWay,8:acs:c8b24614-e4fc-46fd-bf85-ea8a575f9e26_000...,0


In [44]:
# Convert the timestamp column to a datetime format
data['timestamp'] = pd.to_datetime(data['timestamp'], utc=True, format="ISO8601")

# Extract the hour of the day from the timestamp column
data['seconds'] = data['timestamp'].dt.second

# Create a pivot table with users as rows and clusters as columns, and count the number of times each user visited each cluster
pivot_table = pd.pivot_table(data, values='timestamp', index='user', columns='cluster', aggfunc='count', fill_value=0)

# Convert the pivot table to a numpy array
dataset = np.array(pivot_table)

In [45]:
dataset

array([[4, 2, 2]])

In [46]:
# Run the apriori algorithm on the dataset
results = list(apriori(dataset, min_support=0.1, min_confidence=0.1))

In [47]:
results

[RelationRecord(items=frozenset({2}), support=1.0, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({2}), confidence=1.0, lift=1.0)]),
 RelationRecord(items=frozenset({4}), support=1.0, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({4}), confidence=1.0, lift=1.0)]),
 RelationRecord(items=frozenset({2, 4}), support=1.0, ordered_statistics=[OrderedStatistic(items_base=frozenset(), items_add=frozenset({2, 4}), confidence=1.0, lift=1.0), OrderedStatistic(items_base=frozenset({2}), items_add=frozenset({4}), confidence=1.0, lift=1.0), OrderedStatistic(items_base=frozenset({4}), items_add=frozenset({2}), confidence=1.0, lift=1.0)])]

In [48]:
# Print the association rules and their confidence scores
for rule in results:
    print("Rule:", rule[0], "->", rule[1])
    print("Support:", rule[1])
    for stat in rule[2]:
        print("Confidence:", stat.confidence)
        print("Lift:", stat.lift)
    print()

Rule: frozenset({2}) -> 1.0
Support: 1.0
Confidence: 1.0
Lift: 1.0

Rule: frozenset({4}) -> 1.0
Support: 1.0
Confidence: 1.0
Lift: 1.0

Rule: frozenset({2, 4}) -> 1.0
Support: 1.0
Confidence: 1.0
Lift: 1.0
Confidence: 1.0
Lift: 1.0
Confidence: 1.0
Lift: 1.0

