In [1]:
import psycopg2
import numpy as np
import datetime

In [2]:
# This query returns (user_id, item_key, recieved_at) rows for a specified 24-hour period.
# Only 'liked' content is included, i.e. content the user spent longer than their own average on.

def query_database(window_start='2023-03-29 00:00:00.000000',
                   window_end='2023-03-30 00:00:00.000000',
                   limit=500):
    """Fetch 'liked' recommendations sent inside a time window.

    A row counts as liked when its engagement duration is longer than the
    average engagement duration of the same user, where the average is taken
    over all of that user's rows in ``recommendations``.

    Args:
        window_start: Inclusive lower bound compared against ``sent_at``.
        window_end: Exclusive upper bound compared against ``sent_at``.
        limit: Maximum number of rows returned.

    Returns:
        A list of ``(user_id, item_key, recieved_at)`` tuples ordered by
        ``user_id`` and then ``recieved_at``.

    Raises:
        psycopg2.Error: if connecting to the database or running the query
            fails.
    """
    # EXTRACT(EPOCH ...) yields the whole duration in seconds. The previous
    # EXTRACT(SECOND ...) only returned the 0-59 seconds field, so e.g. 1m05s
    # compared as 5 seconds.
    # Values are bound through placeholders instead of being pasted into the
    # SQL text.
    query = """
        SELECT r.user_id, r.item_key, r.recieved_at
        FROM recommendations r
        JOIN (
            SELECT user_id,
                   AVG(EXTRACT(EPOCH FROM engagement_duration)) AS avg_duration
            FROM recommendations
            GROUP BY user_id
        ) a ON r.user_id = a.user_id
        WHERE EXTRACT(EPOCH FROM r.engagement_duration) > a.avg_duration
          AND r.sent_at >= %s
          AND r.sent_at < %s
        ORDER BY r.user_id, r.recieved_at
        LIMIT %s;
    """

    # NOTE(review): credentials are hard-coded in the notebook; they should be
    # moved to environment variables or a secrets store before this is shared.
    conn = psycopg2.connect(
        host="ec2-54-159-10-112.compute-1.amazonaws.com",
        port=5432,
        database="backprop-bunch",
        user="root",
        password="backprop")

    # try/finally guarantees the connection is released even when the query
    # raises; the cursor context manager closes the cursor the same way.
    try:
        with conn.cursor() as cur:
            cur.execute(query, (window_start, window_end, limit))
            return cur.fetchall()
    finally:
        conn.close()

In [None]:
# Pull the rows from the database and wrap them in a NumPy array; per the SELECT
# in query_database the columns are user_id, item_key, recieved_at.
results = np.array(query_database())  

In [3]:
def transitive_closure(tuples):
    """Return the transitive closure of a binary relation.

    ``tuples`` is an iterable of ``(x, y)`` pairs. The result is a set holding
    every input pair plus every pair ``(x, z)`` that can be reached by chaining
    pairs, i.e. whenever ``(x, y)`` and ``(y, z)`` are both present.
    """
    pairs = set(tuples)
    still_growing = True
    while still_growing:
        # Join the relation with itself on the shared middle element.
        chained = {
            (start, end)
            for start, mid_out in pairs
            for mid_in, end in pairs
            if mid_out == mid_in
        }
        expanded = pairs.union(chained)
        # Stop once a full pass adds nothing new.
        still_growing = expanded != pairs
        pairs = expanded
    return pairs

In [43]:
def build_item_to_item_similarity(results):
    """Build an item-to-item transition count matrix and a next-item lookup.

    ``results`` is read as a 2-D array whose column 0 is a user id and whose
    column 1 is an item key. Two adjacent rows with the same user id are taken
    as that user moving from the first row's item to the second row's item, so
    rows are expected to be grouped by user and in visit order; the function
    does not sort them.

    For each user the observed moves are extended by their transitive closure
    (a -> b and b -> c imply a -> c). Every observed move counts once per
    occurrence; every move that exists only through the closure counts once
    per user.

    Args:
        results: 2-D array-like with at least two columns (user id, item key).

    Returns:
        A tuple ``(item_to_item, rec_dict)``. ``item_to_item[i, j]`` is the
        number of counted moves from the i-th to the j-th item, with items in
        ``np.unique`` order. ``rec_dict`` maps each item to the item with the
        highest count in its row. Empty input yields a ``(0, 0)`` matrix and
        an empty dict.
    """
    results = np.asarray(results)
    if results.size == 0:
        # Nothing to count; np.argmax would raise on an empty matrix.
        return (np.zeros((0, 0)), {})

    user_col = results[:, 0]
    item_col = results[:, 1]

    unique_items = np.unique(item_col)
    item_index = {item: position for position, item in enumerate(unique_items)}
    item_to_item = np.zeros((len(unique_items), len(unique_items)))

    # One pass over adjacent row pairs, bucketed by user, instead of rescanning
    # every row once per user.
    moves_by_user = {}
    for prev_user, next_user, prev_item, next_item in zip(
            user_col[:-1], user_col[1:], item_col[:-1], item_col[1:]):
        if prev_user == next_user:
            moves_by_user.setdefault(prev_user, []).append((prev_item, next_item))

    for observed in moves_by_user.values():
        # Moves implied by chaining that the user did not make directly.
        inferred = transitive_closure(observed) - set(observed)
        for source, target in observed + list(inferred):
            item_to_item[item_index[source], item_index[target]] += 1

    # np.argmax returns the first maximum, so ties go to the item that sorts
    # first, and an item with no outgoing moves (all-zero row) is mapped to
    # unique_items[0].
    best_next = np.argmax(item_to_item, axis=1)
    rec_dict = dict(zip(unique_items, unique_items[best_next]))

    return (item_to_item, rec_dict)

In [46]:
# Hand-made fixture: three users, three visits each, all on 2023-03-31 in the
# 09:00 hour. Each spec entry is (user id, item key, minute past 09:00).
test = np.array([
    [user, item, datetime.datetime(2023, 3, 31, 9, minute)]
    for user, item, minute in [
        (1, 'c', 30), (1, 'd', 40), (1, 'b', 50),
        (2, 'a', 20), (2, 'b', 25), (2, 'd', 40),
        (3, 'c', 35), (3, 'a', 50), (3, 'b', 55),
    ]
])

In [48]:
# Run the similarity builder on the hand-made fixture and print the
# (count matrix, recommendation dict) tuple it returns.
build_test = build_item_to_item_similarity(test)
print(build_test)

(array([[0., 2., 0., 1.],
       [0., 0., 0., 1.],
       [1., 2., 0., 1.],
       [0., 1., 0., 0.]]), {'a': 'b', 'b': 'd', 'c': 'b', 'd': 'b'})
