In [1]:
import json
import numpy as np
from numpy.linalg import norm
from neo4j import GraphDatabase

In [2]:
def cosine_similarity(A, B):
    """Score how alike two weight vectors are; larger means more alike.

    Despite the name this is not the textbook cosine of two equal-length
    vectors. The inputs may differ in length: every pairwise product
    ``A[i] * B[j]`` is divided by ``||A|| * ||B||`` and the resulting matrix
    is averaged into a single number.

    Parameters
    ----------
    A, B : sequence of numbers or numpy.ndarray
        Weight vectors. Values are cast to ``int``, so fractional parts are
        truncated.

    Returns
    -------
    int or float
        ``-1`` when either input is missing or empty, ``0.0`` when either
        vector has zero norm (the score is undefined there and used to come
        back as ``nan``), otherwise the averaged score.
    """
    for vector in (A, B):
        # Truthiness of a multi-element ndarray raises ValueError, so arrays
        # are checked by size; everything else keeps the plain falsy test.
        missing = vector.size == 0 if isinstance(vector, np.ndarray) else not vector
        if missing:
            return -1
    A = np.array(A, dtype=int)
    B = np.array(B, dtype=int)
    a_ref = A.reshape(-1, 1)  # column vector
    b_ref = B.reshape(1, -1)  # row vector
    scale = norm(a_ref) * norm(b_ref)
    if scale == 0:
        # An all-zero vector has no direction; dividing would yield nan/inf.
        return 0.0
    # (len(A), len(B)) matrix of normalised pairwise products.
    sim = np.dot(a_ref, b_ref) / scale
    return sim.mean()

In [None]:
def compute_knn(item, items, k):
    """Return the entries of ``items`` most similar to ``item``.

    Every key other than ``item`` is scored against ``items[item]`` with
    ``cosine_similarity``; only strictly positive scores are kept.

    Returns a list of ``(other_key, score)`` tuples ordered from the highest
    score down and sliced with ``[:k]``.
    """
    scored = (
        (candidate, cosine_similarity(items[item], items[candidate]))
        for candidate in items
        if candidate != item
    )
    positive = [(candidate, score) for candidate, score in scored if score > 0]
    # A reverse sort is still stable, so equal scores keep their original
    # relative order.
    positive.sort(key=lambda pair: pair[1], reverse=True)
    return positive[:k]

In [None]:
"""
MATCH (u:User)-[r]-(s:Symptom) 
RETURN u.user AS user, collect(s.symptom) AS symptoms, collect(toIntegerOrNull(s.weight)) AS weights
"""

In [None]:
"""
MATCH (u:User)-[r]-(s:Symptom) 
WITH DISTINCT u.user AS user, collect(s.symptom) AS symptoms, collect(toIntegerOrNull(s.weight)) AS weights
UNWIND symptoms AS symptom
UNWIND weights AS weight
CREATE (p:Patient {patient:user, symptom: symptom, weight: weight})
"""

In [None]:
"""
CALL gds.graph.project(
    'diseaseGraph',
    {
        Patient: {
            properties: ['user','symptoms','weights']
        }
    },
    '*'
);
"""

In [None]:
def get_item_vectors(session, context):
    """Build a dict mapping an id to the vector returned by a per-id query.

    A listing query is run first; for every row it yields, a second query is
    run and the first field of its single record is stored under that row's id.
    Progress is printed every 100 rows, followed by the final row count and
    the size of the resulting dict.

    Parameters
    ----------
    session
        Object used as a context manager whose value exposes
        ``run(query[, parameters])``. Presumably a neo4j session — verify
        against the caller.
    context
        Indexable; ``context[1]`` must offer ``.copy()`` and is used as the
        parameter mapping of the per-id query.

    Returns
    -------
    dict
        ``{item_id: vector}``.

    NOTE(review): the pieces below do not agree with each other and the
    function looks like it cannot run as written — see the inline notes.
    """
    # Lists users with their symptom names and integer weights.
    list_of_items_query = """
            MATCH (u:User)-[r]-(s:Symptom) 
            RETURN u.user AS user, collect(s.symptom) AS symptoms, collect(toIntegerOrNull(s.weight)) AS weights
        """
    # Copy so that adding "itemId" below does not mutate the caller's mapping.
    context_info = context[1].copy()
    match_query = """
                MATCH (event:Event)-[:EVENT_ITEM]->(item:Item)
                MATCH (event)-[:EVENT_USER]->(user:User)
            """
    # NOTE(review): `u` is only bound when the "country" branch below adds
    # `MATCH (u:User)`; the MATCH above binds `user`, not `u`. `$user` is also
    # never added to the parameters (only "itemId" is) — confirm the intent.
    where_query = """
                WHERE u.user = $user
            """

    if "country" in context_info:
        match_query += "MATCH (u:User) "
        where_query += "AND u.country = $country "

    # `rating` is projected but not used by the RETURN.
    return_query = """
                WITH user.user as userId, event.rating as rating
                ORDER BY id(user)
                RETURN collect(distinct userId) as vector 
            """

    query = match_query + where_query + return_query
    items_VSM_sparse = {}
    # NOTE(review): entering the session as a context manager presumably closes
    # it on exit, so the caller cannot reuse the object it passed in — confirm.
    with session as s:
        i = 0
        for item in s.run(list_of_items_query):
            # NOTE(review): the listing query returns `user`, `symptoms` and
            # `weights`; there is no `itemId` column, so this lookup looks
            # like it will fail on the first row.
            item_id = item["itemId"]
            context_info["itemId"] = item_id
            vector = s.run(query, context_info)
            # Keep the first field of the single record the query yields.
            items_VSM_sparse[item_id] = vector.single()[0]
            i += 1
            if i % 100 == 0:
                print(i, "rows processed")
        print(i, "rows processed")
    print(len(items_VSM_sparse))
    return items_VSM_sparse

In [None]:
def store_knn(item, knn, context, session):
  """Replace the stored neighbours of ``item`` for one context.

  Inside a single transaction the Similarity nodes already attached to
  ``item`` for this context id are deleted, then one Similarity node per
  neighbour is created and linked to both items (and to a Location when the
  context parameters contain ``"location"``).

  Parameters
  ----------
  item
      Value matched against ``Item.itemId``.
  knn
      Iterable of ``(other_item_id, weight)`` pairs.
  context
      Indexable; ``context[0]`` is the context id and ``context[1]`` a
      mapping of extra query parameters (copied, never mutated).
  session
      Used as a context manager; its value must provide
      ``begin_transaction()``.
  """
  print('saving knn')
  context_id, context_params = context[0], context[1]
  cypher_params = context_params.copy()
  with session as db:
      txn = db.begin_transaction()
      neighbour_weights = {neighbour: weight for neighbour, weight in knn}
      # Removes what an earlier run stored for this item and context.
      delete_stale = """
          MATCH (s:Similarity)-[:RELATED_TO_SOURCE_ITEM]->(item:Item)
          WHERE item.itemId = $itemId AND s.contextId = $contextId
          DETACH DELETE s
      """

      # One Similarity node per neighbour, linked to source and destination.
      create_links = """
          MATCH (item:Item)
          WHERE item.itemId = $itemId
          UNWIND keys($knn) as otherItemId
          MATCH (other:Item)
          WHERE other.itemId = otherItemId
          CREATE (similarity:Similarity {weight: $knn[otherItemId], contextId: $contextId})
          MERGE (item)<-[:RELATED_TO_SOURCE_ITEM]-(similarity)
          MERGE (other)<-[:RELATED_TO_DEST_ITEM ]-(similarity)
      """

      if "location" in cypher_params:
          create_links += (
              "WITH similarity MATCH (location:Location {value: $location}) "
              "MERGE (location)<-[:RELATED_TO]-(similarity) "
          )

      txn.run(delete_stale, {"itemId": item, "contextId": context_id})
      cypher_params.update(
          {"itemId": item, "contextId": context_id, "knn": neighbour_weights}
      )
      txn.run(create_links, cypher_params)
      txn.commit()

In [None]:
def compute_and_store_similarity(contexts, session_factory=None, k=20):
    """Compute and persist the nearest neighbours of every item, per context.

    For each context the item vectors are loaded with ``get_item_vectors``,
    the ``k`` nearest neighbours of every item are found with ``compute_knn``
    and written with ``store_knn``.

    Parameters
    ----------
    contexts
        Iterable of context objects, passed through unchanged to
        ``get_item_vectors`` and ``store_knn``.
    session_factory : callable, optional
        Zero-argument callable returning a new database session. Defaults to
        the notebook-level ``driver.session``, looked up when the function is
        called so the connection cell may run after this definition.
    k : int, optional
        Number of neighbours kept per item (default 20).
    """
    if session_factory is None:
        session_factory = driver.session
    for context in contexts:
        # Both helpers require a session argument and enter it with
        # ``with session as s``, which closes it when they return, so each
        # call gets a session of its own.
        items_vsm = get_item_vectors(session_factory(), context)
        for item in items_vsm:
            # compute_knn only reads the mapping, so no defensive copy is needed.
            knn = compute_knn(item, items_vsm, k)
            store_knn(item, knn, context, session_factory())

In [None]:
# Connection settings are read from config.json so that no credentials are
# embedded in the notebook itself.
with open('config.json', 'r') as f:
    credentials = json.load(f)

# All three keys are required; a missing one raises KeyError.
uri, user, password = credentials['uri'], credentials['user'], credentials['password']

# Notebook-wide driver plus one session opened from it.
driver = GraphDatabase.driver(uri, auth=(user, password))
session = driver.session()