In [2]:
from neo4j import GraphDatabase
import pandas as pd
import numpy as np

from dotenv import load_dotenv
import os 
# db access
# Read the Neo4j connection settings from a local .env file / the environment.
load_dotenv()

NEO4J_URI = os.getenv("NEO4J_URI")
NEO4J_USERNAME = os.getenv("NEO4J_USERNAME")
NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")


print(f" URI: {NEO4J_URI}")
print(f" Username: {NEO4J_USERNAME}")
# Never echo the secret itself: cell outputs are stored in the notebook file
# and travel with it when it is shared or committed.
print(f"password : {'********' if NEO4J_PASSWORD else '<not set>'}")


# Stop here if any setting is missing; the driver cannot be built without them.
if not NEO4J_URI or not NEO4J_USERNAME or not NEO4J_PASSWORD:
    raise ValueError(" variables are not set ! ")

AUTH = (NEO4J_USERNAME, NEO4J_PASSWORD)
driver = GraphDatabase.driver(NEO4J_URI, auth=AUTH)
# This module-level session is left open by this cell; call session.close()
# and driver.close() when the notebook is done with them.
session = driver.session()
try:
    driver.verify_connectivity()
    print("connected to db ! ")
except Exception as e:
    # Report the failure without raising so the error text is visible in the output.
    print(f"Failed to connect : {e}")


 URI: neo4j+s://f7a0c2b2.databases.neo4j.io
 Username: neo4j
password : [REDACTED]
connected to db ! 


In [32]:
def fetch_interactions():
    """Load every User -> Trip / Destination / Event relationship from Neo4j.

    A single Cypher query (three MATCH clauses combined with UNION) is run on
    a driver opened just for this call, using the module-level NEO4J_URI,
    NEO4J_USERNAME and NEO4J_PASSWORD. Driver and session are closed before
    the function returns.

    Returns:
        pandas.DataFrame with the columns "user", "item", "item_type" and
        "interaction", where "interaction" is the relationship type name.
    """
    columns = ["user", "item", "item_type", "interaction"]
    query = """
    MATCH (u:User)-[r]->(t:Trip)
    RETURN u.id AS user, t.id AS item, 'Trip' AS item_type, type(r) AS interaction
    UNION
    MATCH (u:User)-[r]->(d:Destination)
    RETURN u.id AS user, d.id AS item, 'Destination' AS item_type, type(r) AS interaction
    UNION
    MATCH (u:User)-[r]->(e:Event)
    RETURN u.id AS user, e.id AS item, 'Event' AS item_type,type(r) AS interaction
    """

    credentials = (NEO4J_USERNAME, NEO4J_PASSWORD)
    with GraphDatabase.driver(uri=NEO4J_URI, auth=credentials) as driver:
        with driver.session() as session:
            # Consume the result while the session is still open.
            rows = []
            for record in session.run(query):
                rows.append(tuple(record[name] for name in columns))
    return pd.DataFrame(rows, columns=columns)
# Pull the interactions from Neo4j into a DataFrame and display it.
data = fetch_interactions()
data

Unnamed: 0,user,item,item_type,interaction
0,633af53b-f78c-474c-9324-2a734bd86d24,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,SEARCHED_FOR
1,65ab857a-6ff4-493f-aa8d-ddde6463cc20,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,SEARCHED_FOR
2,633af53b-f78c-474c-9324-2a734bd86d24,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,WISHED
3,65ab857a-6ff4-493f-aa8d-ddde6463cc20,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,WISHED
4,633af53b-f78c-474c-9324-2a734bd86d24,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,CREATED
5,633af53b-f78c-474c-9324-2a734bd86d24,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,FAVORITED
6,72effc5b-589a-4076-9be5-f7c3d8533f70,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,FAVORITED
7,8aaafb9e-0f60-47d1-9b98-1b171564fbf9,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,FAVORITED
8,633af53b-f78c-474c-9324-2a734bd86d24,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,CLONED
9,633af53b-f78c-474c-9324-2a734bd86d24,273774a6-7fdd-429b-8d78-6ec63b909ba6,Trip,RECOMMENDED


In [19]:

def preprocess_data(df, interaction_weights=None):
    """Convert interactions into a user-item matrix.

    Each interaction type is mapped to a numeric weight and the weights are
    pivoted into one row per user and one column per (item, item_type) pair.

    Args:
        df: DataFrame with the columns "user", "item", "item_type" and
            "interaction". It is not modified.
        interaction_weights: optional mapping from interaction name to
            weight. Defaults to VISITED=5, REVIEWED=4, WISHED=3,
            SEARCHED_FOR=1.

    Returns:
        DataFrame indexed by "user" with MultiIndex columns
        ("item", "item_type"). Several weighted interactions of one user with
        one item are averaged (pivot_table's default aggregation);
        interaction types missing from the mapping get no weight and do not
        contribute; cells without any weighted interaction are 0.
    """
    if interaction_weights is None:
        interaction_weights = {"VISITED": 5, "WISHED": 3, "SEARCHED_FOR": 1, "REVIEWED": 4}

    # assign() returns a new frame, so the caller's DataFrame does not
    # silently gain a "weight" column (re-running the cell stays idempotent).
    weighted = df.assign(weight=df["interaction"].map(interaction_weights))

    pivot_table = weighted.pivot_table(
        index="user",
        columns=["item", "item_type"],
        values="weight",
        fill_value=0,
    )
    return pivot_table
# Build the user-item weight matrix from the fetched interactions and display it.
df = preprocess_data(data)
df

item,05cfceb2-9e2a-43c5-9d0b-20fc25d1c96f,273774a6-7fdd-429b-8d78-6ec63b909ba6,728e6824-d306-4e55-8fa6-67b2619ebad7,793871ab-0ff1-449e-b374-db859008fad4,7c0cc556-e845-4265-9e7e-006579156f51,7f283076-3378-4bf1-a556-ce74099d4e15,e228007d-9609-4b61-bb4a-92cd5efa710a
item_type,Trip,Trip,Trip,Destination,Event,Trip,Trip
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
43c2b772-c135-4791-b5fd-d073a106217b,0.0,0.0,0.0,0.0,0.0,2.0,0.0
633af53b-f78c-474c-9324-2a734bd86d24,0.0,2.0,0.0,3.25,1.0,0.0,0.0
65ab857a-6ff4-493f-aa8d-ddde6463cc20,0.0,2.0,0.0,0.0,0.0,0.0,0.0
72effc5b-589a-4076-9be5-f7c3d8533f70,0.0,0.0,2.0,0.0,0.0,0.0,0.0
99ae6489-05d2-49df-bb62-490a2a3f707b,0.0,0.0,0.0,0.0,0.0,0.0,2.0
9fc3a1e9-07d7-4736-8ab2-ecd353c93eea,0.0,0.0,0.0,0.0,0.0,0.0,2.0
b5aebe26-94b4-4125-b27e-d63033cbad0a,2.0,0.0,0.0,0.0,0.0,0.0,0.0
c71f78b1-5ee0-41a9-bfa5-170f0dc51bc4,0.0,0.0,0.0,0.0,0.0,2.0,0.0


In [25]:
from scipy.sparse.linalg import svds
def apply_svd(data, k=50):
    """Return a low-rank reconstruction of a user-item matrix via truncated SVD.

    Args:
        data: DataFrame of numeric values (users as rows, items as columns).
        k: requested number of singular components. It is reduced to
            min(data.shape) - 1 when larger, because that is the most the
            sparse solver is asked for here.

    Returns:
        DataFrame with the same index and columns as ``data`` holding the
        reconstructed values.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    matrix = data.to_numpy(dtype=np.float64)
    smallest_dim = min(matrix.shape)

    if smallest_dim == 0:
        # Nothing to factorize: hand back an (empty) float frame of the same shape.
        return pd.DataFrame(matrix, index=data.index, columns=data.columns)

    if smallest_dim == 1:
        # Clamping k to min(shape) - 1 would give k = 0, which svds rejects.
        # A single row/column has rank <= 1, so use the dense SVD instead.
        U, sigma, Vt = np.linalg.svd(matrix, full_matrices=False)
    else:
        k = min(k, smallest_dim - 1)
        U, sigma, Vt = svds(matrix, k=k)

    # Scaling the columns of U by sigma equals U @ diag(sigma) without
    # materialising the diagonal matrix.
    predicted_ratings = (U * sigma) @ Vt
    return pd.DataFrame(predicted_ratings, index=data.index, columns=data.columns)
# Reconstruct the user-item matrix with apply_svd, requesting k=5 components,
# and display the result.
predicted_ratings = apply_svd(df,5)
predicted_ratings

item,05cfceb2-9e2a-43c5-9d0b-20fc25d1c96f,273774a6-7fdd-429b-8d78-6ec63b909ba6,728e6824-d306-4e55-8fa6-67b2619ebad7,793871ab-0ff1-449e-b374-db859008fad4,7c0cc556-e845-4265-9e7e-006579156f51,7f283076-3378-4bf1-a556-ce74099d4e15,e228007d-9609-4b61-bb4a-92cd5efa710a
item_type,Trip,Trip,Trip,Destination,Event,Trip,Trip
user,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
43c2b772-c135-4791-b5fd-d073a106217b,-2.389945e-18,3.996942e-16,-9.417466e-18,1.378304e-16,1.557962e-16,2.0,-3.499048e-16
633af53b-f78c-474c-9324-2a734bd86d24,-2.302252e-16,2.391331,-9.071914e-16,2.961322,0.9111761,-5.089372e-16,-1.487668e-15
65ab857a-6ff4-493f-aa8d-ddde6463cc20,5.425178e-16,0.7466266,2.137766e-15,0.9245906,0.2844894,6.661244e-16,-6.39408e-16
72effc5b-589a-4076-9be5-f7c3d8533f70,-5.6953e-18,2.650878e-15,2.0,-9.294672e-16,-4.467667e-16,4.100273e-16,-1.570161e-16
99ae6489-05d2-49df-bb62-490a2a3f707b,5.6021480000000006e-33,-4.820086e-16,2.960624e-32,-4.978433e-16,-2.551491e-17,-3.909569e-16,2.0
9fc3a1e9-07d7-4736-8ab2-ecd353c93eea,5.6021480000000006e-33,-4.820086e-16,2.960624e-32,-4.978433e-16,-2.551491e-17,-5.019792e-16,2.0
b5aebe26-94b4-4125-b27e-d63033cbad0a,2.0,6.727343e-16,4.811026e-17,-2.358783e-16,-1.133795e-16,-5.323255e-16,5.1210210000000003e-17
c71f78b1-5ee0-41a9-bfa5-170f0dc51bc4,-4.417925e-32,2.619939e-16,-1.8870170000000001e-31,-3.269174e-17,1.033278e-16,2.0,-1.513945e-17


In [29]:
def get_top_n_recommendations(user_id, predictions, item_type, df, n=5):
    """Return the n highest-scored items of one type for a user.

    Args:
        user_id: row label of the user in ``predictions``.
        predictions: DataFrame of scores whose columns carry an "item" level.
        item_type: value of ``df["item_type"]`` to keep (e.g. 'Trip').
        df: interactions DataFrame with "item" and "item_type" columns; it
            supplies the item ids that belong to ``item_type``.
        n: maximum number of entries to return.

    Returns:
        Series of scores for that user, sorted in descending order and
        limited to items of the requested type. Items the user has already
        interacted with are not filtered out.
    """
    wanted_type = df["item_type"] == item_type
    candidate_ids = df.loc[wanted_type, "item"].unique()

    # Rank all of the user's scores first, then keep only the candidates.
    ranked = predictions.loc[user_id].sort_values(ascending=False)
    is_candidate = ranked.index.get_level_values("item").isin(candidate_ids)
    return ranked[is_candidate].head(n)
# Show the top 'Trip' entries (default n=5) for one user from the reconstructed scores.
get_top_n_recommendations("c71f78b1-5ee0-41a9-bfa5-170f0dc51bc4" ,predicted_ratings,'Trip',data)

item                                  item_type
7f283076-3378-4bf1-a556-ce74099d4e15  Trip         2.000000e+00
273774a6-7fdd-429b-8d78-6ec63b909ba6  Trip         2.619939e-16
05cfceb2-9e2a-43c5-9d0b-20fc25d1c96f  Trip        -4.417925e-32
728e6824-d306-4e55-8fa6-67b2619ebad7  Trip        -1.887017e-31
e228007d-9609-4b61-bb4a-92cd5efa710a  Trip        -1.513945e-17
Name: c71f78b1-5ee0-41a9-bfa5-170f0dc51bc4, dtype: float64

In [31]:
from sklearn.cluster import KMeans
def cluster_users(matrix, num_clusters=5):
    """Cluster users based on their interaction patterns.

    Args:
        matrix: user-item DataFrame with one row per user; the index holds
            the user ids.
        num_clusters: requested number of KMeans clusters. When the matrix
            has fewer rows than this, the number of rows is used instead,
            because KMeans cannot form more clusters than there are samples.

    Returns:
        DataFrame with the columns "user" and "cluster" (the KMeans label of
        each row of ``matrix``).
    """
    n_users = len(matrix)
    # Clamp only when there is at least one user; an empty matrix is left for
    # KMeans itself to reject.
    n_clusters = min(num_clusters, n_users) if n_users else num_clusters

    # Fixed random_state keeps the labels reproducible between runs.
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    clusters = kmeans.fit_predict(matrix)
    return pd.DataFrame({"user": matrix.index, "cluster": clusters})
# Cluster the users of the user-item matrix with the default number of clusters.
cluster_users(df)

Unnamed: 0,user,cluster
0,43c2b772-c135-4791-b5fd-d073a106217b,1
1,633af53b-f78c-474c-9324-2a734bd86d24,2
2,65ab857a-6ff4-493f-aa8d-ddde6463cc20,0
3,72effc5b-589a-4076-9be5-f7c3d8533f70,4
4,99ae6489-05d2-49df-bb62-490a2a3f707b,3
5,9fc3a1e9-07d7-4736-8ab2-ecd353c93eea,3
6,b5aebe26-94b4-4125-b27e-d63033cbad0a,0
7,c71f78b1-5ee0-41a9-bfa5-170f0dc51bc4,1


In [None]:
def find_similar_users(user_id, matrix, top_n=10):
    """Find top-N most similar users using cosine similarity.

    Args:
        user_id: label of the user in ``matrix.index``.
        matrix: numeric user-item DataFrame with one row per user.
        top_n: maximum number of user ids to return.

    Returns:
        List of user ids ordered from most to least similar. The queried user
        is never part of the result; ties keep the row order of ``matrix``.

    Raises:
        ValueError: if ``user_id`` is not in ``matrix.index``.
    """
    user_ids = list(matrix.index)
    user_idx = user_ids.index(user_id)  # raises ValueError for unknown users

    # Cosine similarity computed with numpy (already imported by the notebook),
    # so the function does not depend on a name that is never imported.
    values = matrix.to_numpy(dtype=np.float64)
    norms = np.linalg.norm(values, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # all-zero rows get similarity 0 instead of dividing by zero
    unit_rows = values / norms
    scores = unit_rows @ unit_rows[user_idx]

    # Stable descending order; the user is removed by position rather than by
    # assuming they sort first, which fails when another user has an identical row.
    order = np.argsort(-scores, kind="stable")
    similar_users = [user_ids[i] for i in order if i != user_idx]
    return similar_users[:top_n]

In [None]:
# End-to-end run: fetch the interactions, build the user-item matrix,
# reconstruct it with SVD and cluster the users.
df = fetch_interactions()
user_item_matrix = preprocess_data(df)
predicted_ratings = apply_svd(user_item_matrix)
user_clusters = cluster_users(user_item_matrix)

# Example usage: Get recommendations for a specific user
user_id = "user_123"  # Replace with actual user ID
if user_id in user_item_matrix.index:
    print("Top 10 similar users:", find_similar_users(user_id, user_item_matrix, top_n=10))
    print("Recommended Trips:", get_top_n_recommendations(user_id, predicted_ratings, "Trip", df, n=5))
    print("Recommended Destinations:", get_top_n_recommendations(user_id, predicted_ratings, "Destination", df, n=5))
else:
    # With the placeholder id the lookups would raise and break "Run All".
    print(f"User {user_id!r} is not in the user-item matrix; set user_id to a value from user_item_matrix.index.")