In [1]:
import sys
sys.path.append('..')

import constants.file_handler_constants as fh
from constants.user_constants import *
from constants.attraction_constants import *

import os
import glob
import time
import pandas as pd
import numpy as np
import json
import requests
import ast
import random

from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

from scipy.sparse import csr_matrix

In [2]:
def generate_ratings(num_users, num_attractions, sparsity=0.8):
    """
    Generates a sparse matrix of user-attraction ratings.

    Args:
        num_users: Number of users.
        num_attractions: Number of attractions.
        sparsity: Sparsity level (0.0 to 1.0).

    Returns:
        pandas.DataFrame: A DataFrame containing user_id, attraction_id, and rating_score.
    """

    data = []
    for user_id in range(1, num_users + 1):
        for attraction_id in range(1, num_attractions + 1):
            if random.random() > sparsity:  # Introduce sparsity
                rating = random.randint(1, 5)  # Random rating between 1 and 5
                data.append({'user_id': user_id, 'attraction_id': f'A{attraction_id}', 'rating_score': rating})

    return pd.DataFrame(data)

In [3]:
# Users with Similar Behavior to User 1: 2, 9, 11, 12, ...
data = {
    "attraction_id": ["A1", "A10", "A11", "A12", "A13", "A14", "A15", "A17", "A19", "A2", "A20", "A3", "A4", "A5", "A6", "A7", "A8", "A9"],
    1:  [0.0, 1.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0],
    2:  [0.0, 1.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0],
    3:  [0.0, 4.0, 4.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0],
    4:  [5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 1.0],
    5:  [0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
    6:  [0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 3.0, 0.0],
    7:  [0.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 1.0, 0.0, 3.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0],
    8:  [0.0, 1.0, 0.0, 0.0, 0.0, 4.0, 0.0, 2.0, 0.0, 4.0, 2.0, 0.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0],
    9:  [0.0, 1.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0],
    10: [0.0, 0.0, 0.0, 5.0, 0.0, 4.0, 0.0, 0.0, 2.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0],
    # 11: [0.0, 1.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0],
    # 12: [0.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0],
    # 13: [5.0, 4.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 3.0, 0.0, 0.0],
    # 14: [0.0, 0.0, 0.0, 3.0, 0.0, 0.0, 4.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 3.0, 0.0, 1.0, 0.0],
    # 15: [1.0, 2.0, 0.0, 0.0, 4.0, 0.0, 0.0, 0.0, 0.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 0.0],
    # 16: [0.0, 1.0, 5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0],
    # 17: [0.0, 1.0, 4.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 5.0, 0.0],
    # 18: [4.0, 0.0, 0.0, 5.0, 0.0, 3.0, 2.0, 0.0, 0.0, 3.0, 5.0, 0.0, 1.0, 0.0, 0.0, 0.0, 3.0, 0.0],
    # 19: [0.0, 0.0, 0.0, 5.0, 0.0, 5.0, 0.0, 0.0, 2.0, 0.0, 0.0, 5.0, 0.0, 4.0, 0.0, 0.0, 2.0, 0.0],
    # 20: [0.0, 5.0, 5.0, 0.0, 3.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 0.0, 1.0, 0.0, 2.0, 0.0, 4.0, 0.0],
}

df = pd.DataFrame(data).set_index("attraction_id").T

# Transpose the dataframe: Rows = Users, Columns = Attractions
ratings_matrix = df

# Fill NaN values with 0 (if any)
ratings_matrix = ratings_matrix.fillna(0)
ratings_matrix

attraction_id,A1,A10,A11,A12,A13,A14,A15,A17,A19,A2,A20,A3,A4,A5,A6,A7,A8,A9
1,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
2,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
3,0.0,4.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
4,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0
5,0.0,0.0,0.0,2.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,3.0,0.0
7,0.0,0.0,3.0,0.0,0.0,0.0,0.0,5.0,0.0,1.0,0.0,3.0,0.0,2.0,0.0,0.0,2.0,0.0
8,0.0,1.0,0.0,0.0,0.0,4.0,0.0,2.0,0.0,4.0,2.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0
9,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
10,0.0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,2.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0


In [4]:
# rating_df = generate_ratings(10, 20, sparsity=0.8)
# rating_df

In [5]:
# final_ratings_matrix = rating_df.pivot(index = 'user_id', columns ='attraction_id', values = 'rating_score').fillna(0)
# final_ratings_matrix

In [6]:
# defining a function to get similar users
def similar_users(user_index, interactions_matrix):
    similarity = []
    for user in interactions_matrix.index: 
        #finding cosine similarity between the user_id and each user
        sim = cosine_similarity([interactions_matrix.loc[user_index]], [interactions_matrix.loc[user]])
        #Appending the user and the corresponding similarity score with user_id as a tuple
        similarity.append((user,sim))

    similarity.sort(key=lambda x: x[1], reverse=True)
    most_similar_users = [tup[0] for tup in similarity] #Extract the user from each tuple in the sorted list
    similarity_score = [tup[1] for tup in similarity] ##Extracting the similarity score from each tuple in the sorted list
   
    #Remove the original user and its similarity score and keep only other similar users 
    most_similar_users.remove(user_index)
    similarity_score.remove(similarity_score[0])
       
    return most_similar_users, similarity_score
     

In [7]:
# defining the recommendations function to get recommendations by using the similar users' preferences
def recommendations(user_id, ratings_matrix):
    ratings_sparse = csr_matrix(ratings_matrix)

    knn = NearestNeighbors(metric='cosine', algorithm='brute')
    knn.fit(ratings_sparse)

    # Find the user's ratings
    user_index = ratings_matrix.index.tolist().index(user_id)
    
    # Get neighbors
    distances, indices = knn.kneighbors(ratings_sparse[user_index], n_neighbors=3+1)
   
    print(indices)
    print("check sim user")
    for i, idx in enumerate(indices[0]):
        print(f"{idx+1} -> (Similarity: {1 - distances[0][i]:.2f})")

    # Get similar users
    similar_users = indices.flatten()[1:] + 1  # Exclude the user itself
    print("check similar_users", similar_users)

    # Aggregate attraction ratings from similar users
    similar_users_ratings = ratings_matrix.iloc[similar_users].mean(axis=0)
    
    # Exclude already rated attractions by the user
    user_ratings = ratings_matrix.iloc[user_index]
    recommendations = similar_users_ratings[user_ratings == 0]
    
    # Sort recommendations by highest score
    recommendations = recommendations.sort_values(ascending=False)
    
    return recommendations.head(30)  # Return top 5 recommendations

In [8]:
res_recommendations_df = recommendations(1,ratings_matrix)
res_recommendations_df

[[0 1 8 2]]
check sim user
1 -> (Similarity: 1.00)
2 -> (Similarity: 1.00)
9 -> (Similarity: 1.00)
3 -> (Similarity: 0.55)
check similar_users [2 9 3]


attraction_id
A20    2.666667
A12    2.000000
A1     1.666667
A14    1.333333
A2     1.333333
A19    0.666667
A4     0.666667
A7     0.666667
A9     0.333333
A15    0.000000
A17    0.000000
A3     0.000000
A6     0.000000
A5     0.000000
dtype: float64

In [9]:
ratings_matrix.values

array([[0., 1., 5., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        5., 0.],
       [0., 1., 5., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        5., 0.],
       [0., 4., 4., 1., 0., 0., 0., 0., 0., 0., 0., 0., 2., 0., 0., 0.,
        0., 0.],
       [5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 3., 0., 0., 0., 0., 2.,
        0., 1.],
       [0., 0., 0., 2., 0., 0., 4., 0., 0., 0., 0., 0., 0., 0., 1., 0.,
        0., 0.],
       [0., 3., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 5., 0., 0.,
        3., 0.],
       [0., 0., 3., 0., 0., 0., 0., 5., 0., 1., 0., 3., 0., 2., 0., 0.,
        2., 0.],
       [0., 1., 0., 0., 0., 4., 0., 2., 0., 4., 2., 0., 4., 5., 0., 0.,
        0., 0.],
       [0., 1., 5., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        5., 0.],
       [0., 0., 0., 5., 0., 4., 0., 0., 2., 4., 5., 0., 0., 0., 0., 0.,
        3., 0.]])

In [10]:
res_recommendations = res_recommendations_df.index
res_recommendations

Index(['A20', 'A12', 'A1', 'A14', 'A2', 'A19', 'A4', 'A7', 'A9', 'A15', 'A17',
       'A3', 'A6', 'A5'],
      dtype='object', name='attraction_id')

In [11]:
ratings_matrix

attraction_id,A1,A10,A11,A12,A13,A14,A15,A17,A19,A2,A20,A3,A4,A5,A6,A7,A8,A9
1,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
2,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
3,0.0,4.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
4,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0
5,0.0,0.0,0.0,2.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,3.0,0.0
7,0.0,0.0,3.0,0.0,0.0,0.0,0.0,5.0,0.0,1.0,0.0,3.0,0.0,2.0,0.0,0.0,2.0,0.0
8,0.0,1.0,0.0,0.0,0.0,4.0,0.0,2.0,0.0,4.0,2.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0
9,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
10,0.0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,2.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0
