In [25]:
import sys
sys.path.append('..')

import constants.file_handler_constants as fh
from constants.user_constants import *
from constants.attraction_constants import *

import os
import glob
import time
import pandas as pd
import numpy as np
import json
import requests
import ast
import random

from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity

In [26]:
def generate_ratings(num_users, num_attractions, sparsity=0.8, seed=None):
    """
    Generates a sparse, long-format table of random user-attraction ratings.

    Every (user, attraction) pair is kept when a uniform draw from [0, 1)
    exceeds ``sparsity``; a kept pair receives a random integer rating
    between 1 and 5 (inclusive).

    Args:
        num_users: Number of users. User ids run from 1 to num_users.
        num_attractions: Number of attractions. Attraction ids are the
            strings 'A1' ... 'A<num_attractions>'.
        sparsity: Sparsity level (0.0 to 1.0); higher values keep fewer pairs.
        seed: Optional seed for reproducible output. When given, a private
            ``random.Random(seed)`` generator is used and the global
            ``random`` state is left untouched. When None (default), the
            module-level ``random`` functions are used.

    Returns:
        pandas.DataFrame: One row per generated rating with the columns
        user_id, attraction_id and rating_score. The columns are present
        even when no rating was generated, so downstream code such as
        ``pivot`` does not fail on a column-less frame.
    """
    columns = ['user_id', 'attraction_id', 'rating_score']

    # The `random` module and a `random.Random` instance expose the same
    # random()/randint() API, so either can serve as the generator.
    rng = random if seed is None else random.Random(seed)

    data = []
    for user_id in range(1, num_users + 1):
        for attraction_id in range(1, num_attractions + 1):
            if rng.random() > sparsity:  # Introduce sparsity
                rating = rng.randint(1, 5)  # Random rating between 1 and 5
                data.append({'user_id': user_id, 'attraction_id': f'A{attraction_id}', 'rating_score': rating})

    # Passing `columns` fixes the schema even when `data` is empty.
    return pd.DataFrame(data, columns=columns)

In [27]:
# Seed the global RNG so that Restart & Run All regenerates the same
# synthetic ratings (and therefore the same matrix and recommendations).
RANDOM_SEED = 42
random.seed(RANDOM_SEED)

rating_df = generate_ratings(10, 20, sparsity=0.8)
rating_df

Unnamed: 0,user_id,attraction_id,rating_score
0,1,A8,5
1,1,A10,1
2,1,A11,5
3,1,A13,1
4,3,A4,2
5,3,A10,4
6,3,A11,4
7,3,A12,1
8,4,A1,5
9,4,A7,2


In [28]:
# Reshape the long-format ratings into a user x attraction matrix.
# (user, attraction) pairs without a rating are filled with 0.
final_ratings_matrix = (
    rating_df
    .pivot(index='user_id', columns='attraction_id', values='rating_score')
    .fillna(0)
)
final_ratings_matrix

attraction_id,A1,A10,A11,A12,A13,A14,A15,A17,A19,A2,A20,A3,A4,A5,A6,A7,A8,A9
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
3,0.0,4.0,4.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0
4,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,2.0,0.0,1.0
5,0.0,0.0,0.0,2.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
6,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,3.0,0.0
7,0.0,0.0,3.0,0.0,0.0,0.0,0.0,5.0,0.0,1.0,0.0,3.0,0.0,2.0,0.0,0.0,2.0,0.0
8,0.0,1.0,0.0,0.0,0.0,4.0,0.0,2.0,0.0,4.0,2.0,0.0,4.0,5.0,0.0,0.0,0.0,0.0
10,0.0,0.0,0.0,5.0,0.0,4.0,0.0,0.0,2.0,4.0,5.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0


In [29]:
# defining a function to get similar users
def similar_users(user_index, interactions_matrix):
    """
    Ranks all other users by cosine similarity to the given user.

    Args:
        user_index: Row label (as used with ``.loc``) of the target user in
            interactions_matrix. A KeyError is raised if it is not present.
        interactions_matrix: pandas.DataFrame with one row per user and one
            numeric column per attraction.

    Returns:
        tuple: (most_similar_users, similarity_score)
            most_similar_users: row labels of every user except user_index,
                ordered from most to least similar. Ties keep the row order
                of interactions_matrix.
            similarity_score: the matching cosine similarities as plain
                floats, aligned element by element with most_similar_users.
                A user whose row is all zeros gets similarity 0.0.
    """
    target = interactions_matrix.loc[user_index].to_numpy(dtype=float)

    # Exclude the target user by label. Relying on "the target sorts first"
    # breaks when the target row is all zeros (its self-similarity is 0).
    others = interactions_matrix.drop(index=user_index)
    other_values = others.to_numpy(dtype=float)

    # Cosine similarity of every remaining row against the target in one pass.
    dots = other_values @ target
    norms = np.linalg.norm(other_values, axis=1) * np.linalg.norm(target)
    # Where either vector has zero length the similarity is defined as 0.
    scores = np.divide(dots, norms, out=np.zeros_like(dots), where=norms > 0)

    # Pair scores with the actual index labels (not positions), so the
    # function also works when user ids are not 0..n-1.
    # sorted() is stable, so equal scores keep the matrix row order.
    ranked = sorted(
        zip(others.index.tolist(), scores.tolist()),
        key=lambda pair: pair[1],
        reverse=True,
    )

    most_similar_users = [user for user, _ in ranked]
    similarity_score = [score for _, score in ranked]

    return most_similar_users, similarity_score
     

In [30]:
# defining the recommendations function to get recommendations by using the similar users' preferences
def recommendations(user_index, num_of_attractions, interactions_matrix):
    """
    Recommends attractions to a user based on what similar users have rated.

    Similar users are visited in the order returned by ``similar_users``.
    For each of them, the attractions they rated (value > 0) that neither
    the target user nor an earlier similar user has rated are added to the
    result, highest rating by that similar user first (ties keep the column
    order of interactions_matrix). Collection stops as soon as enough
    attractions have been gathered.

    Args:
        user_index: Row label of the target user in interactions_matrix.
        num_of_attractions: Maximum number of attractions to return.
        interactions_matrix: pandas.DataFrame with one row per user and one
            column per attraction; a value > 0 means the user rated it.

    Returns:
        list: At most num_of_attractions column labels of
        interactions_matrix, none of which the target user has rated.
    """
    most_similar_users = similar_users(user_index, interactions_matrix)[0]

    # Attraction ids the target user has already interacted with.
    target_ratings = interactions_matrix.loc[user_index]
    observed_interactions = set(target_ratings[target_ratings > 0].index)

    recommended = []
    for similar_user in most_similar_users:
        if len(recommended) >= num_of_attractions:
            break

        # Attractions rated by this similar user, best-rated first. A stable
        # sort keeps the result deterministic across runs, unlike iterating
        # over a set of string ids.
        neighbour_ratings = interactions_matrix.loc[similar_user]
        rated = neighbour_ratings[neighbour_ratings > 0]
        ranked = rated.sort_values(ascending=False, kind='stable')

        recommended.extend(
            attraction_id
            for attraction_id in ranked.index
            if attraction_id not in observed_interactions
        )
        # Remember everything seen so far so that later similar users do not
        # contribute duplicates.
        observed_interactions.update(rated.index)

    return recommended[:num_of_attractions]