In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
def return_attractions(path='datasets/final_attractions.csv'):
    """Load the numeric attraction features from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to 'datasets/final_attractions.csv'.
        The file must contain the columns Rating, Visitors, Historical,
        Natural, Amusement and Beach; any other columns are ignored.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per CSV row and six columns, always in the
        order Rating, Visitors, Historical, Natural, Amusement, Beach.
        A header-only file yields an array of shape (0, 6).
    """
    feature_columns = ['Rating', 'Visitors', 'Historical', 'Natural', 'Amusement', 'Beach']
    attractions_data = pd.read_csv(path, usecols=feature_columns)
    # `usecols` keeps the file's own column order, so select explicitly to
    # guarantee the documented column order of the result.
    return attractions_data[feature_columns].to_numpy()

# Display the loaded feature matrix as a quick sanity check of the CSV read.
return_attractions()

array([[4.8e+00, 3.0e+07, 5.0e+00, 4.0e+00, 2.0e+00, 0.0e+00],
       [4.5e+00, 2.0e+06, 2.0e+00, 5.0e+00, 3.0e+00, 0.0e+00],
       [4.4e+00, 1.0e+06, 3.0e+00, 5.0e+00, 3.0e+00, 0.0e+00],
       ...,
       [4.8e+00, 5.8e+07, 2.0e+00, 2.0e+00, 5.0e+00, 0.0e+00],
       [4.6e+00, 1.0e+07, 2.0e+00, 3.0e+00, 4.0e+00, 5.0e+00],
       [4.9e+00, 4.0e+06, 2.0e+00, 5.0e+00, 3.0e+00, 0.0e+00]])

In [3]:
def return_user_attractions_rating(path='datasets/user_attractions_rating.csv', users=None):
    """Load the per-user attraction ratings from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Defaults to 'datasets/user_attractions_rating.csv'.
    users : sequence of str, optional
        Column names to extract, one per user. Defaults to
        ['Noel', 'Harsh', 'Vikranth', 'Muthuraj', 'Armaan'].

    Returns
    -------
    numpy.ndarray
        2-D array with one row per CSV row and one column per entry of
        `users`, in the order given by `users`.

    Raises
    ------
    KeyError
        If one of the requested user columns is missing from the file.
    """
    if users is None:
        users = ['Noel', 'Harsh', 'Vikranth', 'Muthuraj', 'Armaan']
    user_attractions_data = pd.read_csv(path)
    # Selecting by the user list fixes the column order regardless of the
    # order in which the columns appear in the file.
    return user_attractions_data[list(users)].to_numpy()

In [4]:
# Standardise the attraction feature columns and load the rating matrix.
scaler = StandardScaler()
X = scaler.fit_transform(return_attractions())
Y = return_user_attractions_rating()

# Both matrices must describe the same attractions, row for row.
assert X.shape[0] == Y.shape[0], "feature and rating matrices have different row counts"

num_users = Y.shape[1]
num_features = X.shape[1]
num_attractions = X.shape[0]

# Seeded generator so the random initial parameters (and the costs computed
# from them) are identical after a kernel restart.
rng = np.random.default_rng(1234)
W = rng.random((num_users, num_features))
B = rng.random((1, num_users))


print(W.shape)
print(B.shape)
print("X shape: ", X.shape)
print("Y shape: ", Y.shape)

(5, 6)
(1, 5)
X shape:  (1252, 6)
Y shape:  (1252, 5)


In [5]:
def cofi_cost_func(X, W, b, Y, lambda_):
    """Regularised squared-error cost of the collaborative-filtering model.

    The predicted rating of attraction i by user j is
    ``dot(X[i], W[j]) + b[0][j]``. Entries with ``Y[i][j] == 0`` are treated
    as "not rated" and are excluded from the error term:

        J = 1/2 * sum over rated (i, j) of (prediction - Y[i][j]) ** 2
            + lambda_/2 * (sum(W ** 2) + sum(X ** 2))

    Parameters
    ----------
    X : array-like or tf.Variable/Tensor, shape (num_items, num_features)
    W : array-like or tf.Variable/Tensor, shape (num_users, num_features)
    b : array-like or tf.Variable/Tensor; only row 0 (one bias per user) is used
    Y : array-like, shape (num_items, num_users); 0 marks a missing rating
    lambda_ : float
        Regularisation strength applied to both W and X.

    Returns
    -------
    scalar
        A NumPy float when X, W and b are plain arrays/sequences; otherwise a
        TensorFlow scalar tensor, so the result stays differentiable under
        ``tf.GradientTape``.
    """
    if all(isinstance(arg, (np.ndarray, list, tuple)) for arg in (X, W, b)):
        # Pure NumPy path: one matrix product instead of nested Python loops.
        features = np.asarray(X, dtype=float)
        weights = np.asarray(W, dtype=float)
        bias = np.asarray(b, dtype=float)[0]
        ratings = np.asarray(Y, dtype=float)
        # Zero out the residual wherever the user has not rated the item.
        residual = np.where(ratings != 0, features @ weights.T + bias - ratings, 0.0)
        penalty = np.sum(weights ** 2) + np.sum(features ** 2)
        return 0.5 * np.sum(residual ** 2) + (lambda_ / 2) * penalty

    # TensorFlow path: same formula expressed with tensor ops, so the whole
    # cost is a handful of graph nodes rather than one node per element.
    predictions = tf.linalg.matmul(X, W, transpose_b=True) + b[0]
    ratings = tf.cast(Y, predictions.dtype)
    rated = tf.cast(tf.not_equal(ratings, 0), predictions.dtype)
    residual = (predictions - ratings) * rated
    penalty = tf.reduce_sum(tf.square(W)) + tf.reduce_sum(tf.square(X))
    return 0.5 * tf.reduce_sum(tf.square(residual)) + (lambda_ / 2) * penalty

In [6]:
# Evaluate the cost on the initial parameters, once with the regularisation
# term switched off (lambda_ = 0) and once with lambda_ = 1.5.
cost_unregularized = cofi_cost_func(X, W, B, Y, 0)
cost_regularized = cofi_cost_func(X, W, B, Y, 1.5)
print(f"Cost without regularization: {cost_unregularized}")
print(f"Cost with regularization: {cost_regularized}")

Cost without regularization: 13470.672782681226
Cost with regularization: 19111.181893294364


In [7]:

# Fix TensorFlow's global seed so the random initial values are repeatable.
tf.random.set_seed(1234)


def _normal_variable(shape, name):
    """Return a float64 tf.Variable of the given shape filled by tf.random.normal."""
    return tf.Variable(tf.random.normal(shape, dtype=tf.float64), name=name)


# Created in the order W, X, b so each variable receives the same random draw
# as before. NOTE: this rebinds W and X, replacing the NumPy arrays of the
# same names created in an earlier cell with trainable variables.
W = _normal_variable((num_users, num_features), 'W')
X = _normal_variable((num_attractions, num_features), 'X')
b = _normal_variable((1, num_users), 'B')

optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [8]:
iterations = 100
lambda_ = 0.1

# The loop variable is named `step` (not `iter`) so the builtin iter() is not
# shadowed in the notebook's global namespace for every later cell.
for step in range(iterations):

    # Record the forward pass so the cost can be differentiated with respect
    # to the trainable variables.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func(X, W, b, Y, lambda_)

    # Gradients of the cost with respect to X, W and b, in that order.
    grads = tape.gradient(cost_value, [X, W, b])

    # One Adam update of all three variables.
    optimizer.apply_gradients(zip(grads, [X, W, b]))
    if step % 10 == 0:
        print(f"Training loss at iteration {step}: {cost_value:0.1f}")

Training loss at iteration 0: 21688.7
Training loss at iteration 10: 3097.3
Training loss at iteration 20: 819.9
Training loss at iteration 30: 663.8
Training loss at iteration 40: 520.5
Training loss at iteration 50: 439.7
Training loss at iteration 60: 380.5
Training loss at iteration 70: 328.1
Training loss at iteration 80: 283.6
Training loss at iteration 90: 245.0


In [9]:
# Predicted rating of every attraction (rows) by every user (columns):
# features times transposed user weights, plus the per-user bias.
predicted_ratings = np.dot(X, np.transpose(W)) + b

for user in range(num_users):
    print(f"User {user+1}:")
    for attraction in range(num_attractions):
        actual = Y[attraction][user]
        # A stored rating of 0 means the user never rated this attraction.
        shown = "Not Rated" if actual == 0 else actual
        print(f"Attraction {attraction+1}: Predicted Rating = {predicted_ratings[attraction][user]:.2f}, Actual Rating = {shown}")
    print("***************************************************")

User 1:
Attraction 1: Predicted Rating = 3.18, Actual Rating = Not Rated
Attraction 2: Predicted Rating = 2.64, Actual Rating = Not Rated
Attraction 3: Predicted Rating = 1.78, Actual Rating = Not Rated
Attraction 4: Predicted Rating = 3.23, Actual Rating = Not Rated
Attraction 5: Predicted Rating = 3.66, Actual Rating = Not Rated
Attraction 6: Predicted Rating = 4.47, Actual Rating = 4.5
Attraction 7: Predicted Rating = 1.82, Actual Rating = Not Rated
Attraction 8: Predicted Rating = 4.48, Actual Rating = 4.5
Attraction 9: Predicted Rating = 3.07, Actual Rating = Not Rated
Attraction 10: Predicted Rating = 1.80, Actual Rating = Not Rated
Attraction 11: Predicted Rating = 2.79, Actual Rating = Not Rated
Attraction 12: Predicted Rating = 4.40, Actual Rating = 4.4
Attraction 13: Predicted Rating = 3.34, Actual Rating = Not Rated
Attraction 14: Predicted Rating = 2.20, Actual Rating = Not Rated
Attraction 15: Predicted Rating = 4.26, Actual Rating = 4.3
Attraction 16: Predicted Rating = 1

In [10]:
# Materialise the predictions as a plain NumPy array. The expression that
# produced them mixes NumPy calls with TensorFlow variables, so the value may
# not be an ndarray yet (TODO confirm); np.array() makes the type explicit.
predicted_ratings = np.array(predicted_ratings)
# Expect (num_attractions, num_users).
print(predicted_ratings.shape)

(1252, 5)


In [15]:
import csv

users = ['Noel','Harsh', 'Vikranth','Muthuraj','Armaan']
attractions_data = pd.read_csv('datasets/final_attractions.csv', usecols=['Name'])
row = ['Attraction'] + users

# The `with` block guarantees the file is flushed and closed before any other
# cell reads datasets/user_ratings.csv back; previously the handle was never
# closed, so buffered rows could be missing from the file on disk.
# UTF-8 matches the default encoding pandas uses when reading the file.
with open("datasets/user_ratings.csv", "w", newline='', encoding="utf-8") as write_file:
    writer = csv.writer(write_file)
    writer.writerow(row)
    for i in range(num_attractions):
        attraction_row = [attractions_data['Name'][i]]
        for j in range(num_users):
            # Keep the user's own rating where one exists; fill the gaps
            # (stored as 0) with the model's prediction.
            attraction_row.append(predicted_ratings[i][j] if Y[i][j] == 0 else Y[i][j])
        writer.writerow(attraction_row)



In [12]:
# Query parameters: which state to search, for which user, and how many
# attractions to list.
state = 'Goa'
user = 'Noel'
number_of_attractions = 5

# Names and descriptions of the attractions located in the requested state.
attractions_data = pd.read_csv('datasets/final_attractions.csv', usecols=['Name','State','Description'])
in_state = attractions_data[attractions_data['State'] == state]
attraction_names = list(in_state['Name'])
attractions_description = {
    name: [text] for name, text in zip(in_state['Name'], in_state['Description'])
}

# The chosen user's rating for each of those attractions.
user_ratings_data = pd.read_csv('datasets/user_ratings.csv', usecols=['Attraction', user])
attractions = {}
for name, rating in zip(user_ratings_data['Attraction'], user_ratings_data[user].to_numpy()):
    if name in attraction_names:
        attractions[name] = rating

# Highest rating first.
attractions_sorted = dict(sorted(attractions.items(), key=lambda item: item[1], reverse=True))

# Print entries until `number_of_attractions` have been shown.
for rank, name in enumerate(attractions_sorted):
    if rank == number_of_attractions:
        break
    print(f"Attraction name: {name}")
    print(f"Description: {attractions_description[name][0]}")
    print("***************************************************")

Attraction name: Basilica of Bom Jesus
Description: A historic church and UNESCO World Heritage site in Old Goa.
***************************************************
Attraction name: Vagator Beach
Description: A popular beach destination known for its scenic beauty.
***************************************************
Attraction name: Salim Ali Bird Sanctuary
Description: A bird sanctuary located on the Chorao Island.
***************************************************
Attraction name: St. Cajetan Church
Description: A historic church located in Old Goa.
***************************************************
Attraction name: Colva Beach
Description: A popular beach destination known for its white sands and nightlife.
***************************************************


In [19]:
import csv

users = ['Noel','Harsh','Vikranth','Muthuraj','Armaan']
features = ['Name', 'Rating','Visitors', 'Historical', 'Natural', 'Amusement', 'Beach', 'B']

# NOTE(review): the weight columns are labelled with the attraction feature
# names, but W was trained against the tf.Variable X, not the scaled CSV
# features — confirm these labels are what downstream consumers expect.

# The `with` block guarantees the file is flushed and closed once all rows
# are written; previously the handle was never closed.
with open("datasets/user_features.csv", "w", newline='', encoding="utf-8") as write_file:
    writer = csv.writer(write_file)
    writer.writerow(features)
    for i in range(num_users):
        user_row = [users[i]]
        for j in range(num_features):
            user_row.append(W[i][j].numpy())
        # Write the trained bias `b` (the tf.Variable updated by the optimizer).
        # The uppercase `B` is the random NumPy array from the initial cost
        # check and is never trained.
        user_row.append(b[0][i].numpy())
        writer.writerow(user_row)
