In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
from sklearn.preprocessing import StandardScaler

In [2]:
def return_attractions(csv_path='../../datasets/final_attractions.csv'):
    """Load the numeric attraction features as a 2-D NumPy array.

    Parameters
    ----------
    csv_path : str, optional
        Location of the attractions CSV. The default is the path this
        notebook has always read from, so zero-argument calls are unaffected.

    Returns
    -------
    numpy.ndarray
        One row per CSV row and one column per feature, in the order
        Rating, Visitors, Historical, Natural, Amusement, Beach.
        An empty CSV yields an array of shape ``(0, 6)``.
    """
    feature_columns = ['Rating', 'Visitors', 'Historical', 'Natural', 'Amusement', 'Beach']
    # Only the feature columns are requested; nothing else is used to build the matrix.
    attractions_data = pd.read_csv(csv_path, usecols=feature_columns)
    # Select the columns explicitly so the output order never depends on the file layout.
    return attractions_data[feature_columns].to_numpy()

# Bare last expression: the notebook displays the returned feature array as the cell output.
return_attractions()

array([[4.8e+00, 3.0e+07, 5.0e+00, 4.0e+00, 2.0e+00, 0.0e+00],
       [4.5e+00, 2.0e+06, 2.0e+00, 5.0e+00, 3.0e+00, 0.0e+00],
       [4.4e+00, 1.0e+06, 3.0e+00, 5.0e+00, 3.0e+00, 0.0e+00],
       ...,
       [4.8e+00, 5.8e+07, 2.0e+00, 2.0e+00, 5.0e+00, 0.0e+00],
       [4.6e+00, 1.0e+07, 2.0e+00, 3.0e+00, 4.0e+00, 5.0e+00],
       [4.9e+00, 4.0e+06, 2.0e+00, 5.0e+00, 3.0e+00, 0.0e+00]])

In [3]:
def return_user_attractions_rating():
    """Return the user-study ratings as a NumPy array.

    Reads the first user-study CSV, discards the 'Name', 'State' and
    'Country' columns, and converts every remaining column to an array.
    """
    ratings_frame = pd.read_csv('../../datasets/first-user-study-ratings.csv')
    ratings_only = ratings_frame.drop(columns=['Name', 'State', 'Country'])
    return np.array(ratings_only)

# Print the array returned by the loader as a quick look at the ratings matrix.
print(return_user_attractions_rating())

[[4 0 5 ... 0 0 0]
 [3 0 2 ... 0 0 0]
 [2 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [4]:
# Standardise every attraction feature column with StandardScaler.
scaler = StandardScaler()
X = scaler.fit_transform(return_attractions())
Y = return_user_attractions_rating()

# Y has one column per user; X has one row per attraction and one column per feature.
num_users = Y.shape[1]
num_features = X.shape[1]
num_attractions = X.shape[0]
# The cost function indexes X and Y with the same row index, so the row counts must agree.
assert Y.shape[0] == num_attractions, "ratings and attractions must have the same number of rows"

# Seeded generator: the random starting parameters (and the costs computed from
# them) are then identical on every Restart & Run All.
rng = np.random.default_rng(1234)
W = rng.random((num_users, num_features))
B = rng.random((1, num_users))


print(W.shape)
print(B.shape)
print("X shape: ", X.shape)
print("Y shape: ", Y.shape)

(43, 6)
(1, 43)
X shape:  (1251, 6)
Y shape:  (1251, 43)


In [5]:
def cofi_cost_func(X, W, b, Y, lambda_):
    """Regularised squared-error cost for collaborative filtering.

    The error term sums ``(x_i . w_j + b_j - y_ij)^2 / 2`` over every entry
    of ``Y`` that is non-zero; a zero entry is skipped entirely. The
    regularisation term adds ``lambda_ / 2`` times the sum of squares of all
    entries of ``W`` and of ``X``.

    Parameters
    ----------
    X : array-like, shape (num_items, num_features)
        Item feature matrix (NumPy array or TensorFlow variable/tensor).
    W : array-like, shape (num_users, num_features)
        Per-user weight matrix.
    b : array-like, shape (1, num_users)
        Per-user bias row.
    Y : array-like, shape (num_items, num_users)
        Ratings; ``0`` marks an entry that does not contribute to the error.
    lambda_ : float
        Regularisation strength.

    Returns
    -------
    Scalar cost: a NumPy float when ``X``, ``W`` and ``b`` are all NumPy
    arrays, otherwise a scalar TensorFlow tensor (so gradients can flow).
    """
    if all(isinstance(param, np.ndarray) for param in (X, W, b)):
        # Pure NumPy evaluation, used for quick checks outside a GradientTape.
        Y = np.asarray(Y)
        rated = (Y != 0)
        residual = (X @ W.T + b - Y) * rated
        squared_error = np.sum(residual ** 2)
        penalty = np.sum(W ** 2) + np.sum(X ** 2)
    else:
        # TensorFlow evaluation: a handful of matrix ops instead of one op per
        # matrix element, which keeps the tape small and the step fast.
        rated = tf.cast(Y != 0, X.dtype)
        # Y is cast explicitly so integer ratings can be subtracted from float predictions.
        residual = (tf.linalg.matmul(X, W, transpose_b=True) + b - tf.cast(Y, X.dtype)) * rated
        squared_error = tf.reduce_sum(tf.square(residual))
        penalty = tf.reduce_sum(tf.square(W)) + tf.reduce_sum(tf.square(X))

    return squared_error / 2 + penalty * (lambda_ / 2)

In [6]:
# Evaluate the cost on the initial NumPy parameters, first with lambda_ = 0 ...
unregularized_cost = cofi_cost_func(X, W, B, Y, 0)
print(f"Cost without regularization: {unregularized_cost}")
# ... and then with lambda_ = 1.5 so the regularisation terms are included.
regularized_cost = cofi_cost_func(X, W, B, Y, 1.5)
print(f"Cost with regularization: {regularized_cost}")

Cost without regularization: 7114.187187781213
Cost with regularization: 12805.804242831648


In [7]:

# Fix TensorFlow's global seed before drawing the initial parameter values.
tf.random.set_seed(1234)

# Trainable parameters, each drawn from a standard normal distribution.
# Note: X is rebound here to a randomly initialised variable, replacing the
# standardised feature matrix that this name held before.
W = tf.Variable(
    tf.random.normal(shape=(num_users, num_features), dtype=tf.float64),
    name='W',
)
X = tf.Variable(
    tf.random.normal(shape=(num_attractions, num_features), dtype=tf.float64),
    name='X',
)
b = tf.Variable(
    tf.random.normal(shape=(1, num_users), dtype=tf.float64),
    name='B',
)

optimizer = keras.optimizers.Adam(learning_rate=1e-1)

In [8]:
iterations = 200
lambda_ = 0.1

# The loop variable is called `step` (not `iter`) so the builtin `iter` is not
# shadowed for the rest of the kernel session.
for step in range(iterations):

    # Record the cost computation so TensorFlow can differentiate it.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func(X, W, b, Y, lambda_)

    # Gradient of the cost with respect to each trainable variable.
    grads = tape.gradient(cost_value, [X, W, b])

    # One Adam update of X, W and b.
    optimizer.apply_gradients(zip(grads, [X, W, b]))
    if step % 10 == 0:
        print(f"Cost at iteration {step} is {cost_value.numpy()}")

    

Cost at iteration 0 is 12835.501133613529
Cost at iteration 10 is 1841.7133215457277
Cost at iteration 20 is 640.9195695468325
Cost at iteration 30 is 384.29491987356863
Cost at iteration 40 is 282.2152734475549
Cost at iteration 50 is 222.19273583381425
Cost at iteration 60 is 188.18711552251352
Cost at iteration 70 is 163.36212222581543
Cost at iteration 80 is 143.74285135065278
Cost at iteration 90 is 128.02002771415354
Cost at iteration 100 is 115.06495955911411
Cost at iteration 110 is 104.25197723327418
Cost at iteration 120 is 95.14872871505534
Cost at iteration 130 is 87.44416181736429
Cost at iteration 140 is 80.89915210305222
Cost at iteration 150 is 75.32284819630206
Cost at iteration 160 is 70.56129081757949
Cost at iteration 170 is 66.48688150136145
Cost at iteration 180 is 62.98892686059855
Cost at iteration 190 is 59.96998864147342


In [9]:
# Predicted rating for every (attraction, user) pair: X . W^T plus the per-user bias.
predicted_ratings = np.dot(X, np.transpose(W)) + b

for user in range(num_users):
    print(f"User {user+1}:")
    for attraction in range(num_attractions):
        actual = Y[attraction][user]
        predicted = predicted_ratings[attraction][user]
        if actual != 0:
            print(f"Attraction {attraction+1}: Predicted Rating = {predicted:.2f}, Actual Rating = {actual}")
        else:
            # A zero in Y is reported as "Not Rated" rather than as a rating of 0.
            print(f"Attraction {attraction+1}: Predicted Rating = {predicted:.2f}, Actual Rating = Not Rated")
    print("***************************************************")

User 1:
Attraction 1: Predicted Rating = 4.00, Actual Rating = 4
Attraction 2: Predicted Rating = 3.00, Actual Rating = 3
Attraction 3: Predicted Rating = 2.01, Actual Rating = 2
Attraction 4: Predicted Rating = 4.95, Actual Rating = 5
Attraction 5: Predicted Rating = 2.98, Actual Rating = 3
Attraction 6: Predicted Rating = 2.41, Actual Rating = Not Rated
Attraction 7: Predicted Rating = 1.55, Actual Rating = Not Rated
Attraction 8: Predicted Rating = 1.58, Actual Rating = Not Rated
Attraction 9: Predicted Rating = 3.02, Actual Rating = Not Rated
Attraction 10: Predicted Rating = 3.96, Actual Rating = 4
Attraction 11: Predicted Rating = 2.22, Actual Rating = Not Rated
Attraction 12: Predicted Rating = 3.95, Actual Rating = 4
Attraction 13: Predicted Rating = 2.99, Actual Rating = 3
Attraction 14: Predicted Rating = 2.01, Actual Rating = 2
Attraction 15: Predicted Rating = 2.98, Actual Rating = 3
Attraction 16: Predicted Rating = 2.22, Actual Rating = Not Rated
Attraction 17: Predicted 

In [10]:
# Convert the predictions to a plain NumPy array and report its dimensions.
predicted_ratings = np.array(predicted_ratings)
ratings_shape = predicted_ratings.shape
print(ratings_shape)

(1251, 43)


In [12]:
import csv

# Load the inputs first, so a failed read cannot leave a truncated output file behind.
attractions_data = pd.read_csv('../../datasets/final_attractions.csv', usecols=['Name'])
users = [
    column
    for column in pd.read_csv('../../datasets/first-user-study-ratings.csv').columns
    if column not in ('Name', 'State', 'Country')
]
print(users)
row = ['Name'] + users

# The context manager closes (and therefore flushes) the file as soon as the
# last row is written, so later cells that read this CSV see complete data.
with open("../../datasets/predictions/predicted_user_ratings.csv", "w", newline='') as write_file:
    writer = csv.writer(write_file)
    writer.writerow(row)
    for i in range(num_attractions):
        attraction_row = [attractions_data['Name'][i]]
        for j in range(num_users):
            # Keep the user's own rating where one exists (non-zero);
            # otherwise fill the gap with the model's prediction.
            attraction_row.append(predicted_ratings[i][j] if Y[i][j] == 0 else Y[i][j])
        writer.writerow(attraction_row)



['Vikranth U', 'Shiv Chopra', 'Noel', 'Tejaswi Kottakki', 'Praveer Singh Chauhan ', 'Nischay', 'Muthuraj Vairamuthu', 'Pritesh Mistry', 'Alpin', 'Nilesh v Patel', 'Hetvi Suthar', 'Nirav Bavisi', 'Jugal Modi', 'Nipun Misra', 'Sagar A', 'Alpeshkumar', 'Paras Verma', 'Jignesh Desai', 'Samir   ', 'Solkar', 'Bharat Gedia', 'Ritesh Shah', 'Megha Desai', 'Vaishnavi Damodar', 'Priti Shah', 'SONI BUDHIA', 'Armaan', 'Test1', 'Test2', 'Test3', 'Test4', 'Vatsal', 'Dhruv Kumar', 'Mauly', 'Abhinav ', 'Roshan kumar mahto', 'Satvik', 'Tushar Chandra', 'Kartikeya Malik ', 'Suraj Jha ', 'Piyush singh ', 'Rishit', 'Daksh Pandey']


In [20]:
state = 'Punjab'
user = 'Vikranth U'
number_of_attractions = 5

# Names and descriptions of the attractions whose 'State' equals the chosen state.
attractions_data = pd.read_csv('../../datasets/final_attractions.csv', usecols=['Name','State','Description'])
in_state = attractions_data[attractions_data['State'] == state]
attraction_names = list(in_state['Name'])
attractions_description = {
    name: [description]
    for name, description in zip(in_state['Name'], in_state['Description'])
}

# Map each in-state attraction to this user's value in the predictions file.
attractions  = {}
user_ratings_data = pd.read_csv('../../datasets/predictions/predicted_user_ratings.csv', usecols=['Name', user])
for i, name in enumerate(user_ratings_data['Name']):
    if name in attraction_names:
        attractions[name] = user_ratings_data[user][i]

# Highest value first.
attractions_sorted = dict(sorted(attractions.items(), key=lambda pair: pair[1], reverse=True))
print(attractions_sorted)

# Show only the first `number_of_attractions` entries of the ranking.
for name in list(attractions_sorted)[:number_of_attractions]:
    print(f"Attraction name: {name}")
    print(f"Description: {attractions_description[name][0]}")
    print("***************************************************")

{'Devi Talab Mandir': 2.0737274723198693, 'Gurdwara Manji Sahib': 2.0736865490741763, 'Maharaja Ranjit Singh Museum': 2.07368111194984, 'Golden Temple': 2.0736799480603505, 'Rock Garden of Chandigarh': 2.0736770150384216, 'Sukhna Lake': 2.073676988687057, 'Bathinda Fort': 2.0736738304365, 'Sirhind Fort': 2.073666740308437, 'Jallianwala Bagh': 2.0736641360886754, 'Partition Museum': 2.073661931896508, 'Shiv Mandir, Gurudwara Nanak Mata': 2.0736588110950898, 'Maharaja Ranjit Singh War Museum': 2.0736532193805908, 'Ram Tirath': 2.073652530832527, 'Fateh Burj': 2.073644840819676, 'Virasat-e-Khalsa': 2.073642628412238, 'Wagah Border': 2.073637800692226, 'Pushpa Gujral Science City': 2.0736335744763728, 'Anandpur Sahib': 2.0736331961691463, 'Durgiana Temple': 2.0736292670562135, 'Harike Wetland and Bird Sanctuary': 2.073625065914862, 'Mukteshwar Temple': 2.0736227321303975, 'Quila Mubarak': 2.0736150120943835, 'Rose Garden': 2.0736066888253535, 'Ranjit Sagar Dam': 2.07360347457429, 'Kali Tem

In [22]:
import csv

# User names are the columns of the predictions file, minus the 'Name' column.
user_ratings_data = pd.read_csv('../../datasets/predictions/predicted_user_ratings.csv').drop(columns='Name')

users = list(user_ratings_data.columns)
num_users = len(users)
# NOTE(review): W was trained together with an X that is initialised randomly
# rather than from the attraction CSV, so the feature names used as column
# labels below may not describe what each weight actually represents -- confirm.
features = ['Name', 'Rating','Visitors', 'Historical', 'Natural', 'Amusement', 'Beach', 'B']

# Convert the trained variables to NumPy once instead of once per element.
trained_weights = W.numpy()
# Use the trained bias `b`; `B` is the random NumPy array created before
# training and is never updated by the optimiser.
trained_bias = b.numpy()

# The context manager guarantees the file is flushed and closed.
with open("../../datasets/user_features.csv", "w", newline='') as write_file:
    writer = csv.writer(write_file)
    writer.writerow(features)
    for i in range(num_users):
        user_row = [users[i]]
        for j in range(num_features):
            user_row.append(trained_weights[i][j])
        user_row.append(trained_bias[0][i])
        writer.writerow(user_row)
