In [45]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd

In [46]:
def return_attractions(path='datasets/indian_attractions.csv'):
    """Load the attraction feature matrix from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file that contains at least the ``Rating`` and ``Visits``
        columns. Defaults to the dataset location used by this notebook.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_rows, 2)`` in file order; column 0 holds
        ``Rating`` and column 1 holds ``Visits``. A file with a header but
        no rows yields shape ``(0, 2)``.
    """
    # Only the two feature columns are read; 'Name' was never used here.
    attractions_data = pd.read_csv(path, usecols=['Rating', 'Visits'])
    # Explicit column selection fixes the output order regardless of the
    # column order in the file, and converts the whole table in one step
    # instead of copying it row by row.
    return attractions_data[['Rating', 'Visits']].to_numpy()

In [47]:
def return_user_attractions_rating(path='datasets/user_attractions_rating.csv', users=None):
    """Load the user-by-attraction rating matrix from a CSV file.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file with one column per user and one row per attraction.
        Defaults to the dataset location used by this notebook.
    users : sequence of str, optional
        Column names to extract, in the order they should appear in the
        result. Defaults to the five users hard-coded in this notebook.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(num_rows, len(users))``; entry ``[i, j]`` is the
        value in row ``i`` of the column named ``users[j]``. A file with a
        header but no rows yields shape ``(0, len(users))``.
    """
    if users is None:
        users = ['Noel', 'Harsh', 'Vikranth', 'Muthuraj', 'Armaan']
    user_attractions_data = pd.read_csv(path)
    # Select the user columns in the requested order and convert the whole
    # table at once instead of copying it cell by cell.
    return user_attractions_data[list(users)].to_numpy()

In [48]:
# X: one row of features per attraction; Y: one row of user ratings per attraction.
X = return_attractions()
Y = return_user_attractions_rating()
# Both files must describe the same attractions, row for row.
assert X.shape[0] == Y.shape[0], "feature and rating files have different row counts"
num_attractions, num_features = X.shape
num_users = Y.shape[1]

# Seeded generator so the random starting parameters (and the costs printed
# from them) are the same on every Restart & Run All.
rng = np.random.default_rng(1234)
W = rng.random((num_users, num_features))
B = rng.random((1, num_users))
print(W.shape)
print(B.shape)

(5, 2)
(1, 5)


In [49]:
def cofi_cost_func(X, W, b, Y, lambda_):
    """Regularized collaborative-filtering cost.

    Computes::

        J = 1/2 * sum over (i, j) with Y[i, j] != 0 of
                (W[j] . X[i] + b[0, j] - Y[i, j]) ** 2
            + lambda_/2 * (sum(W ** 2) + sum(X ** 2))

    An entry of ``Y`` equal to 0 is treated as "no rating" and contributes
    nothing to the squared-error term.

    Parameters
    ----------
    X : array or tensor, shape (num_items, num_features)
        Item feature matrix.
    W : array or tensor, shape (num_users, num_features)
        Per-user weight matrix.
    b : array or tensor, shape (1, num_users)
        Per-user bias row.
    Y : array, shape (num_items, num_users)
        Ratings; 0 marks an unrated entry.
    lambda_ : float
        Regularization strength.

    Returns
    -------
    scalar
        A NumPy scalar when ``X``, ``W`` and ``b`` are all NumPy arrays;
        otherwise a scalar TensorFlow tensor built from TensorFlow ops so it
        can be differentiated under ``tf.GradientTape``.

    Raises
    ------
    ValueError
        If the shapes of ``X``, ``W``, ``b`` and ``Y`` are inconsistent.
    """
    num_items, num_features = X.shape
    num_rated_items, num_users = Y.shape
    # Fail loudly on mismatched inputs instead of silently using a subset.
    if num_items != num_rated_items:
        raise ValueError(
            f"X has {num_items} rows but Y has {num_rated_items} rows")
    if tuple(W.shape) != (num_users, num_features):
        raise ValueError(
            f"W must have shape {(num_users, num_features)}, got {tuple(W.shape)}")
    if tuple(b.shape) != (1, num_users):
        raise ValueError(
            f"b must have shape {(1, num_users)}, got {tuple(b.shape)}")

    half_lambda = lambda_ / 2

    if all(isinstance(param, np.ndarray) for param in (X, W, b)):
        # Pure NumPy inputs: stay in NumPy so the result is a plain scalar.
        ratings = np.asarray(Y)
        # Unrated entries are replaced by 0 so they add nothing to the sum.
        err = np.where(ratings != 0, X @ W.T + b - ratings, 0.0)
        return 0.5 * np.sum(err ** 2) + half_lambda * (np.sum(W ** 2) + np.sum(X ** 2))

    # TensorFlow inputs: one matmul instead of a Python loop over every
    # element keeps the recorded graph small and the gradients fast.
    rated = tf.cast(Y != 0, X.dtype)
    err = (tf.linalg.matmul(X, W, transpose_b=True) + b - tf.cast(Y, X.dtype)) * rated
    return 0.5 * tf.reduce_sum(err ** 2) + half_lambda * (
        tf.reduce_sum(W ** 2) + tf.reduce_sum(X ** 2))

In [53]:
# Evaluate the cost on the initial parameters with and without the
# regularization term, then report both.
cost_unregularized = cofi_cost_func(X, W, B, Y, 0)
cost_regularized = cofi_cost_func(X, W, B, Y, 0.1)
print(f"Cost without regularization: {cost_unregularized}")
print(f"Cost with regularization: {cost_regularized}")

Cost without regularization: 3919.5608590967786
Cost with regularization: 5381.287905726363


In [54]:
# Fix TensorFlow's global seed before drawing the initial parameters.
tf.random.set_seed(1234)

# Trainable variables, drawn from a normal distribution in the order W, X, b.
W = tf.Variable(
    tf.random.normal(shape=(num_users, num_features), dtype=tf.float64),
    name='W',
)
X = tf.Variable(
    tf.random.normal(shape=(num_attractions, num_features), dtype=tf.float64),
    name='X',
)
b = tf.Variable(
    tf.random.normal(shape=(1, num_users), dtype=tf.float64),
    name='B',
)

# Adam optimizer used by the training loop.
optimizer = keras.optimizers.Adam(learning_rate=0.1)

In [61]:
iterations = 100
lambda_ = 1.5
# The loop variable is named `step` rather than `iter` so the builtin
# iter() is not shadowed in the notebook namespace.
for step in range(iterations):

    # Record the cost computation so it can be differentiated.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func(X, W, b, Y, lambda_)

    # Gradients of the cost with respect to each trainable variable.
    grads = tape.gradient(cost_value, [X, W, b])

    # One optimizer update per iteration.
    optimizer.apply_gradients(zip(grads, [X, W, b]))

    print(f"Training loss at iteration {step}: {cost_value:0.1f}")

Training loss at iteration 0: 1691.4
Training loss at iteration 1: 1626.3
Training loss at iteration 2: 1560.6
Training loss at iteration 3: 1494.3
Training loss at iteration 4: 1427.7
Training loss at iteration 5: 1361.5
Training loss at iteration 6: 1296.2
Training loss at iteration 7: 1232.5
Training loss at iteration 8: 1170.6
Training loss at iteration 9: 1110.8
Training loss at iteration 10: 1053.3
Training loss at iteration 11: 998.1
Training loss at iteration 12: 945.4
Training loss at iteration 13: 895.3
Training loss at iteration 14: 847.6
Training loss at iteration 15: 802.4
Training loss at iteration 16: 759.6
Training loss at iteration 17: 719.1
Training loss at iteration 18: 680.7
Training loss at iteration 19: 644.5
Training loss at iteration 20: 610.2
Training loss at iteration 21: 578.0
Training loss at iteration 22: 547.8
Training loss at iteration 23: 519.8
Training loss at iteration 24: 494.0
Training loss at iteration 25: 470.3
Training loss at iteration 26: 448.9


KeyboardInterrupt: 

In [62]:
# Predicted rating for every (attraction, user) pair: X . W^T + b.
predicted_ratings = np.dot(X, np.transpose(W)) + b

# Per user, list predicted vs. actual rating for the attractions that user
# rated; an entry of 0 in Y is skipped.
for user in range(num_users):
    print(f"User {user+1}:")
    for attraction in range(num_attractions):
        actual = Y[attraction][user]
        if actual != 0:
            predicted = predicted_ratings[attraction][user]
            print(f"Attraction {attraction+1}: Predicted Rating = {predicted:.2f}, Actual Rating = {actual}")
    print("***************************************************")

User 1:
Attraction 4: Predicted Rating = 4.24, Actual Rating = 4.3
Attraction 5: Predicted Rating = 4.17, Actual Rating = 4.5
Attraction 6: Predicted Rating = 4.28, Actual Rating = 4.5
Attraction 8: Predicted Rating = 4.20, Actual Rating = 4.5
Attraction 16: Predicted Rating = 3.86, Actual Rating = 4.2
Attraction 17: Predicted Rating = 4.29, Actual Rating = 4.4
Attraction 25: Predicted Rating = 4.18, Actual Rating = 4.4
Attraction 36: Predicted Rating = 3.98, Actual Rating = 4.3
Attraction 44: Predicted Rating = 4.59, Actual Rating = 4.5
Attraction 49: Predicted Rating = 4.20, Actual Rating = 4.4
Attraction 51: Predicted Rating = 4.32, Actual Rating = 4.5
Attraction 52: Predicted Rating = 4.18, Actual Rating = 4.4
Attraction 56: Predicted Rating = 4.13, Actual Rating = 4.5
Attraction 60: Predicted Rating = 4.32, Actual Rating = 4.4
Attraction 63: Predicted Rating = 4.43, Actual Rating = 4.5
Attraction 64: Predicted Rating = 4.54, Actual Rating = 4.5
Attraction 67: Predicted Rating = 4.

In [47]:
# Testing ratings: a new user rates a handful of attractions; every other
# entry stays 0.
user_ratings = np.zeros(num_attractions)
user_ratings[0] = 5
user_ratings[1] = 4
user_ratings[4] = 3
user_ratings[8] = 2
user_ratings[9] = 4

# Indices of the attractions the new user rated.
user_rated = [i for i in range(num_attractions) if user_ratings[i] > 0]

# Prepend the new user as column 0. The base matrix is reloaded rather than
# taken from the current Y, so re-running this cell does not stack another
# copy of the column onto Y.
Y = np.c_[user_ratings, return_user_attractions_rating()]
print(Y.shape)

(10, 6)


In [48]:
# Problem dimensions come from the rating matrix; the number of learned
# features per attraction is set to 3 here.
num_attractions, num_users = Y.shape
num_features = 3

# Seed first so the draws below give consistent results.
tf.random.set_seed(1234)
# Trainable variables, drawn from a normal distribution in the order W, X, b.
W = tf.Variable(
    tf.random.normal(shape=(num_users, num_features), dtype=tf.float64),
    name='W',
)
X = tf.Variable(
    tf.random.normal(shape=(num_attractions, num_features), dtype=tf.float64),
    name='X',
)
b = tf.Variable(
    tf.random.normal(shape=(1, num_users), dtype=tf.float64),
    name='B',
)

# Fresh optimizer instance for the new set of variables.
optimizer = keras.optimizers.Adam(learning_rate=0.1)

In [49]:
iterations = 200
lambda_ = 1
# The loop variable is named `step` rather than `iter` so the builtin
# iter() is not shadowed in the notebook namespace.
for step in range(iterations):

    # Record the cost computation so it can be differentiated.
    with tf.GradientTape() as tape:
        cost_value = cofi_cost_func(X, W, b, Y, lambda_)

    # Gradients of the cost with respect to each trainable variable.
    grads = tape.gradient(cost_value, [X, W, b])

    # One optimizer update per iteration.
    optimizer.apply_gradients(zip(grads, [X, W, b]))

    # Report progress every 20th iteration only.
    if step % 20 == 0:
        print(f"Training loss at iteration {step}: {cost_value:0.1f}")

Training loss at iteration 0: 462.1
Training loss at iteration 20: 70.5
Training loss at iteration 40: 42.4
Training loss at iteration 60: 34.0
Training loss at iteration 80: 30.8
Training loss at iteration 100: 28.2
Training loss at iteration 120: 25.9
Training loss at iteration 140: 24.2
Training loss at iteration 160: 23.3
Training loss at iteration 180: 22.9


In [50]:
# Predicted ratings for every (attraction, user) pair, computed with
# TensorFlow ops so the result is a tensor by construction (the .numpy()
# call below requires one) rather than by NumPy/TensorFlow operator dispatch.
p = tf.linalg.matmul(X, W, transpose_b=True) + b

pm = p.numpy()
# Column 0 of the prediction matrix.
my_predictions = pm[:,0]

# sort predictions
ix = tf.argsort(my_predictions, direction='DESCENDING')

# Compare predictions with the ratings that were actually entered
# (user_ratings entries greater than 0).
print('\n\nOriginal vs Predicted ratings:\n')
for i in range(len(user_ratings)):
    if user_ratings[i] > 0:
        print(f'Original {user_ratings[i]}, Predicted {my_predictions[i]:0.2f}')



Original vs Predicted ratings:

Original 5.0, Predicted 4.59
Original 4.0, Predicted 3.47
Original 3.0, Predicted 2.77
Original 2.0, Predicted 2.08
Original 4.0, Predicted 3.73
