# Part A: User – User Recommender System

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

user_business = pd.read_csv("user-business.csv", header=None)
business = pd.read_csv("business.csv", header=None)


In [2]:
# hidden businesses that Alex has visited
alex_data = user_business.iloc[3]

# First 100 business reviews only
R_first_100 = user_business.iloc[:, :100]

# Alex's ratings from the 101st restuarant to 10,000
alex_ratings_excluding_100 = alex_data.iloc[100:]

# First 100 ratings from other users without Alex's row
others_ratings_excluding_100 = user_business.drop(3, axis=0).iloc[:, 100:]

In [3]:
# Calculate the cosine similarity between Alex & other users exlcuding the first 100 business

# all rows after the 100th column
masked_data = user_business.iloc[:, 100:]


cos_similarities = cosine_similarity(masked_data)

In [25]:
alex_cs = cos_similarities[3]

In [None]:
cos

In [27]:
# Compute rAlex,b for all businesses b
rAlex = np.dot(np.delete(alex_cs, 3, 0), np.delete(user_business.to_numpy(), 3, 0)) 

In [35]:
print(rAlex[:100])
top5_indices = np.argsort(rAlex[:100])[-5:]
print(business.iloc[top5_indices].values)

[5.55475823e-01 2.71788692e+00 2.75364038e-01 1.26262441e+01
 8.10837615e-01 8.42329717e-02 4.47614940e-01 4.30395269e+01
 1.65985436e+00 0.00000000e+00 1.90067114e-01 2.05989961e-01
 3.34967584e-01 6.94416684e-01 5.27103363e-01 1.18765337e-01
 3.32632250e+01 0.00000000e+00 3.94506648e-01 2.72266385e-02
 0.00000000e+00 1.29292993e-01 1.43980436e-01 0.00000000e+00
 1.22627662e-01 1.26502116e-01 4.84501583e-02 8.39181358e-02
 6.42265821e-01 7.70822706e-01 2.20887758e-01 2.16675695e-02
 3.17180740e-02 0.00000000e+00 0.00000000e+00 3.44476624e-01
 5.77583523e+00 1.41957715e+00 4.02080451e-01 3.27829401e+01
 1.63886141e+00 7.29230655e-01 5.06083534e-01 4.51524236e-01
 5.74080438e+00 1.81620968e-01 2.20796731e+00 5.66511379e-01
 0.00000000e+00 4.07285859e-01 2.69963752e-01 3.06425707e-02
 8.94373253e-01 4.12172931e-01 6.37039615e-01 2.75420689e-01
 2.17514939e-01 8.28039388e+00 9.21726968e-02 2.72989115e-01
 4.63400314e-01 3.35981878e+01 1.21646443e+00 0.00000000e+00
 3.07905879e-01 8.395723

The top 5 restaurants that have the highest similarity scores to Alex are:

1. Piece of Cake
2. Farm Burger
3. Loca Luna
4. Seven Lamps
5. Papi's Cuban & Caribbean Grill

# Part B: Item - Item Recommender System

In [45]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

user_business = pd.read_csv("user-business.csv", header=None)
business = pd.read_csv("business.csv", header=None)


In [46]:
# Remove Alex's row from the business ratings 
missing_alex = user_business.drop(3, axis = 0)


item_cosine_similarity = cosine_similarity(missing_alex.T)

In [47]:
alex_ratings = user_business.iloc[3]
rAlex = np.dot(item_cosine_similarity, alex_ratings)

In [48]:
print(rAlex[:100])
top5_indices = np.argsort(rAlex[:100])[-5:]
print(business.iloc[top5_indices].values)

[7.27765431e-02 4.46080257e-01 9.20297611e-02 3.73017786e+00
 1.07328700e-01 2.52136037e-02 2.13624343e-01 6.81093661e+00
 1.57519835e-01 0.00000000e+00 6.72209047e-02 3.24961824e-02
 2.16661315e-01 1.24426175e-01 5.88849632e-01 2.99669452e-02
 5.85268103e+00 0.00000000e+00 7.28603947e-02 3.96074550e-03
 0.00000000e+00 5.48597525e-02 1.83404335e-02 0.00000000e+00
 5.78011568e-02 2.47991835e-02 8.97303170e-03 2.19899749e-02
 1.32742652e-01 1.73937414e-01 4.86562881e-02 5.20543940e-03
 1.82940433e-02 0.00000000e+00 0.00000000e+00 5.15667989e-02
 2.51699057e+00 2.17289537e-01 4.51102241e-02 6.55881491e+00
 2.31971571e-01 1.14928194e-01 4.11640968e-02 7.63770990e-02
 2.17330003e+00 2.25916169e-02 3.16885118e-01 1.20220925e-01
 0.00000000e+00 4.56668369e-02 7.49534336e-02 1.90381239e-02
 2.67939919e-01 6.58877380e-02 1.59655874e-01 1.10761321e-01
 6.75902484e-02 2.85095833e+00 9.99247857e-02 9.24055469e-02
 8.95758519e-02 6.44036675e+00 2.16568719e-01 0.00000000e+00
 4.99958674e-02 4.085676

By using an item-item recommender, the top 5 restaurants that have the highest similarity scores to Alex are:

1. Piece of Cake
2. Farm Burger
3. Loca Luna
4. Seven Lamps
5. Papi's Cuban & Caribbean Grill

# Part C: Latent Hidden Model Recommendation System

In [55]:
user_business = pd.read_csv("user-business.csv", header=None)
business = pd.read_csv("business.csv", header=None)


In [56]:
import numpy as np
import pandas as pd
from scipy.sparse.linalg import svds


In [64]:

# Perform SVD with k = 10
k = 10
U, sigma, Vt = svds(user_business.to_numpy(dtype=np.float64), k = k)

# Reconstruct the R* matrix using the top 10 factors 
sigma = np.diag(sigma)
R_star = np.dot(np.dot(U, sigma), Vt)

# Get Alex's estimated ratings from R_star
alex_estimated_ratings = R_star[3]

# Get the indices of the top 5 businesses in S for Alex
top5_indices1 = np.argpartition(alex_estimated_ratings[:100], -5)[-5:]
top5_indices1 = top5_indices1[np.argsort(-alex_estimated_ratings[top5_indices])]


# Get the top 5 business in S for Alex
top5_businesses = first100_businesses.iloc[top5_indices1]
top5_estimated_ratings = alex_estimated_ratings[top5_indices1]

print("Top 5 businesses for Alex:")
print(top5_businesses)
print("Their estimated ratings:")
print(top5_estimated_ratings)

Top 5 businesses for Alex:
    0   1   2   3   4   5   6   7   8   9   ...  90  91  92  93  94  95  96  \
61   0   0   0   0   0   0   0   0   0   0  ...   1   1   0   0   0   0   0   
39   0   0   0   0   0   0   0   0   0   0  ...   0   0   0   0   0   0   0   
7    0   0   0   0   0   0   0   0   1   0  ...   0   0   0   0   0   0   0   
16   0   0   0   0   0   0   0   0   0   0  ...   0   0   0   0   0   0   0   
3    0   0   0   0   0   0   0   0   0   0  ...   0   0   0   0   0   0   0   

    97  98  99  
61   0   0   0  
39   0   0   0  
7    0   0   0  
16   0   0   0  
3    0   0   0  

[5 rows x 100 columns]
Their estimated ratings:
[0.81794731 0.85782639 1.19050642 0.87625457 0.29935439]


Based on the Latent Hidden Model Recommendation System the top 5 businesses and thier correspondong similaritiy scores are: 

1. Papi's Cuban & Caribbean Grill with score of 1.1905064199911026
2. Loca Luna with score of 0.876254570830219
3. Farm Burger with score of 0.8578263876279594
4. Seven Lamps with score of 0.8179473119616179
5. Piece of Cake with score of 0.299354393760916

In [65]:
print(rAlex[:100])
top5_indices = np.argsort(rAlex[:100])[-5:]
print(business.iloc[top5_indices].values)

[7.27765431e-02 4.46080257e-01 9.20297611e-02 3.73017786e+00
 1.07328700e-01 2.52136037e-02 2.13624343e-01 6.81093661e+00
 1.57519835e-01 0.00000000e+00 6.72209047e-02 3.24961824e-02
 2.16661315e-01 1.24426175e-01 5.88849632e-01 2.99669452e-02
 5.85268103e+00 0.00000000e+00 7.28603947e-02 3.96074550e-03
 0.00000000e+00 5.48597525e-02 1.83404335e-02 0.00000000e+00
 5.78011568e-02 2.47991835e-02 8.97303170e-03 2.19899749e-02
 1.32742652e-01 1.73937414e-01 4.86562881e-02 5.20543940e-03
 1.82940433e-02 0.00000000e+00 0.00000000e+00 5.15667989e-02
 2.51699057e+00 2.17289537e-01 4.51102241e-02 6.55881491e+00
 2.31971571e-01 1.14928194e-01 4.11640968e-02 7.63770990e-02
 2.17330003e+00 2.25916169e-02 3.16885118e-01 1.20220925e-01
 0.00000000e+00 4.56668369e-02 7.49534336e-02 1.90381239e-02
 2.67939919e-01 6.58877380e-02 1.59655874e-01 1.10761321e-01
 6.75902484e-02 2.85095833e+00 9.99247857e-02 9.24055469e-02
 8.95758519e-02 6.44036675e+00 2.16568719e-01 0.00000000e+00
 4.99958674e-02 4.085676