<a href="https://colab.research.google.com/github/C23-PS396/LeftLovers-MachineLearning/blob/main/Content_Based_Filtering_Restaurant_Recommender_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# kaggle API
!pip install -q kaggle
from google.colab import files
files.upload()
!mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# download the data set and unzip
!kaggle datasets download -d ahmedshahriarsakib/uber-eats-usa-restaurants-menus
!unzip /content/uber-eats-usa-restaurants-menus.zip

In [None]:
# libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split

In [None]:
# load the dataset and preview
df = pd.read_csv('/content/restaurants.csv')
df.head()

Unnamed: 0,id,position,name,score,ratings,category,price_range,full_address,zip_code,lat,lng
0,1,19,PJ Fresh (224 Daniel Payne Drive),,,"Burgers, American, Sandwiches",$,"224 Daniel Payne Drive, Birmingham, AL, 35207",35207,33.562365,-86.830703
1,2,9,J' ti`'z Smoothie-N-Coffee Bar,,,"Coffee and Tea, Breakfast and Brunch, Bubble Tea",,"1521 Pinson Valley Parkway, Birmingham, AL, 35217",35217,33.58364,-86.77333
2,3,6,Philly Fresh Cheesesteaks (541-B Graymont Ave),,,"American, Cheesesteak, Sandwiches, Alcohol",$,"541-B Graymont Ave, Birmingham, AL, 35204",35204,33.5098,-86.85464
3,4,17,Papa Murphy's (1580 Montgomery Highway),,,Pizza,$,"1580 Montgomery Highway, Hoover, AL, 35226",35226,33.404439,-86.806614
4,5,162,Nelson Brothers Cafe (17th St N),4.7,22.0,"Breakfast and Brunch, Burgers, Sandwiches",,"314 17th St N, Birmingham, AL, 35203",35203,33.51473,-86.8117


## EDA and Preprocessing

In [None]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40227 entries, 0 to 40226
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            40227 non-null  int64  
 1   position      40227 non-null  int64  
 2   name          40227 non-null  object 
 3   score         22254 non-null  float64
 4   ratings       22254 non-null  float64
 5   category      40204 non-null  object 
 6   price_range   33581 non-null  object 
 7   full_address  39949 non-null  object 
 8   zip_code      39940 non-null  object 
 9   lat           40227 non-null  float64
 10  lng           40227 non-null  float64
dtypes: float64(4), int64(2), object(5)
memory usage: 3.4+ MB


In [None]:
# drop unecessary columns
df.drop(columns=['position', 'full_address', 'zip_code'], axis=1, inplace=True)

In [None]:
# counting missing value percentage of each column
missing_df = (df.isnull().sum() / df.shape[0] * 100).reset_index().rename(columns={0: 'missing %'})
missing_df[missing_df['missing %'] > 0]

Unnamed: 0,index,missing %
2,score,44.678947
3,ratings,44.678947
4,category,0.057176
5,price_range,16.521242


In [None]:
# change price range column type
df['price_range'].unique()

array(['$', nan, '$$', '$$$', '$$$$'], dtype=object)

In [None]:
# dropping price range col
df.drop(columns=['price_range'], axis=1, inplace=True)

In [None]:
# handling missing numerical values
for cat in ['score', 'ratings']:
  df[cat].fillna(df[cat].mean(), inplace=True)

In [None]:
# handling missing categorical value

# inspect
df[df['category'].isnull()][['name']]

Unnamed: 0,name
978,Dunkin' (8171 Us Highway 431)
7106,Sam's Hot Dog Stand
11812,McMenamins Zeus Cafe
14400,Millwood Grocery
14527,Bulldog Liquor &amp; Wine
15051,A1 Hop Shop 144
15732,Liquor &amp; Liquor
21786,Yaki Sushi
24474,Sam's Kitchen
24483,Fabios NY Pizza


In [None]:
# fill the null value with mode since replacing them one by one would be
# impractical
mode = df['category'].value_counts().index[0]
df['category'].fillna(mode, inplace=True)

In [None]:
# recheck missing values
df.isnull().sum()

id          0
name        0
score       0
ratings     0
category    0
lat         0
lng         0
dtype: int64

### Picking the top 20 categories

In [None]:
# get category unique values
cats_dup = df['category'].tolist()
cats_list = []
for line in cats_dup:
  for cat in line.split(', '):
    cats_list.append(cat)

# picking top 20 categories
cats_20 = pd.value_counts(cats_list)[:20].index
print("top 20 categories: \n" + ", ".join(cats_20))

top 20 categories: 
American, Burgers, Fast Food, Sandwich, Sandwiches, Healthy, Asian, Comfort Food, Family Meals, Mexican, Breakfast and Brunch, Desserts, Pizza, Salads, Chicken, Convenience, Italian, Everyday Essentials, Wings, Family Friendly


In [None]:
# filtering out restaurants
ids = []
for cat in cats_20:
  ids += list(df[df['category'].str.contains(cat)].index)

print(f"The num of list restaurants {len(ids)}") 
unique_ids = set(ids)
print(f"The num of unique restaurants {len(unique_ids)}") 

The num of list restaurants 97651
The num of unique restaurants 37540


In [None]:
# new dataframe
df = df.iloc[list(unique_ids)]
df.head()

Unnamed: 0,id,name,score,ratings,category,lat,lng
0,1,PJ Fresh (224 Daniel Payne Drive),4.560996,74.870989,"Burgers, American, Sandwiches",33.562365,-86.830703
1,2,J' ti`'z Smoothie-N-Coffee Bar,4.560996,74.870989,"Coffee and Tea, Breakfast and Brunch, Bubble Tea",33.58364,-86.77333
2,3,Philly Fresh Cheesesteaks (541-B Graymont Ave),4.560996,74.870989,"American, Cheesesteak, Sandwiches, Alcohol",33.5098,-86.85464
3,4,Papa Murphy's (1580 Montgomery Highway),4.560996,74.870989,Pizza,33.404439,-86.806614
4,5,Nelson Brothers Cafe (17th St N),4.7,22.0,"Breakfast and Brunch, Burgers, Sandwiches",33.51473,-86.8117


### One-hot encode the features

In [None]:
# one hot encode
for cat in cats_20:
  df[cat] = df['category'].apply(lambda x: 1 if cat in x else 0)

df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[cat] = df['category'].apply(lambda x: 1 if cat in x else 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[cat] = df['category'].apply(lambda x: 1 if cat in x else 0)


Unnamed: 0,id,name,score,ratings,category,lat,lng,American,Burgers,Fast Food,...,Breakfast and Brunch,Desserts,Pizza,Salads,Chicken,Convenience,Italian,Everyday Essentials,Wings,Family Friendly
0,1,PJ Fresh (224 Daniel Payne Drive),4.560996,74.870989,"Burgers, American, Sandwiches",33.562365,-86.830703,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,J' ti`'z Smoothie-N-Coffee Bar,4.560996,74.870989,"Coffee and Tea, Breakfast and Brunch, Bubble Tea",33.58364,-86.77333,0,0,0,...,1,0,0,0,0,0,0,0,0,0
2,3,Philly Fresh Cheesesteaks (541-B Graymont Ave),4.560996,74.870989,"American, Cheesesteak, Sandwiches, Alcohol",33.5098,-86.85464,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,4,Papa Murphy's (1580 Montgomery Highway),4.560996,74.870989,Pizza,33.404439,-86.806614,0,0,0,...,0,0,1,0,0,0,0,0,0,0
4,5,Nelson Brothers Cafe (17th St N),4.7,22.0,"Breakfast and Brunch, Burgers, Sandwiches",33.51473,-86.8117,0,1,0,...,1,0,0,0,0,0,0,0,0,0


## Merge with menu dataset

In [None]:
menu_df = pd.read_csv('/content/restaurant-menus.csv')
menu_df.head()

Unnamed: 0,restaurant_id,category,name,description,price
0,1,Extra Large Pizza,Extra Large Meat Lovers,Whole pie.,15.99 USD
1,1,Extra Large Pizza,Extra Large Supreme,Whole pie.,15.99 USD
2,1,Extra Large Pizza,Extra Large Pepperoni,Whole pie.,14.99 USD
3,1,Extra Large Pizza,Extra Large BBQ Chicken &amp; Bacon,Whole Pie,15.99 USD
4,1,Extra Large Pizza,Extra Large 5 Cheese,Whole pie.,14.99 USD


In [None]:
menu_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3375211 entries, 0 to 3375210
Data columns (total 5 columns):
 #   Column         Dtype 
---  ------         ----- 
 0   restaurant_id  int64 
 1   category       object
 2   name           object
 3   description    object
 4   price          object
dtypes: int64(1), object(4)
memory usage: 128.8+ MB


In [None]:
# preprocess
menu_df['price'] = menu_df['price'].str.replace(" USD", "")
menu_df['price'] = menu_df['price'].astype(float)
menu_df.dtypes

restaurant_id      int64
category          object
name              object
description       object
price            float64
dtype: object

In [None]:
# get the price range
avg_menu = menu_df.groupby('restaurant_id')[['price']].mean().reset_index().rename(columns={'restaurant_id':'id', 'price':'avg_price'})
avg_menu.head()

Unnamed: 0,id,avg_price
0,1,5.663684
1,2,5.505333
2,3,10.762143
3,4,10.531892
4,5,4.532576


In [None]:
# merge
df = pd.merge(df, avg_menu, how='left', on='id')
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 37540 entries, 0 to 37539
Data columns (total 28 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    37540 non-null  int64  
 1   name                  37540 non-null  object 
 2   score                 37540 non-null  float64
 3   ratings               37540 non-null  float64
 4   category              37540 non-null  object 
 5   lat                   37540 non-null  float64
 6   lng                   37540 non-null  float64
 7   American              37540 non-null  int64  
 8   Burgers               37540 non-null  int64  
 9   Fast Food             37540 non-null  int64  
 10  Sandwich              37540 non-null  int64  
 11  Sandwiches            37540 non-null  int64  
 12  Healthy               37540 non-null  int64  
 13  Asian                 37540 non-null  int64  
 14  Comfort Food          37540 non-null  int64  
 15  Family Meals       

In [None]:
# filling the nan value for the missing values, fill them with mean
df['avg_price'].fillna(df['avg_price'].mean(), inplace=True)

In [None]:
df['avg_price'].isnull().sum()

0

In [None]:
df.head()

Unnamed: 0,id,name,score,ratings,category,lat,lng,American,Burgers,Fast Food,...,Desserts,Pizza,Salads,Chicken,Convenience,Italian,Everyday Essentials,Wings,Family Friendly,avg_price
0,1,PJ Fresh (224 Daniel Payne Drive),4.560996,74.870989,"Burgers, American, Sandwiches",33.562365,-86.830703,1,1,0,...,0,0,0,0,0,0,0,0,0,5.663684
1,2,J' ti`'z Smoothie-N-Coffee Bar,4.560996,74.870989,"Coffee and Tea, Breakfast and Brunch, Bubble Tea",33.58364,-86.77333,0,0,0,...,0,0,0,0,0,0,0,0,0,5.505333
2,3,Philly Fresh Cheesesteaks (541-B Graymont Ave),4.560996,74.870989,"American, Cheesesteak, Sandwiches, Alcohol",33.5098,-86.85464,1,0,0,...,0,0,0,0,0,0,0,0,0,10.762143
3,4,Papa Murphy's (1580 Montgomery Highway),4.560996,74.870989,Pizza,33.404439,-86.806614,0,0,0,...,0,1,0,0,0,0,0,0,0,10.531892
4,5,Nelson Brothers Cafe (17th St N),4.7,22.0,"Breakfast and Brunch, Burgers, Sandwiches",33.51473,-86.8117,0,1,0,...,0,0,0,0,0,0,0,0,0,4.532576


## Creating User transaction Dataframe



The features of the users will be:

Not used for training
1.   id (restaurant_id)

Used for training
1.   Age ranging from 18 to 60
2.   Per Transaction spendings 15 to 100 usd avg
3.   The number of transactions done for each food category ranging from 1 - 100

for now there'll be 300 users who have rated 20 to 300 restaurants



In [None]:
# creating user data frame
users_data = np.array([]).reshape(-1,23)
for i in range(300):
  num_res = np.random.randint(20, 300)
  res_ids = np.random.randint(0, df.shape[0], size=(num_res,1))
  ages = np.tile(np.random.randint(18, 61, size=(1,1)), (num_res,1))
  per_ts = np.tile((100-15) * np.random.random_sample((1,1)) + 15, (num_res,1))
  num_cat = np.tile((5-1) * np.random.random_sample((1,20)) + 1, (num_res,1))
  t = np.concatenate((res_ids, ages, per_ts, num_cat), axis=1)
  users_data = np.concatenate((users_data, t), axis=0)

user_df = pd.DataFrame(data=users_data, columns = ['res_id', 'age', 'avg_per_transaction'] + list(cats_20))
user_df.head()

Unnamed: 0,res_id,age,avg_per_transaction,American,Burgers,Fast Food,Sandwich,Sandwiches,Healthy,Asian,...,Breakfast and Brunch,Desserts,Pizza,Salads,Chicken,Convenience,Italian,Everyday Essentials,Wings,Family Friendly
0,24896.0,52.0,79.431271,1.092546,2.368937,1.319903,3.149351,4.79964,3.114432,4.001346,...,3.946919,4.377756,1.119757,4.824244,3.357502,4.57208,3.488574,4.53365,2.404719,4.275208
1,845.0,52.0,79.431271,1.092546,2.368937,1.319903,3.149351,4.79964,3.114432,4.001346,...,3.946919,4.377756,1.119757,4.824244,3.357502,4.57208,3.488574,4.53365,2.404719,4.275208
2,27374.0,52.0,79.431271,1.092546,2.368937,1.319903,3.149351,4.79964,3.114432,4.001346,...,3.946919,4.377756,1.119757,4.824244,3.357502,4.57208,3.488574,4.53365,2.404719,4.275208
3,21487.0,52.0,79.431271,1.092546,2.368937,1.319903,3.149351,4.79964,3.114432,4.001346,...,3.946919,4.377756,1.119757,4.824244,3.357502,4.57208,3.488574,4.53365,2.404719,4.275208
4,35158.0,52.0,79.431271,1.092546,2.368937,1.319903,3.149351,4.79964,3.114432,4.001346,...,3.946919,4.377756,1.119757,4.824244,3.357502,4.57208,3.488574,4.53365,2.404719,4.275208


In [None]:
print(f"user transactions shape: {user_df.shape}")

user transactions shape: (47120, 23)


## Create item transactions of users dataframe

In [None]:
item_df = pd.merge(df, user_df[['res_id']], left_index=True, right_on='res_id', how='right')
item_df = item_df.drop(columns=['res_id'])
item_df.head()

Unnamed: 0,id,name,score,ratings,category,lat,lng,American,Burgers,Fast Food,...,Desserts,Pizza,Salads,Chicken,Convenience,Italian,Everyday Essentials,Wings,Family Friendly,avg_price
0,26673,Big Daddy Sandwiches,4.560996,74.870989,"Mexican, Latin American, Tacos",39.075291,-77.118092,1,0,0,...,0,0,0,0,0,0,0,0,0,6.991379
1,888,Pizza Hut (200 Highway 31 SW),4.560996,74.870989,"Italian, pizza, wings",34.4418,-86.9417,0,0,0,...,0,0,0,0,0,1,0,0,0,15.301277
2,29319,CNX Thai Cooking,4.8,131.0,"Thai, Asian, Noodles, Allergy Friendly",44.48062,-73.21107,0,0,0,...,0,0,0,0,0,0,0,0,0,8.8
3,23086,Dolce &amp; Ciabatta,4.6,18.0,"Black-owned, Sandwich, Coffee and Tea, Europea...",39.10786,-77.56918,0,0,0,...,0,0,0,0,0,0,0,0,0,10.609077
4,37651,Potbelly Sandwich Shop (Sundance Square | 97),4.9,78.0,"Deli, Salads, Sandwich, Healthy",32.753409,-97.332815,0,0,0,...,0,0,1,0,0,0,0,0,0,5.604493


In [None]:
print(f"item transactions shape: {item_df.shape}")

item transactions shape: (47120, 28)


## Creating y labels

In [None]:
y_label = (5-1) * np.random.random_sample((item_df.shape[0],1)) + 1
y_label[:5]

array([[4.38932891],
       [2.63907133],
       [3.76926708],
       [1.09424693],
       [2.04157719]])

## Training The Model

### Splitting data into training and test

In [None]:
num_user_f = user_df.shape[1]-3
num_item_f = item_df.shape[1]-6

user_t_f = ['res_id','American', 'Burgers',
       'Fast Food', 'Sandwich', 'Sandwiches', 'Healthy', 'Asian',
       'Comfort Food', 'Family Meals', 'Mexican', 'Breakfast and Brunch',
       'Desserts', 'Pizza', 'Salads', 'Chicken', 'Convenience', 'Italian',
       'Everyday Essentials', 'Wings', 'Family Friendly']
item_t_f = ['id','score', 'ratings', 'American',
       'Burgers', 'Fast Food', 'Sandwich', 'Sandwiches', 'Healthy', 'Asian',
       'Comfort Food', 'Family Meals', 'Mexican', 'Breakfast and Brunch',
       'Desserts', 'Pizza', 'Salads', 'Chicken', 'Convenience', 'Italian',
       'Everyday Essentials', 'Wings', 'Family Friendly']

print(f"user features: {num_user_f} | item features: {num_item_f}")

user features: 20 | item features: 22


In [None]:
item_weights = np.array([0, 0.1, 0.1] + [2]*20)
user_weights = np.array([0] + [2]*20)

In [None]:
# scaling the data
scalerItem = StandardScaler()
scalerItem.fit(item_df[item_t_f] * item_weights)
item_train = scalerItem.transform(item_df[item_t_f] * item_weights)

scalerUser = StandardScaler()
scalerUser.fit(user_df[user_t_f] * user_weights)
user_train = scalerUser.transform(user_df[user_t_f] * user_weights)

scalerTarget = MinMaxScaler((-1, 1))
scalerTarget.fit(y_label.reshape(-1, 1))
y_train = scalerTarget.transform(y_label.reshape(-1, 1))

In [None]:
print(f"the shape of\nuser_train: {user_train.shape} | item_train: {item_train.shape} | y_train: {y_train.shape}" )

the shape of
user_train: (47120, 21) | item_train: (47120, 23) | y_train: (47120, 1)


In [None]:
# splitting
item_train, item_test = train_test_split(item_train, train_size=0.80, shuffle=True, random_state=1)
user_train, user_test = train_test_split(user_train, train_size=0.80, shuffle=True, random_state=1)
y_train, y_test       = train_test_split(y_train,    train_size=0.80, shuffle=True, random_state=1)
print(f"item training data shape: {item_train.shape}")
print(f"item test data shape: {item_test.shape}")

item training data shape: (37696, 23)
item test data shape: (9424, 23)


### Model Architecture

In [None]:
# MODEL ARCHITECTURE
num_outputs = 32
tf.random.set_seed(1)
user_NN = tf.keras.models.Sequential([
       
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear')
    
])

item_NN = tf.keras.models.Sequential([
       
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(num_outputs, activation='linear')
    
])

# create the user input and point to the base network
input_user = tf.keras.layers.Input(shape=(num_user_f))
vu = user_NN(input_user)
vu = tf.linalg.l2_normalize(vu, axis=1)

# create the item input and point to the base network
input_item = tf.keras.layers.Input(shape=(num_item_f))
vm = item_NN(input_item)
vm = tf.linalg.l2_normalize(vm, axis=1)

# compute the dot product of the two vectors vu and vm
output = tf.keras.layers.Dot(axes=1)([vu, vm])

# specify the inputs and output of the model
model = tf.keras.Model([input_user, input_item], output)

model.summary()

Model: "model_12"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_25 (InputLayer)          [(None, 20)]         0           []                               
                                                                                                  
 input_26 (InputLayer)          [(None, 22)]         0           []                               
                                                                                                  
 sequential_24 (Sequential)     (None, 32)           13024       ['input_25[0][0]']               
                                                                                                  
 sequential_25 (Sequential)     (None, 32)           13280       ['input_26[0][0]']               
                                                                                           

In [None]:
# COMPILING
tf.random.set_seed(1)
cost_fn = tf.keras.losses.MeanSquaredError()
opt = tf.keras.optimizers.Adam(learning_rate=0.01)
model.compile(optimizer=opt,
              loss=cost_fn)

## TRAINING and TESTING...

In [None]:
# FITTING
tf.random.set_seed(1)
model.fit([user_train[:,1:], item_train[:,1:]], y_train, epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7ffb3b773f70>

In [None]:
# EVALUATING
model.evaluate([user_test[:,1:], item_test[:,1:]], y_test)



0.3499331474304199

## PREDICTING

In [None]:
user_df.columns

Index(['res_id', 'age', 'avg_per_transaction', 'American', 'Burgers',
       'Fast Food', 'Sandwich', 'Sandwiches', 'Healthy', 'Asian',
       'Comfort Food', 'Family Meals', 'Mexican', 'Breakfast and Brunch',
       'Desserts', 'Pizza', 'Salads', 'Chicken', 'Convenience', 'Italian',
       'Everyday Essentials', 'Wings', 'Family Friendly'],
      dtype='object')

In [None]:
id = [301]
age = []
tran = [] # usd
f_tran = [0]*20
f_tran[1] = 5.0
# f_tran[2] = 5.0

user_vec = np.array(id + age + tran + f_tran)
user_vec = np.tile(user_vec, (df.shape[0],1))
item_vec = df[item_t_f]
# scale our user and item vectors
suser_vecs = scalerUser.transform(user_vec)
sitem_vecs = scalerItem.transform(item_vec)

# make a prediction
y_p = model.predict([suser_vecs[:,1:], sitem_vecs[:,1:]])

# unscale y prediction 
y_pu = scalerTarget.inverse_transform(y_p)

# sort the results, highest prediction first
sorted_index = np.argsort(-y_pu,axis=0).reshape(-1).tolist()  #negate to get largest rating first
sorted_ypu   = y_pu[sorted_index]

sorted_index = [str(x) for x in sorted_index[:5]]
sorted_ypu = [str(x) for x in sorted_ypu[:5]]

print(f"user vector: {user_vec[0]}")
print(f"y value: {sorted_ypu}")
print(f"index: {sorted_index}")

df.iloc[sorted_index]

  60/1174 [>.............................] - ETA: 0s  



user vector: [301.   0.   5.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.   0.
   0.   0.   0.   0.   0.   0.   0.]
y value: ['[2.948278]', '[2.947183]', '[2.9457746]', '[2.9457276]', '[2.9454162]']
index: ['19006', '30649', '24823', '2186', '19845']


Unnamed: 0,id,name,score,ratings,category,lat,lng,American,Burgers,Fast Food,...,Desserts,Pizza,Salads,Chicken,Convenience,Italian,Everyday Essentials,Wings,Family Friendly,avg_price
19006,20401,"We, The Pizza",4.4,225.0,"Italian, Wings, Salads, Pizza, Healthy",38.85506,-77.049878,0,0,0,...,0,1,1,0,0,1,0,1,0,13.736667
30649,32758,Subway (Barbosa 65),4.4,277.0,"Sandwich, Salads, Breakfast and Brunch, Desser...",18.395449,-66.041189,1,0,1,...,1,1,1,0,0,1,0,0,0,6.80463
24823,26587,Pizza Boli's Wisconsin Avenue,4.7,335.0,"Pizza, American, Italian, Salads, Wings, Healt...",38.953568,-77.081746,1,0,0,...,0,1,1,0,0,1,0,1,1,16.194013
2186,2288,Pizza Shuttle,4.3,200.0,"Pizza, Salads, Wings, Fast Food, Desserts, Fam...",43.054655,-87.891048,0,0,1,...,1,1,1,0,0,0,0,1,0,18.257616
19845,21314,Chili's (4085 Virginia Beach Blvd.),4.1,200.0,"American, Burgers, Salads, Family Meals, Healt...",36.84158,-76.11536,1,1,0,...,0,0,1,0,0,0,0,1,0,11.444406
