#  Restaurant Recommendation based on user preference 

In [1]:
import pandas as pd

In [2]:
data = pd.read_csv('Dataset .csv')

In [3]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [4]:
data.head(5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


## Pre-Processing

In [5]:
# Removing the unnecessary patterns
import re
pattern = r'��+|\�_+|���+|�'

# Use DataFrame.apply() to clean all columns
data = data.apply(lambda col: col.map(lambda x: re.sub(pattern, '', x) if isinstance(x, str) else x))

# Save the cleaned data if needed
data.to_csv('cleaned_dataset.csv', index=False)

In [6]:
data.head(5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [7]:
data.columns

Index(['Restaurant ID', 'Restaurant Name', 'Country Code', 'City', 'Address',
       'Locality', 'Locality Verbose', 'Longitude', 'Latitude', 'Cuisines',
       'Average Cost for two', 'Currency', 'Has Table booking',
       'Has Online delivery', 'Is delivering now', 'Switch to order menu',
       'Price range', 'Aggregate rating', 'Rating color', 'Rating text',
       'Votes'],
      dtype='object')

In [8]:
# sorting out the most important columns
data = data[['Restaurant Name', 'Cuisines', 'Price range', 'Average Cost for two', 'Aggregate rating', 'Votes'] ]

In [9]:
data


Unnamed: 0,Restaurant Name,Cuisines,Price range,Average Cost for two,Aggregate rating,Votes
0,Le Petit Souffle,"French, Japanese, Desserts",3,1100,4.8,314
1,Izakaya Kikufuji,Japanese,3,1200,4.5,591
2,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4,4000,4.4,270
3,Ooma,"Japanese, Sushi",4,1500,4.9,365
4,Sambo Kojin,"Japanese, Korean",4,1500,4.8,229
...,...,...,...,...,...,...
9546,Naml۱ Gurme,Turkish,3,80,4.1,788
9547,Ceviz Aac۱,"World Cuisine, Patisserie, Cafe",3,105,4.2,1034
9548,Huqqa,"Italian, World Cuisine",4,170,3.7,661
9549,Ak Kahve,Restaurant Cafe,4,120,4.0,901


In [10]:
data['Cuisines'].value_counts()

Cuisines
North Indian                                             936
North Indian, Chinese                                    511
Chinese                                                  354
Fast Food                                                354
North Indian, Mughlai                                    334
                                                        ... 
Bengali, Fast Food                                         1
North Indian, Rajasthani, Asian                            1
Chinese, Thai, Malaysian, Indonesian                       1
Bakery, Desserts, North Indian, Bengali, South Indian      1
Italian, World Cuisine                                     1
Name: count, Length: 1825, dtype: int64

In [11]:
data.describe()

Unnamed: 0,Price range,Average Cost for two,Aggregate rating,Votes
count,9551.0,9551.0,9551.0,9551.0
mean,1.804837,1199.210763,2.66637,156.909748
std,0.905609,16121.183073,1.516378,430.169145
min,1.0,0.0,0.0,0.0
25%,1.0,250.0,2.5,5.0
50%,2.0,400.0,3.2,31.0
75%,2.0,700.0,3.7,131.0
max,4.0,800000.0,4.9,10934.0


In [12]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 6 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant Name       9551 non-null   object 
 1   Cuisines              9542 non-null   object 
 2   Price range           9551 non-null   int64  
 3   Average Cost for two  9551 non-null   int64  
 4   Aggregate rating      9551 non-null   float64
 5   Votes                 9551 non-null   int64  
dtypes: float64(1), int64(3), object(2)
memory usage: 447.8+ KB


In [13]:
for i in data.columns:
    print(f"{i}: {data[i].nunique()}")

Restaurant Name: 7445
Cuisines: 1825
Price range: 4
Average Cost for two: 140
Aggregate rating: 33
Votes: 1012


In [14]:
for i in data.columns:
    print(f"{i}: {data[i].isna().sum()}")

Restaurant Name: 0
Cuisines: 9
Price range: 0
Average Cost for two: 0
Aggregate rating: 0
Votes: 0


In [15]:
data = data.dropna()


In [16]:
data

Unnamed: 0,Restaurant Name,Cuisines,Price range,Average Cost for two,Aggregate rating,Votes
0,Le Petit Souffle,"French, Japanese, Desserts",3,1100,4.8,314
1,Izakaya Kikufuji,Japanese,3,1200,4.5,591
2,Heat - Edsa Shangri-La,"Seafood, Asian, Filipino, Indian",4,4000,4.4,270
3,Ooma,"Japanese, Sushi",4,1500,4.9,365
4,Sambo Kojin,"Japanese, Korean",4,1500,4.8,229
...,...,...,...,...,...,...
9546,Naml۱ Gurme,Turkish,3,80,4.1,788
9547,Ceviz Aac۱,"World Cuisine, Patisserie, Cafe",3,105,4.2,1034
9548,Huqqa,"Italian, World Cuisine",4,170,3.7,661
9549,Ak Kahve,Restaurant Cafe,4,120,4.0,901


In [17]:
data = data.sort_values(by=['Restaurant Name','Aggregate rating'],ascending=False)
data

Unnamed: 0,Restaurant Name,Cuisines,Price range,Average Cost for two,Aggregate rating,Votes
3120,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",3,1500,4.1,492
9334,wagamama,"Japanese, Asian",4,70,3.7,131
9523,ukuraa Sofras۱,"Kebab, Izgara",3,60,4.4,296
9454,tashas,"Cafe, Mediterranean",4,320,4.1,374
4659,t Lounge by Dilmah,"Cafe, Tea, Desserts",2,800,3.6,34
...,...,...,...,...,...,...
8692,#Urban Caf,"North Indian, Chinese, Italian",2,650,3.3,49
6998,#OFF Campus,"Cafe, Continental, Italian, Fast Food",2,800,3.7,216
2613,#InstaFreeze,Ice Cream,1,300,0.0,2
9148,#Dilliwaala6,North Indian,3,800,3.7,124


In [18]:
data['Restaurant Name'].value_counts()

Restaurant Name
Cafe Coffee Day        83
Domino's Pizza         79
Subway                 63
Green Chick Chop       51
McDonald's             48
                       ..
Parul's Cooking Hub     1
Pash!                   1
Passage 2 India         1
Pasta La Vista          1
#45                     1
Name: count, Length: 7436, dtype: int64

In [19]:
data = data.sort_values(by=['Restaurant Name','Aggregate rating'],ascending=False)

In [20]:
data[data['Restaurant Name']=='Cafe Coffee Day'].head(5)

Unnamed: 0,Restaurant Name,Cuisines,Price range,Average Cost for two,Aggregate rating,Votes
6430,Cafe Coffee Day,Cafe,1,450,3.6,58
8432,Cafe Coffee Day,Cafe,1,450,3.6,125
3946,Cafe Coffee Day,Cafe,1,450,3.5,35
5877,Cafe Coffee Day,Cafe,1,450,3.5,50
3001,Cafe Coffee Day,Cafe,1,450,3.4,277


In [21]:
data = data.drop_duplicates('Restaurant Name',keep='first')
data

Unnamed: 0,Restaurant Name,Cuisines,Price range,Average Cost for two,Aggregate rating,Votes
3120,{Niche} - Cafe & Bar,"North Indian, Chinese, Italian, Continental",3,1500,4.1,492
9334,wagamama,"Japanese, Asian",4,70,3.7,131
9523,ukuraa Sofras۱,"Kebab, Izgara",3,60,4.4,296
9454,tashas,"Cafe, Mediterranean",4,320,4.1,374
4659,t Lounge by Dilmah,"Cafe, Tea, Desserts",2,800,3.6,34
...,...,...,...,...,...,...
8692,#Urban Caf,"North Indian, Chinese, Italian",2,650,3.3,49
6998,#OFF Campus,"Cafe, Continental, Italian, Fast Food",2,800,3.7,216
2613,#InstaFreeze,Ice Cream,1,300,0.0,2
9148,#Dilliwaala6,North Indian,3,800,3.7,124


In [22]:
data['Restaurant Name'].value_counts()

Restaurant Name
{Niche} - Cafe & Bar    1
Frequent Bakes          1
Fozzie's Pizzaiolo      1
Frasers                 1
Fratini La Trattoria    1
                       ..
Pizza Street            1
Pizza Treat             1
Pizza Yum               1
Pizza l Forno           1
#45                     1
Name: count, Length: 7436, dtype: int64

In [23]:
data['Cuisines'].value_counts()

Cuisines
North Indian                             826
North Indian, Chinese                    441
Chinese                                  295
Fast Food                                267
North Indian, Mughlai                    244
                                        ... 
Asian, Chinese, Thai, Japanese             1
North Indian, Afghani                      1
Italian, North Indian, South Indian        1
Kerala, Fast Food                          1
Cafe, Continental, Italian, Fast Food      1
Name: count, Length: 1725, dtype: int64

In [24]:
# Split the cuisines into list
data['Cuisines'] = data['Cuisines'].str.split(', ')
data

Unnamed: 0,Restaurant Name,Cuisines,Price range,Average Cost for two,Aggregate rating,Votes
3120,{Niche} - Cafe & Bar,"[North Indian, Chinese, Italian, Continental]",3,1500,4.1,492
9334,wagamama,"[Japanese, Asian]",4,70,3.7,131
9523,ukuraa Sofras۱,"[Kebab, Izgara]",3,60,4.4,296
9454,tashas,"[Cafe, Mediterranean]",4,320,4.1,374
4659,t Lounge by Dilmah,"[Cafe, Tea, Desserts]",2,800,3.6,34
...,...,...,...,...,...,...
8692,#Urban Caf,"[North Indian, Chinese, Italian]",2,650,3.3,49
6998,#OFF Campus,"[Cafe, Continental, Italian, Fast Food]",2,800,3.7,216
2613,#InstaFreeze,[Ice Cream],1,300,0.0,2
9148,#Dilliwaala6,[North Indian],3,800,3.7,124


In [25]:
# Breaking the list such that each cuisine have a specific row
data = data.explode('Cuisines')
data

Unnamed: 0,Restaurant Name,Cuisines,Price range,Average Cost for two,Aggregate rating,Votes
3120,{Niche} - Cafe & Bar,North Indian,3,1500,4.1,492
3120,{Niche} - Cafe & Bar,Chinese,3,1500,4.1,492
3120,{Niche} - Cafe & Bar,Italian,3,1500,4.1,492
3120,{Niche} - Cafe & Bar,Continental,3,1500,4.1,492
9334,wagamama,Japanese,4,70,3.7,131
...,...,...,...,...,...,...
6998,#OFF Campus,Italian,2,800,3.7,216
6998,#OFF Campus,Fast Food,2,800,3.7,216
2613,#InstaFreeze,Ice Cream,1,300,0.0,2
9148,#Dilliwaala6,North Indian,3,800,3.7,124


In [26]:
data['Cuisines'].unique()

array(['North Indian', 'Chinese', 'Italian', 'Continental', 'Japanese',
       'Asian', 'Kebab', 'Izgara', 'Cafe', 'Mediterranean', 'Tea',
       'Desserts', 'British', 'Contemporary', 'American', 'Biryani',
       'South Indian', 'Fast Food', 'Healthy Food', 'Bakery',
       'International', 'Sandwich', 'Kashmiri', 'Mughlai', 'Pizza',
       'Indian', 'Seafood', 'Burger', 'Vegetarian', 'Juices', 'Beverages',
       'Lebanese', 'Middle Eastern', 'Arabian', 'Thai', 'Turkish Pizza',
       'Finger Food', 'Sushi', 'Ramen', 'Steak', 'Brazilian', 'Tibetan',
       'Awadhi', 'Pakistani', 'Kerala', 'Street Food', 'Lucknowi', 'Dner',
       'Bengali', 'Mithai', 'Nepalese', 'European', 'BBQ', 'Dim Sum',
       'Hyderabadi', 'Mexican', 'Salad', 'South American', 'Bar Food',
       'Charcoal Grill', 'Breakfast', 'Hawaiian', 'Latin American',
       'Argentine', 'Filipino', 'Restaurant Cafe', 'Ice Cream', 'Tapas',
       'Vietnamese', 'Sri Lankan', 'Andhra', 'Chettinad', 'Western',
       'Rajasth

In [27]:
data['Cuisines'].value_counts()

Cuisines
North Indian    3270
Chinese         2289
Fast Food       1262
Mughlai          777
Continental      670
                ... 
Peruvian           1
Sunda              1
Durban             1
Irish              1
Bubble Tea         1
Name: count, Length: 145, dtype: int64

## Model Building

In [28]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import jaccard_score
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np

In [29]:
# group the cuisines by restaurant
data_grouped = data.groupby(['Restaurant Name', 'Price range', 'Average Cost for two', 'Aggregate rating', 'Votes'])['Cuisines'].apply(lambda x: list(set(x))).reset_index()
data_grouped

Unnamed: 0,Restaurant Name,Price range,Average Cost for two,Aggregate rating,Votes,Cuisines
0,#45,2,600,3.6,209,[Cafe]
1,#Dilliwaala6,3,800,3.7,124,[North Indian]
2,#InstaFreeze,1,300,0.0,2,[Ice Cream]
3,#OFF Campus,2,800,3.7,216,"[Fast Food, Continental, Italian, Cafe]"
4,#Urban Caf,2,650,3.3,49,"[North Indian, Italian, Chinese]"
...,...,...,...,...,...,...
7431,t Lounge by Dilmah,2,800,3.6,34,"[Desserts, Tea, Cafe]"
7432,tashas,4,320,4.1,374,"[Mediterranean, Cafe]"
7433,ukuraa Sofras۱,3,60,4.4,296,"[Kebab, Izgara]"
7434,wagamama,4,70,3.7,131,"[Japanese, Asian]"


In [30]:

# encode the grouped cuisines using MultiLabelBinarizer
mlb = MultiLabelBinarizer()
cuisine_encoded = mlb.fit_transform(data_grouped['Cuisines'])

# Extract numerical features for cosine similarity
numerical_features = ['Price range', 'Average Cost for two', 'Aggregate rating']
numerical_matrix = data_grouped[numerical_features].values

# Compute cosine similarity for numerical features
cosine_sim = cosine_similarity(numerical_matrix)


In [56]:
def recommend_restaurants(preferred_cuisines, price_range, min_rating, top_n=5):
    # Filter based on user preferences for price range and rating
    filtered_df = data_grouped[(data_grouped['Price range'] == int(price_range)) & 
                               (data_grouped['Aggregate rating'] >= min_rating)]
    
    # Jaccard similarity for preferred cuisines
    preferred_cuisine_encoded = mlb.transform([preferred_cuisines])[0]  # Get the 1D array of cuisines
    
    # Initialize list to store combined similarities
    combined_similarities = []
    
    for index, row in filtered_df.iterrows():
        # Encode the restaurant's cuisine to match the user preference encoding
        restaurant_cuisine_encoded = cuisine_encoded[index]
        
        # Calculate Jaccard similarity for cuisines
        jaccard_sim = jaccard_score(preferred_cuisine_encoded, restaurant_cuisine_encoded, average='binary')
        
        # Extract numerical features for cosine similarity
        restaurant_features = np.array([[row['Price range'], row['Average Cost for two'], row['Aggregate rating']]])
        user_features = np.array([[int(price_range), row['Average Cost for two'], min_rating]])
        
        # Calculate cosine similarity for numerical features
        cosine_sim = cosine_similarity(restaurant_features, user_features)[0][0]
        
        # Combine Jaccard and Cosine similarity (you can weight them if necessary)
        combined_score = 0.5 * jaccard_sim + 0.5 * cosine_sim
        
        combined_similarities.append((row['Restaurant Name'], combined_score))
    
    # Sort by combined similarity score and get unique restaurant names
    combined_similarities.sort(key=lambda x: x[1], reverse=True)
    
    
    seen = set()
    recommended_restaurants = []
    for restaurant, score in combined_similarities:
        if restaurant not in seen:
            recommended_restaurants.append(restaurant)
            seen.add(restaurant)
        if len(recommended_restaurants) == top_n:
            break
    
    return recommended_restaurants




In [61]:
# Here you can run this to make reccomendations
def get_user_preferences():
    preferred_cuisines = [cuisine.strip() for cuisine in input("Enter your preferred cuisines (comma-separated): ").split(',')]
    price_range = input("Enter your preferred price range (1 to 4): ")
    min_rating = float(input("Enter your minimum aggregate rating (0 to 5): "))
    
    return preferred_cuisines, price_range,  min_rating


preferred_cuisines,price_range, min_rating =  get_user_preferences()
print(f"For the cuisines: {preferred_cuisines}, with minimum rating of {min_rating}, at a preferred price range of {price_range} the restuarants are:")
print(f"{recommend_restaurants(preferred_cuisines, price_range, min_rating)}")


Enter your preferred cuisines (comma-separated):  Chinese, Pizza
Enter your preferred price range (1 to 4):  3
Enter your minimum aggregate rating (0 to 5):  3


For the cuisines: ['Chinese', 'Pizza'], with minimum rating of 3.0, at a preferred price range of 3 the restuarants are:
['Pizzeria Vaatika Cafe', 'Chinese Dragon Cafe', 'Red - Bellagio', 'Joy Luck Moon', 'Chin Chin']


##### 1.This code utilizes Jaccard similarity to evaluate the overlap of preferred cuisines between the user and restaurants, allowing for personalized recommendations based on user-defined culinary preferences.
##### 2. The implementation of Cosine similarity for numerical features (Price range, Average Cost for two, and Aggregate rating) ensures that the recommendations are not only based on cuisine but also consider user preferences for price and quality, leading to more relevant results.
##### 3. The algorithm combines both Jaccard and Cosine similarities with equal weighting, which may provide balanced recommendations; however, users are encouraged to adjust these weightings based on their specific preferences to enhance recommendation relevance.
##### 4. The results generated by this system are based on the underlying dataset's quality and structure. Proper preprocessing and handling of the data can significantly improve the accuracy and usefulness of the recommendations provided.