In [1]:
# Task 2: Restaurant Recommendation System
# Internship at Cognifyz Technologies - Machine Learning Intern

# Objective: Recommend restaurants based on user preferences using content-based filtering

# Step 1: Import required libraries
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Step 2: Load dataset
df = pd.read_csv("Dataset .csv")
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3.0,4.8,Dark Green,Excellent,314.0
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3.0,4.5,Dark Green,Excellent,591.0
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4.0,4.4,Green,Very Good,270.0
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4.0,4.9,Dark Green,Excellent,365.0
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4.0,4.8,Dark Green,Excellent,229.0


In [11]:
# Step 3: Keep relevant columns for recommendation
df_rec = df[['Restaurant Name', 'Cuisines', 'Average Cost for two', 'Currency', 'Aggregate rating']].copy()
df_rec = df_rec.dropna(subset=['Cuisines'])

In [12]:
# Step 4: Convert text features into a single string for content-based filtering
def preprocess(row):
    return f"{row['Cuisines']} {row['Currency']} Cost_{int(row['Average Cost for two'])}"

df_rec = df_rec.copy()
df_rec['content'] = df_rec.apply(preprocess, axis=1)

In [13]:
# Step 5: Convert content to TF-IDF vectors
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_rec['content'])

In [14]:
# Step 6: Compute cosine similarity matrix
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [15]:
# Step 7: Recommendation function
def recommend_restaurants(user_cuisine, user_currency, user_budget, top_n=5):
    user_pref = f"{user_cuisine} {user_currency} Cost_{user_budget}"
    user_vec = tfidf.transform([user_pref])
    sim_scores = cosine_similarity(user_vec, tfidf_matrix).flatten()
    top_indices = sim_scores.argsort()[-top_n:][::-1]
    
    return df_rec.iloc[top_indices][['Restaurant Name', 'Cuisines', 'Average Cost for two', 'Aggregate rating']]

In [16]:
# Step 8: Sample user input and test
sample_recommendation = recommend_restaurants(
    user_cuisine='Italian',
    user_currency='Indian Rupees',
    user_budget=500
)

sample_recommendation

Unnamed: 0,Restaurant Name,Cuisines,Average Cost for two,Aggregate rating
1482,Chilli Indiana,Italian,500.0,3.8
1106,Da Pizza Zone,Italian,500.0,0.0
3705,Sinyora's,Italian,500.0,4.0
1324,Angrezee Choupal,"North Indian, Chinese, Italian",500.0,3.8
3883,Penta Cafe,"Fast Food, Italian",500.0,3.6
