In [11]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the dataset
data=pd.read_csv('/content/Dataset .csv')
data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [12]:
data.info()
# Fill missing values with empty strings
data.fillna('', inplace=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

In [13]:
# Select columns we want to use for recommendation
selected_columns = [
    'Cuisines',
    'Average Cost for Two',
    'Currency',
    'Has Table Booking',
    'Has Online Delivery',
    'Is Delivering Now',
    'Price Range',
    'Votes'
]

# Filter only columns that exist in the dataset
columns_to_use = [col for col in selected_columns if col in data.columns]

# Combine selected features into a single string
data['combined_features'] = data[columns_to_use].astype(str).apply(lambda x: ' '.join(x), axis=1)

# Convert text features into numbers using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(data['combined_features'])

# Calculate similarity between all restaurants
similarity_scores = cosine_similarity(tfidf_matrix, tfidf_matrix)

In [4]:
# Function to recommend similar restaurants
def recommend_restaurants(restaurant_name, top_n=5):
    if restaurant_name not in data['Restaurant Name'].values:
        print("Restaurant not found in dataset.")
        print("\nSome available restaurant names:")
        print(data['Restaurant Name'].head(10).to_list())
        return []

    # Get the index of the given restaurant
    idx = data[data['Restaurant Name'] == restaurant_name].index[0]

    # Get similarity scores for this restaurant with all others
    scores = list(enumerate(similarity_scores[idx]))

    # Sort scores in descending order (skip the first one because it is the same restaurant)
    sorted_scores = sorted(scores, key=lambda x: x[1], reverse=True)[1:top_n+1]

    # Get restaurant names for top matches
    recommended = [data.iloc[i[0]]['Restaurant Name'] for i in sorted_scores]
    return recommended

In [6]:
# Test the recommendation system
sample_restaurant = data['Restaurant Name'].iloc[0]
print(f"\nTop recommended restaurants for '{sample_restaurant}':")
results = recommend_restaurants(sample_restaurant)
for i, name in enumerate(results, start=1):
    print(f"{i}. {name}")



Top recommended restaurants for 'Le Petit Souffle':
1. Izakaya Kikufuji
2. Ooma
3. Kake Di Hatti
4. Sambo Kojin
5. NIU by Vikings
