In [5]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [6]:
# Load the Zomato data
df = pd.read_csv('BangaloreZomatoData.csv')

# Remove duplicates
df.drop_duplicates(inplace=True)

# Remove rows with NaN values
df.dropna(inplace=True)

# Fill missing values in 'PopularDishes' with 'Cuisines'
df['PopularDishes'].fillna(df['Cuisines'], inplace=True)

# Select relevant columns for content-based recommendation
content = df[['Name', 'Area', 'PopularDishes', 'Cuisines', 'AverageCost', 'Delivery Ratings', 'Dinner Ratings', 'IsHomeDelivery', 'isTakeaway', 'isIndoorSeating']].copy()  # Make a copy of the DataFrame

# Combine 'PopularDishes' and 'Cuisines' into a single feature using .loc
content.loc[:, 'CombinedFeatures'] = content['PopularDishes'] + ' ' + content['Cuisines']

# Drop unnecessary columns using .drop
content.drop(['PopularDishes', 'Cuisines'], axis=1, inplace=True)

# Drop rows with NaN values in the new feature using .dropna
content.dropna(inplace=True)


In [7]:
def recommend(dish):
    # Create TF-IDF vectorizer
    vectorizer = TfidfVectorizer(ngram_range=(1,2))
    
    # Fit and transform the combined features
    tfidf = vectorizer.fit_transform(content['CombinedFeatures'])
    
    # Transform the input dish into a TF-IDF vector
    query_vec = vectorizer.transform([dish])
    
    # Calculate cosine similarity between the input dish vector and all restaurant vectors
    similarity = cosine_similarity(query_vec, tfidf).flatten()
    
    # Get indices of top similar restaurants
    indices = np.argsort(similarity)[-3:][::-1]  # Get indices of top 3 most similar
    
    # Return recommended restaurants
    results = content.iloc[indices]
    
    return results


In [8]:
# Example usage: Recommend restaurants based on a dish
recommend('Andhra')


Unnamed: 0,Name,Area,AverageCost,Delivery Ratings,Dinner Ratings,IsHomeDelivery,isTakeaway,isIndoorSeating,CombinedFeatures
2835,Nagarjuna,"Koramangala 5th Block, Bangalore",700,4.3,4.4,1,1,1,"Andhra Mutton Curry, Gongura Pickle, Pepper Mu..."
2264,Shanmukha,"JP Nagar, Bangalore",650,4.1,4.4,1,1,1,"Pepper Chicken Wing, Spicy Andhra Biryani, Veg..."
2197,Meghana Foods,"Koramangala 5th Block, Bangalore",700,4.2,4.3,1,1,1,"Spicy Andhra Style Biryani, Meghna Special Chi..."


In [31]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings

# Ignore all warnings (not recommended for production code)
warnings.filterwarnings("ignore")

# Load restaurant data from CSV
df = pd.read_csv('BangaloreZomatoData.csv')

# Preprocess and clean data (remove duplicates, handle missing values, etc.)
df.drop_duplicates(inplace=True)
df.dropna(inplace=True)

# Extract relevant features for similarity calculation
features = df[['Name', 'Cuisines', 'PopularDishes']]

# Fill missing values in 'PopularDishes' with 'Cuisines'
features['PopularDishes'].fillna(features['Cuisines'], inplace=True)

# Combine relevant features into a single text-based feature for each restaurant
features['CombinedFeatures'] = features['Cuisines'] + ' ' + features['PopularDishes']

# Initialize TF-IDF vectorizer
vectorizer = TfidfVectorizer()

# Fit and transform the combined features to create TF-IDF vectors
tfidf_matrix = vectorizer.fit_transform(features['CombinedFeatures'])

# Calculate cosine similarities between restaurants based on TF-IDF vectors
restaurant_similarities = cosine_similarity(tfidf_matrix)
def item_item_collaborative_recommend(restaurant_name, top_n=5):
    try:
        # Find the index of the input restaurant in the dataframe
        restaurant_index = features.index[features['Name'] == restaurant_name]
        if restaurant_index.empty:
            print(f"No restaurant named '{restaurant_name}' found in the dataset.")
            return []

        restaurant_index = restaurant_index[0]

        # Get similarities of the input restaurant with all other restaurants
        similarities = list(enumerate(restaurant_similarities[restaurant_index]))

        # Sort similarities by score (descending order)
        similarities.sort(key=lambda x: x[1], reverse=True)

        # Exclude the input restaurant itself and get top similar restaurants
        similar_restaurants = []
        seen_restaurants = set()
        
        for idx, sim_score in similarities:
            if len(similar_restaurants) >= top_n:
                break
            if idx != restaurant_index:  # Exclude the input restaurant
                similar_restaurant_name = features.iloc[idx]['Name']
                if similar_restaurant_name not in seen_restaurants:
                    similar_restaurants.append(similar_restaurant_name)
                    seen_restaurants.add(similar_restaurant_name)

        return similar_restaurants
    except IndexError:
        print(f"No restaurant named '{restaurant_name}' found in the dataset.")
        return []

# Example usage: Recommend similar restaurants to 'Meghana Foods'
input_restaurant = 'Meghana Foods'
recommended_restaurants = item_item_collaborative_recommend(input_restaurant, top_n=5)

if recommended_restaurants:
    print(f"Top 5 Similar Restaurants to {input_restaurant}:")
    print(recommended_restaurants)
else:
    print("No recommendations available.")
    



Top 5 Similar Restaurants to Meghana Foods:
['Burger King', 'Plan B', 'The Hole In The Wall Cafe', 'Cafe Noir', "Chili's American Grill & Bar"]
