In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

# Load your dataset
df = pd.read_csv("C:/Users/DynaBook/Downloads/archive (11)/zomato.csv",encoding="ISO-8859-1")  # Replace with the actual path to your dataset

# Step 1: Preprocess the data
# Fill missing values if any
df['Cuisines'] = df['Cuisines'].fillna('Unknown')
df['Price range'] = df['Price range'].fillna(df['Price range'].mean())
df['Aggregate rating'] = df['Aggregate rating'].fillna(df['Aggregate rating'].mean())

# Step 2: Combine relevant features (Cuisines, Price Range, and Aggregate Rating) into a single string
df['Combined Features'] = df['Cuisines'] + ' ' + df['Price range'].astype(str) + ' ' + df['Aggregate rating'].astype(str)

# Step 3: Create a count matrix based on the combined features
count_vectorizer = CountVectorizer()
count_matrix = count_vectorizer.fit_transform(df['Combined Features'])

# Step 4: Compute cosine similarity based on the count matrix
cosine_sim = cosine_similarity(count_matrix)

# Step 5: Function to recommend similar restaurants
def recommend_restaurants(restaurant_name, df, cosine_sim):
    try:
        # Get the index of the restaurant that matches the name
        idx = df[df['Restaurant Name'] == restaurant_name].index[0]
    except IndexError:
        return "Restaurant not found in the dataset."
    
    # Get a list of similarity scores for all restaurants
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the restaurants by similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the indices of the most similar restaurants (excluding the input restaurant itself)
    sim_scores = sim_scores[1:6]  # Adjust the number of recommendations as needed (e.g., top 5 similar restaurants)

    # Return the top 5 most similar restaurants
    restaurant_indices = [i[0] for i in sim_scores]
    
    # Return a DataFrame of recommended restaurants with relevant details
    return df[['Restaurant Name', 'Cuisines', 'Price range', 'Aggregate rating']].iloc[restaurant_indices]

# Example usage
restaurant_to_recommend = 'Din Tai Fung' 
recommendations = recommend_restaurants(restaurant_to_recommend, df, cosine_sim)

# Display the recommendations
print(recommendations)


                  Restaurant Name Cuisines  Price range  Aggregate rating
93   House of China Restaurant II  Chinese            1               3.8
172            Ting's Red Lantern  Chinese            1               4.2
199  Chef Lee's Peking Restaurant  Chinese            2               4.0
241                    China Cafe  Chinese            2               4.0
263              Tsing Tsao South  Chinese            1               4.1
