In [40]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# 1. Load the dataset
data = pd.read_csv("final.csv")

# 2. Check the columns (modify as needed based on your dataset)
# print(data.head())

# Example columns: ['name', 'cuisine', 'price_for_two', 'signature_dish', 'location', 'rating']

# 3. Create a 'profile' combining relevant features including the signature dish
data['profile'] = (
    data['cuisine'].astype(str) + " " +
    data['price_for_two'].astype(str) + " " +
    data['signature dishes'].fillna("").astype(str)
)

# 4. Vectorize the profiles using TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(data['profile'])

# 5. Define a function to get restaurant recommendations based on user input
def recommend_restaurants(liked_restaurant, cuisine, price_for_two, top_n=5):
    # Filter the dataset based on input cuisine and price range
    filtered_data = data[
        (data['cuisine'].str.contains(cuisine, case=False, na=False)) &
        (data['price_for_two'] <= price_for_two)
    ]

    if filtered_data.empty:
        return "No restaurants found matching your criteria."

    # Find the index of the liked restaurant
    liked_index = data[data['names'].str.contains(liked_restaurant, case=False, na=False)].index

    if liked_index.empty:
        return "The liked restaurant is not found in the dataset."

    liked_index = liked_index[0]

    # Calculate similarity between the liked restaurant and others
    similarity_scores = cosine_similarity(tfidf_matrix[liked_index], tfidf_matrix[filtered_data.index])

    # Get top N recommendations (excluding the liked restaurant itself)
    filtered_data['similarity'] = similarity_scores.flatten()
    recommendations = (
        filtered_data.sort_values(by='similarity', ascending=False)
        .head(top_n + 1)  # +1 to include the liked restaurant for filtering
    )

    # Exclude the liked restaurant from the final recommendations
    recommendations = recommendations[recommendations['names'].str.lower() != liked_restaurant.lower()]

    # Return the top N recommendations
    return recommendations[['names', 'cuisine', 'signature dishes', 'price_for_two', 'ratings', 'location']]

# 6. Example: Get user input and recommend restaurants
liked_restaurant = input("Enter a restaurant you liked: ")
cuisine = input("Enter preferred cuisine: ")
price_for_two = int(input("Enter your budget for two people: "))

# 7. Get recommendations
result = recommend_restaurants(liked_restaurant, cuisine, price_for_two)[:5]
result = pd.DataFrame(result)
# result = result.to_dict('records')
print(result.head())


Enter a restaurant you liked: nannayya
Enter preferred cuisine: 
Enter your budget for two people: 2500
                                                  names  \
1887                                Shree Golden Palace   
760                                    Vantinti Kathalu   
238                                           Bisi Bisi   
1754                     Garam Masala Family Restaurant   
996   Hotel Mayukha Multicuisine Family Restaurant &...   

                                                cuisine signature dishes  \
1887       North Indian, Biryani, Chinese, South Indian              NaN   
760                     South Indian, Biryani, Desserts              NaN   
238   South Indian, Chinese, North Indian, Wraps, De...              NaN   
1754                     North Indian, Chinese, Biryani              NaN   
996                      North Indian, Biryani, Chinese              NaN   

      price_for_two  ratings                 location  
1887          800.0      4.0

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['similarity'] = similarity_scores.flatten()


In [46]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load dataset
df = pd.read_csv("final.csv")

# Step 1: Vectorize and Precompute Similarity Matrices
def compute_similarity_matrix(column):
    vectorizer = TfidfVectorizer(stop_words='english')
    matrix = vectorizer.fit_transform(df[column].fillna(''))
    return cosine_similarity(matrix)

cuisine_sim_matrix = compute_similarity_matrix('cuisine')
dish_sim_matrix = compute_similarity_matrix('signature dishes')
location_sim_matrix = compute_similarity_matrix('location')

# Step 2: Save matrices (optional) to avoid recomputing every time
# np.save('cuisine_sim_matrix.npy', cuisine_sim_matrix)
# np.save('dish_sim_matrix.npy', dish_sim_matrix)
# np.save('location_sim_matrix.npy', location_sim_matrix)

# Step 3: Function to Recommend Restaurants Using Precomputed Similarity
def recommend_restaurants_with_precomputed(
    prev_restaurant=None, min_budget=None, max_budget=None, user_location=None, top_n=5
):
    filtered_df = df

    # Filter by budget if provided
    if min_budget is not None and max_budget is not None:
        filtered_df = filtered_df[
            (filtered_df['price_for_two'] >= min_budget) &
            (filtered_df['price_for_two'] <= max_budget)
        ]

    # Handle missing restaurant input
    idx = df.index[0]  # Default to first restaurant if input not provided
    if prev_restaurant:
        try:
            idx = df[df['names'].str.lower() == prev_restaurant.lower()].index[0]
        except IndexError:
            print("Previous restaurant not found, using default.")

    # Use Precomputed Similarity Scores
    cuisine_sim = cuisine_sim_matrix[idx]
    dish_sim = dish_sim_matrix[idx]

    if user_location:
        location_sim = location_sim_matrix[idx]
    else:
        location_sim = np.zeros(len(df))

    # Calculate Final Score
    final_scores = (
        0.3 * cuisine_sim +
        0.3 * dish_sim +
        0.2 * location_sim
    )

    # Get Top Recommendations
    top_indices = np.argsort(final_scores)[::-1][:top_n]
    recommendations = df.iloc[top_indices][
        ['names', 'cuisine', 'location', 'price_for_two', 'ratings']
    ]
    return recommendations

# Example Usage
city_rec = recommend_restaurants_with_precomputed(
    prev_restaurant="osaka",
    user_location="", top_n=5
)

print("Top Recommended Restaurants:")
print(city_rec)


Top Recommended Restaurants:
                                 names  \
401                              Osaka   
10      Sakura - A Far Eastern Kitchen   
1426                      China Bistro   
979                            You Mee   
137   Ha - Luo Oriental Cafe & Kitchen   

                                                cuisine  \
401                Asian, Japanese, Sushi, Korean, Thai   
10    Japanese, Asian, Thai, Sushi, Korean, Chinese,...   
1426  Chinese, Asian, Sushi, Japanese, Korean, Thai,...   
979   Asian, Japanese, Sushi, Chinese, Thai, Dessert...   
137   Oriental, Chinese, Japanese, Sichuan, Desserts...   

                      location  price_for_two  ratings  
401   Jubilee Hills, Hyderabad         1000.0      4.2  
10    Jubilee Hills, Hyderabad         1600.0      4.4  
1426  Jubilee Hills, Hyderabad         1800.0      4.6  
979      Kukatpally, Hyderabad         2000.0      4.4  
137     Hitech City, Hyderabad         1400.0      4.5  
