In [16]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from scipy.sparse import csr_matrix, hstack
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import TruncatedSVD

In [17]:
# Load the dataset
menu_data = pd.read_csv('updated_dataset.csv')

In [18]:
menu_data.info()
menu_data.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5474 entries, 0 to 5473
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   restaurant_id  5474 non-null   int64  
 1   category_x     5474 non-null   object 
 2   name_x         5474 non-null   object 
 3   description    5474 non-null   object 
 4   price          5474 non-null   float64
 5   Rating         5474 non-null   int64  
 6   name_y         5474 non-null   object 
 7   category_y     5474 non-null   object 
 8   price_range    5474 non-null   object 
dtypes: float64(1), int64(2), object(6)
memory usage: 385.0+ KB


Unnamed: 0,restaurant_id,category_x,name_x,description,price,Rating,name_y,category_y,price_range
0,7942,Appetizer,Thai Fried Chicken Wings,Marinated and deep fried wings served with swe...,10.95,5,Thai Greenwood Restaurant,"Thai, Asian Fusion, Noodles",medium
1,11244,Appetizers,Mai Thai Spring,"Crispy rolls stuffed with marinated shrimp, se...",11.95,5,Mai Thai Portland,"Thai, Asian, Noodles",medium
2,2978,Appetizers,Kaki Fry,Fried oysters. Served with mayo and vegetable ...,6.75,5,Ramen Station,"Japanese, Ramen",low
3,8353,Appetizers,Botana Mixta,"Combination of guacamole, quesadillas, nacho, ...",18.0,5,Casa Guerrero,"Seafood, Mexican, Vegetarian Friendly",medium
4,1600,Appetizers,Homemade Fries,Served with owl dipping sauce.,3.75,5,The Green Owl,"American, Traditional American, Pub, Bar Food,...",low
5,1636,Appetizers,Bacon Mac Waffle Fry Tray,A full order of our Waffle Fries topped with h...,11.99,4,Brat House Grill,"American, Burgers, Desserts, Exclusive to Eats",medium
6,10384,Apsara Noodle Dishes,Stir Fried Cantonese Noodles,Choice of wind noodle or lo mein noodle. Prepa...,10.5,5,Apsara Palace Restaurant,"Asian Fusion, Steak, Salads",medium
7,12796,Appetizers,Steam Shrimp Dumbling,Serve w/ soy sauce.,12.0,5,Saigon Rendezvous,"Vietnamese, Thai, Family Friendly",medium
8,10719,Appetizers,Bacon cheese bread sticks,Bacon and mozzarella cheese,7.99,5,Crown Pizza,"Vegetarian Friendly, Vegan Friendly, Mediterra...",low
9,12864,Appetizers,Fresh Tofu Spring Rolls-Goi Cuon Dau Hu,"Carrot, daikon, lettuce, basil, and herbs serv...",5.0,5,Meekong Bar,"Asian Fusion, South East Asian, Vietnamese",low


In [19]:
# Preprocessing: Handle missing values
menu_data['description'] = menu_data['description'].fillna('')

In [20]:
# Combine textual features into one column for TF-IDF
menu_data['combined_text'] = menu_data['category_x'] + ' ' + menu_data['name_x'] + ' ' + menu_data['description']

In [21]:
# Initialize TF-IDF Vectorizer with bi-grams
tfidf = TfidfVectorizer(stop_words='english', ngram_range=(1, 3))
tfidf_matrix = tfidf.fit_transform(menu_data['combined_text'])

In [22]:
# Adding weight to the description feature
menu_data['description_weighted'] = menu_data['description'] * 3
menu_data['combined_text'] = menu_data['category_x'] + ' ' + menu_data['name_x'] + ' ' + menu_data['description_weighted']

In [23]:
# Keep a copy of the original ratings
menu_data['original_rating'] = menu_data['Rating']
menu_data['original_price'] = menu_data['price']

In [24]:
# Normalize numerical features (rating and price)
scaler = StandardScaler()
menu_data[['Rating', 'price']] = scaler.fit_transform(menu_data[['Rating', 'price']])

In [25]:
#Combine TF-IDF matrix with normalized numerical features
numerical_features = menu_data[['Rating', 'price']]
extended_numerical_features = csr_matrix(numerical_features.values)

In [26]:
# Combine sparse matrices directly
combined_features = hstack([tfidf_matrix, extended_numerical_features])

In [27]:
# Apply TruncatedSVD for dimensionality reduction
svd = TruncatedSVD(n_components=150)  # Adjust n_components based on the trade-off between memory and accuracy
reduced_features = svd.fit_transform(combined_features)

In [28]:
def cosine_similarity_manual(matrix):
    dot_product = np.dot(matrix, matrix.T)
    norm = np.linalg.norm(matrix, axis=1)
    similarity = dot_product / (norm[:, None] * norm[None, :])
    return similarity

# Calculate the cosine similarity matrix
cosine_sim = cosine_similarity_manual(reduced_features)

In [29]:
def recommend_menu(menu_name, menu_data, cosine_sim, top_n=5):
    if menu_name not in menu_data['name_x'].values:
        raise ValueError(f"{menu_name} not found in menu.")

    # Get the index of the input menu item
    input_idx = menu_data[menu_data['name_x'] == menu_name].index[0]
    
    # Get similarity scores for the input item
    sim_scores = list(enumerate(cosine_sim[input_idx]))
    
    # Sort by similarity score
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    
    # Get the top n most similar items (including the input menu item itself)
    top_items = sim_scores[:top_n]
    
    recommended_menus = [{
        'Menu Item': menu_data.loc[i, 'name_x'],
        'Category': menu_data.loc[i, 'category_x'],
        'Rating': menu_data.loc[i, 'original_rating'],
        'Description': menu_data.loc[i, 'description'],
        'Price': menu_data.loc[i, 'original_price'],
        'Booth Number': menu_data.loc[i, 'restaurant_id'],
        'Booth Name': menu_data.loc[i, 'name_y'],
        'Similarity Score': score
    } for i, score in top_items]

    return recommended_menus

In [30]:
# Example usage:
menu_name = "Scallop with Broccoli"
top_recommendations = recommend_menu(menu_name, menu_data, cosine_sim)

# Convert the recommendations to a DataFrame
recommendations_df = pd.DataFrame(top_recommendations)

print(f"Top {len(top_recommendations)} recommendations for '{menu_name}':")
print(recommendations_df.to_string(index=False))

Top 5 recommendations for 'Scallop with Broccoli':
                              Menu Item        Category  Rating                        Description  Price  Booth Number                         Booth Name  Similarity Score
                  Scallop with Broccoli Lunch - Seafood       5          Served with white rice. .   7.75          4074           Golden Chopsticks Buffet          1.000000
Oyster Sauce With Broccoli and Mushroom   Veggie Dishes       5            Served with white rice.   8.25          3404 Cantonese House Chinese Restaurant          0.939408
                Broccoli with Bean Curd      Vegetables       5            Served with white rice.   6.50          6683                         China Star          0.930360
                      Yu-shan Vegetable   Veggie Dishes       5       Hot. Served with white rice.   8.50          3404 Cantonese House Chinese Restaurant          0.928145
    Chicken with Broccoli and Snow Peas         Poultry       5 With white sauce. Wi

In [31]:
#from sklearn.metrics import precision_score, recall_score, mean_absolute_error

In [32]:
"""# Function to evaluate the model
def evaluate_model(actual_ratings, predicted_ratings):
    # Convert predicted ratings to integer values for comparison
    predicted_ratings = np.round(predicted_ratings).astype(int)
    
    # Calculate precision and recall for top recommendations
    precision = precision_score(actual_ratings, predicted_ratings, average='micro')
    recall = recall_score(actual_ratings, predicted_ratings, average='micro')
    
    # Calculate Mean Absolute Error (MAE) for rating predictions
    mae = mean_absolute_error(actual_ratings, predicted_ratings)
    
    return precision, recall, mae"""


"# Function to evaluate the model\ndef evaluate_model(actual_ratings, predicted_ratings):\n    # Convert predicted ratings to integer values for comparison\n    predicted_ratings = np.round(predicted_ratings).astype(int)\n    \n    # Calculate precision and recall for top recommendations\n    precision = precision_score(actual_ratings, predicted_ratings, average='micro')\n    recall = recall_score(actual_ratings, predicted_ratings, average='micro')\n    \n    # Calculate Mean Absolute Error (MAE) for rating predictions\n    mae = mean_absolute_error(actual_ratings, predicted_ratings)\n    \n    return precision, recall, mae"

In [33]:
"""# Example usage:
# Assuming you have actual ratings and predicted ratings from your model
actual_ratings = menu_data['original_rating'].values  # Actual ratings from your dataset
predicted_ratings = np.ones(len(actual_ratings)) * 5  # Example: Predicting all items with rating 5

precision, recall, mae = evaluate_model(actual_ratings, predicted_ratings)

print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")"""

'# Example usage:\n# Assuming you have actual ratings and predicted ratings from your model\nactual_ratings = menu_data[\'original_rating\'].values  # Actual ratings from your dataset\npredicted_ratings = np.ones(len(actual_ratings)) * 5  # Example: Predicting all items with rating 5\n\nprecision, recall, mae = evaluate_model(actual_ratings, predicted_ratings)\n\nprint(f"Precision: {precision:.4f}")\nprint(f"Recall: {recall:.4f}")\nprint(f"Mean Absolute Error (MAE): {mae:.4f}")'