In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, Concatenate, Dense, Flatten
from tensorflow.keras.models import Model
import warnings

warnings.filterwarnings('ignore', category=FutureWarning)
tf.get_logger().setLevel('ERROR')

# ---------- DATA PREPROCESSING ----------
# Load the raw dataset; a missing file is fatal for everything below.
try:
    final_data = pd.read_csv('enhanced_zomato_dataset_clean.csv')
except FileNotFoundError:
    print("Error: 'enhanced_zomato_dataset_clean.csv' not found.")
    # SystemExit instead of exit(): exit() is an interactive helper injected by
    # `site`, and a missing input file should yield a non-zero exit status.
    raise SystemExit(1) from None

# Keep only the columns used downstream. `.copy()` detaches the selection from
# the frame returned by read_csv so the column assignments below do not write
# into a slice (SettingWithCopyWarning).
final_data = final_data[
    ['Restaurant_Name', 'City', 'Cuisine', 'Average_Rating', 'Prices', 'Place_Name', 'Item_Name']
].copy()

# Normalise the free-text columns so user input can be compared case-insensitively.
for text_col in ('Cuisine', 'City', 'Item_Name'):
    final_data[text_col] = final_data[text_col].str.lower().str.strip()

# Rows missing any required field are dropped. (The original also called
# fillna() on Prices / Average_Rating afterwards, which was a no-op because
# this dropna() had already removed every NaN in those columns.)
final_data = final_data.dropna(
    subset=['Restaurant_Name', 'City', 'Cuisine', 'Average_Rating', 'Prices', 'Item_Name']
)
final_data = final_data.drop_duplicates(subset=['Restaurant_Name', 'Item_Name'])

# Filter valid ranges (bounds are inclusive on both sides).
final_data = final_data[
    final_data['Average_Rating'].between(1.0, 5.0)
    & final_data['Prices'].between(30, 5000)
].copy()

# NOTE: this ratio is derived from the target (Average_Rating); it is kept as a
# descriptive column only and must not be fed to the model as a feature.
final_data['Price_Rating_Ratio'] = final_data['Prices'] / (final_data['Average_Rating'] + 1.0)

# Integer-encode the categorical columns for the embedding layers.
cuisine_le = LabelEncoder()
city_le = LabelEncoder()
final_data['Cuisine_enc'] = cuisine_le.fit_transform(final_data['Cuisine'])
final_data['City_enc'] = city_le.fit_transform(final_data['City'])

# Split BEFORE fitting the scaler so that the min/max used for scaling come
# from the training rows only (fitting on the full frame leaks test statistics).
train, test = train_test_split(final_data, test_size=0.2, random_state=42)

scaled_columns = ['Average_Rating', 'Prices', 'Price_Rating_Ratio']
scaler = MinMaxScaler()
scaler.fit(train[scaled_columns])
# Only the scaled price (column index 1) is used as a model feature.
final_data['Prices_scaled'] = scaler.transform(final_data[scaled_columns])[:, 1]

# Re-slice so train/test carry the freshly added Prices_scaled column.
train = final_data.loc[train.index]
test = final_data.loc[test.index]

# Model inputs: [cuisine id, city id, scaled price]; target: average rating.
X_train = [train['Cuisine_enc'].values, train['City_enc'].values, train['Prices_scaled'].values]
y_train = train['Average_Rating'].values

X_test = [test['Cuisine_enc'].values, test['City_enc'].values, test['Prices_scaled'].values]
y_test = test['Average_Rating'].values

# ---------- TENSORFLOW MODEL ----------
# Vocabulary sizes for the embedding tables: one row per distinct encoded label.
n_cuisines = final_data['Cuisine_enc'].nunique()
n_cities = final_data['City_enc'].nunique()

# Three scalar inputs per sample: cuisine id, city id and the scaled price.
input_cuisine = Input(shape=(1,), name='cuisine')
input_city = Input(shape=(1,), name='city')
input_price = Input(shape=(1,), name='price')

# Both categorical inputs are embedded with the same width, then flattened
# so they can be concatenated with the numeric price input.
emb_dim = 8
emb_cuisine = Embedding(input_dim=n_cuisines, output_dim=emb_dim, name='emb_cuisine')(input_cuisine)
emb_city = Embedding(input_dim=n_cities, output_dim=emb_dim, name='emb_city')(input_city)
flat_cuisine = Flatten()(emb_cuisine)
flat_city = Flatten()(emb_city)

features = Concatenate()([flat_cuisine, flat_city, input_price])

# Two ReLU hidden layers (32 then 16 units) feeding a single linear output
# that regresses the rating.
x = features
for units in (32, 16):
    x = Dense(units, activation='relu')(x)
output = Dense(1, activation='linear')(x)

model = Model(
    inputs=[input_cuisine, input_city, input_price],
    outputs=output,
)
model.compile(loss='mse', optimizer='adam')

print("\n--- Training Model ---")
# The held-out split is passed as validation data, so val_loss is reported
# after every epoch.
model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=10,
    validation_data=(X_test, y_test),
    verbose=2,
)
print("--- Model Training Complete ---")

# ---------- RECOMMENDATION FUNCTION ----------
def _is_known_label(encoder, label):
    """Return True if *encoder* can transform *label* (i.e. it was seen during fit)."""
    try:
        encoder.transform([label])
    except ValueError:
        # Raised by the encoder for labels it was not fitted on.
        return False
    return True


def recommend_top_restaurants(model, final_data, cuisine_le, city_le, scaler,
                             user_cuisine, user_city, user_rating=4.0, user_price=300, top_k=5,
                             relax_filters=True):
    """Return up to *top_k* restaurants in a city, ranked by the model's predicted score.

    Filtering proceeds from strict to lenient, stopping at the first stage
    that yields rows:

    1. requested cuisine + rating >= user_rating + price <= user_price;
       within that set, rows whose item name contains a cuisine keyword are
       preferred when any exist;
    2. any cuisine with the same rating/price limits;
    3. (only if relax_filters) rating >= min(user_rating, 3.5) and
       price <= 2 * user_price;
    4. (only if relax_filters) every row in the city.

    An unknown cuisine is ignored (stage 1 is skipped); an unknown city
    yields an empty result.

    Args:
        model: Object with ``predict(inputs, verbose=0)``; only called when
            ``final_data`` has no 'Score' column yet.
        final_data: DataFrame with the columns 'Restaurant_Name', 'City',
            'Cuisine', 'Average_Rating', 'Prices', 'Item_Name', 'Cuisine_enc',
            'City_enc' and 'Prices_scaled'. A 'Score' column is added to it
            in place on first use so later calls can reuse the predictions.
        cuisine_le: Fitted encoder used to check whether the cuisine is known.
        city_le: Fitted encoder used to check whether the city is known.
        scaler: Unused; accepted to keep the call signature stable.
        user_cuisine: Cuisine text; compared lower-cased and stripped.
        user_city: City text; compared lower-cased and stripped.
        user_rating: Minimum acceptable 'Average_Rating'.
        user_price: Maximum acceptable 'Prices'.
        top_k: Maximum number of rows to return.
        relax_filters: Whether stages 3 and 4 may be used.

    Returns:
        DataFrame with at most one row per 'Restaurant_Name' (its
        highest-scoring item), sorted by 'Score' descending; empty when
        nothing matches or the city is unknown.
    """
    user_cuisine_proc = user_cuisine.lower().strip()
    user_city_proc = user_city.lower().strip()

    cuisine_known = _is_known_label(cuisine_le, user_cuisine_proc)
    if not cuisine_known:
        print(f"\n[ERROR] Unknown cuisine '{user_cuisine}'.")

    # The city is mandatory: without it there is nothing sensible to return.
    if not _is_known_label(city_le, user_city_proc):
        print(f"\n[ERROR] Unknown city '{user_city}'.")
        return pd.DataFrame()

    if not cuisine_known:
        print(f"[INFO] Proceeding with city '{user_city_proc}' but will ignore cuisine.")
        user_cuisine_proc = None

    # Score every row once and cache the result on the caller's frame.
    if 'Score' not in final_data.columns:
        print("\n[INFO] Calculating scores for all restaurants...")
        model_inputs = [
            final_data['Cuisine_enc'].values,
            final_data['City_enc'].values,
            final_data['Prices_scaled'].values,
        ]
        final_data['Score'] = model.predict(model_inputs, verbose=0).flatten()

    in_city = final_data['City'] == user_city_proc
    base_filter_mask = (
        (final_data['Average_Rating'] >= user_rating)
        & (final_data['Prices'] <= user_price)
        & in_city
    )

    if user_cuisine_proc:
        filtered = final_data[base_filter_mask & (final_data['Cuisine'] == user_cuisine_proc)]
        if not filtered.empty:
            # split() (not split(' ')) so repeated spaces cannot produce an
            # empty keyword, which would match every item name.
            keywords = user_cuisine_proc.split()
            has_keyword = filtered['Item_Name'].map(
                lambda item: isinstance(item, str) and any(k in item for k in keywords)
            ).astype(bool)
            if has_keyword.any():
                print(f"[INFO] Prioritizing items containing '{user_cuisine_proc}'.")
                filtered = filtered[has_keyword]
    else:
        filtered = final_data[base_filter_mask]

    # --- FALLBACK LOGIC ---
    if filtered.empty:
        if user_cuisine_proc:
            print(f"\n[WARNING] No restaurants found for cuisine '{user_cuisine_proc}' matching criteria.")
            print(f"[INFO] Searching for ALL cuisines in '{user_city_proc}'...")
            # Fallback 1: all cuisines, strict rating/price.
            filtered = final_data[base_filter_mask]

        if filtered.empty and relax_filters:
            # Never let the "relaxed" threshold be stricter than the user's own.
            relaxed_rating = min(user_rating, 3.5)
            print(f"[INFO] Relaxing filters (Rating >= {relaxed_rating}, Price doubled)...")
            # Fallback 2: all cuisines, relaxed rating/price.
            relaxed_mask = (
                (final_data['Average_Rating'] >= relaxed_rating)
                & (final_data['Prices'] <= user_price * 2)
                & in_city
            )
            filtered = final_data[relaxed_mask]

            if filtered.empty:
                print(f"[INFO] Still no matches. Showing top-scoring restaurants in '{user_city_proc}'.")
                # Fallback 3: anything in the city.
                filtered = final_data[in_city]

    if filtered.empty:
        if relax_filters:
            print(f"[INFO] No restaurants found in '{user_city_proc}' in the entire dataset.")
        else:
            print(f"[INFO] No restaurants in '{user_city_proc}' match the given rating/price filters.")
        return pd.DataFrame()

    # Best-scoring item first, then keep one row per restaurant.
    ranked = filtered.sort_values('Score', ascending=False)
    unique_restaurants = ranked.drop_duplicates(subset=['Restaurant_Name'], keep='first')
    return unique_restaurants.head(top_k)

# ---------- PRINT FUNCTION ----------
def print_recommendations(top_restaurants):
    """Print a numbered, human-readable listing of the recommended restaurants.

    Args:
        top_restaurants: DataFrame with the columns 'Restaurant_Name',
            'Item_Name', 'Average_Rating', 'Prices' and 'Score', or None.
            None or an empty frame prints a "no recommendations" notice.
    """
    has_rows = top_restaurants is not None and len(top_restaurants) != 0
    if not has_rows:
        print("\nNo recommendations found.")
        return

    divider = "-" * 20
    print("\n--- Top Recommendations ---")
    # Ranks are 1-based and follow the frame's row order.
    for position, (_, entry) in enumerate(top_restaurants.iterrows(), 1):
        restaurant = entry['Restaurant_Name']
        print(f"{position}. Restaurant: {restaurant}")
        best_item = entry['Item_Name']
        print(f"   (Best Match: {best_item})")
        rating = entry['Average_Rating']
        price = entry['Prices']
        score = entry['Score']
        print(f"   Rating: {rating} | Price: ${price:.2f} | Predicted Score: {score:.2f}")
        print(divider)

# ---------- MAIN ----------
def _prompt_number(prompt, cast, default, is_valid):
    """Read one number from stdin, falling back to *default* on bad input.

    Args:
        prompt: Text shown to the user.
        cast: Callable converting the stripped input string (e.g. float, int).
        default: Value returned when the input cannot be converted or is
            rejected by *is_valid*.
        is_valid: Predicate applied to the converted value.

    Only ValueError from *cast* is handled, so Ctrl-C / EOF at the prompt
    still interrupt the program instead of silently selecting the default.
    """
    try:
        value = cast(input(prompt).strip())
    except ValueError:
        return default
    return value if is_valid(value) else default


if __name__ == "__main__":
    print("\n--- Restaurant Recommender ---")

    # value_counts() orders by frequency, so the first ten entries really are
    # the most common ones (a set has arbitrary, run-dependent order).
    unique_cuisines = final_data['Cuisine'].value_counts().index.tolist()
    unique_cities = final_data['City'].value_counts().index.tolist()

    print(f"Available cuisines (top 10): {', '.join(unique_cuisines[:10])}...")
    print(f"Available cities (top 10): {', '.join(unique_cities[:10])}...")

    user_cuisine = input("\nEnter your preferred cuisine keyword: ").strip()
    user_city = input("Enter your city: ").strip()

    # Out-of-range or non-numeric answers (including NaN) fall back to the
    # advertised defaults.
    user_rating = _prompt_number(
        "Minimum rating (1-5) [default: 4.0]: ", float, 4.0,
        lambda value: 1.0 <= value <= 5.0,
    )
    user_price = _prompt_number(
        "Maximum price [default: 300]: ", float, 300.0,
        lambda value: value >= 0,
    )
    top_k = _prompt_number(
        "How many results? [default: 5]: ", int, 5,
        lambda value: value >= 1,
    )

    top_restaurants = recommend_top_restaurants(
        model, final_data, cuisine_le, city_le, scaler,
        user_cuisine=user_cuisine, user_city=user_city,
        user_rating=user_rating, user_price=user_price, top_k=top_k
    )

    print_recommendations(top_restaurants)


--- Training Model ---
Epoch 1/10
2175/2175 - 3s - 1ms/step - loss: 0.4480 - val_loss: 0.0494
Epoch 2/10
2175/2175 - 2s - 734us/step - loss: 0.0490 - val_loss: 0.0508
Epoch 3/10
2175/2175 - 2s - 725us/step - loss: 0.0463 - val_loss: 0.0441
Epoch 4/10
2175/2175 - 2s - 710us/step - loss: 0.0438 - val_loss: 0.0439
Epoch 5/10
2175/2175 - 2s - 714us/step - loss: 0.0423 - val_loss: 0.0410
Epoch 6/10
2175/2175 - 2s - 721us/step - loss: 0.0419 - val_loss: 0.0409
Epoch 7/10
2175/2175 - 2s - 714us/step - loss: 0.0414 - val_loss: 0.0419
Epoch 8/10
2175/2175 - 2s - 716us/step - loss: 0.0408 - val_loss: 0.0411
Epoch 9/10
2175/2175 - 2s - 712us/step - loss: 0.0408 - val_loss: 0.0396
Epoch 10/10
2175/2175 - 2s - 711us/step - loss: 0.0404 - val_loss: 0.0394
--- Model Training Complete ---

--- Restaurant Recommender ---
Available cuisines (top 10): street food, mughlai, coffee, bbq, tibetan, north indian, shawarma, turkish, andhra, bakery...
Available cities (top 10): malleshwaram, bangalore, new del


Enter your preferred cuisine keyword:  biryani
Enter your city:  hyderabad
Minimum rating (1-5) [default: 4.0]:  4
Maximum price [default: 300]:  500
How many results? [default: 5]:  6



[INFO] Calculating scores for all restaurants...
[INFO] Prioritizing items containing 'biryani'.

--- Top Recommendations ---
1. Restaurant: Crystal Restaurant & Bar
   (Best Match: kaju biryani family pack)
   Rating: 4.05 | Price: $450.00 | Predicted Score: 3.91
--------------------
2. Restaurant: Sherton Restaurant
   (Best Match: mutton double biryani)
   Rating: 4.1 | Price: $450.00 | Predicted Score: 3.91
--------------------
