In [None]:
!pip install openpyxl

In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.cluster import KMeans

# Load and prepare data
# NOTE: both inputs are read from machine-specific absolute Windows paths.
zomato_df = pd.read_csv(r'C:\devopspro2\app\zomato.csv', encoding='latin1')
country_df = pd.read_excel(r'C:\devopspro2\app\Country-Code.xlsx')
# Left join: every restaurant row is kept; rows whose 'Country Code' has no
# match get a missing 'Country' and are removed by dropna() below.
merged_df = pd.merge(zomato_df, country_df, on='Country Code', how='left')

# Build the working frame as one chain that ends in rename(), which returns a
# new object. The previous version mutated a column subset in place
# (dropna/rename with inplace=True plus a column assignment), which pandas
# reports as SettingWithCopyWarning.
merged_df = (
    merged_df[['Restaurant Name', 'City', 'Cuisines', 'Average Cost for two', 'Aggregate rating', 'Country']]
    .dropna()
    .rename(columns={'Average Cost for two': 'Cost', 'Aggregate rating': 'Rating'})
)
# Keep only the first comma-separated cuisine of each restaurant.
merged_df['Cuisines'] = merged_df['Cuisines'].apply(lambda x: x.split(',')[0].strip())

# Define features and pipeline
# Numeric columns are standardised and categorical columns are one-hot
# encoded before the rows are clustered.
numerical_features = ['Cost', 'Rating']
categorical_features = ['City', 'Cuisines', 'Country']

# handle_unknown='ignore' lets transform() accept category values that were
# not present when the encoder was fitted.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features),
    ]
)

kmeans = KMeans(random_state=42, n_clusters=10)

# Step names matter: later code looks the preprocessor up by name.
pipeline = Pipeline(steps=[('preprocessor', preprocessor), ('kmeans', kmeans)])

# Fit on the modelling columns only, then store each restaurant's cluster
# label next to its data.
features_df = merged_df[[*numerical_features, *categorical_features]]
merged_df['Cluster'] = pipeline.fit(features_df).predict(features_df)

# Recommendation function
def recommend_restaurants(user_pref, df, pipeline, top_n=5):
    """Return the ``top_n`` restaurants closest to a user's preferences.

    The preference row is assigned to a cluster with ``pipeline.predict``.
    The rows of ``df`` in that cluster are then ranked by Euclidean distance
    to the preference row in the preprocessor's transformed feature space.
    Every feature, including ``Rating``, is a value to be matched as closely
    as possible; no feature is applied as a hard filter.

    Args:
        user_pref: Mapping with the keys 'Cost', 'Rating', 'City',
            'Cuisines' and 'Country'. Extra keys are ignored.
        df: DataFrame holding those feature columns plus 'Restaurant Name'
            and 'Cluster'.
        pipeline: Fitted object exposing ``predict`` and
            ``named_steps['preprocessor'].transform``.
        top_n: Maximum number of rows to return.

    Returns:
        DataFrame with the columns 'Restaurant Name', 'City', 'Cuisines',
        'Country', 'Cost' and 'Rating', nearest first. It is empty when
        ``df`` has no rows in the predicted cluster.

    Raises:
        ValueError: If ``user_pref`` lacks any of the feature keys.
    """
    features = ['Cost', 'Rating', 'City', 'Cuisines', 'Country']
    output_columns = ['Restaurant Name', 'City', 'Cuisines', 'Country', 'Cost', 'Rating']

    # Fail early with a clear message instead of an error from deep inside
    # the pipeline.
    missing = [name for name in features if name not in user_pref]
    if missing:
        raise ValueError(f"user_pref is missing required keys: {missing}")

    # Select the feature columns explicitly so stray keys never reach the
    # model and the column order is fixed.
    user_df = pd.DataFrame([user_pref])[features]
    user_cluster = pipeline.predict(user_df)[0]
    cluster_restaurants = df[df['Cluster'] == user_cluster].copy()

    # A caller may pass a filtered frame with no rows in this cluster;
    # transforming zero rows would fail, so return an empty result instead.
    if cluster_restaurants.empty:
        return cluster_restaurants[output_columns]

    preprocessor = pipeline.named_steps['preprocessor']
    cluster_transformed = preprocessor.transform(cluster_restaurants[features])
    user_transformed = preprocessor.transform(user_df)

    # The preprocessor may return sparse matrices; densify them so NumPy
    # broadcasting and norm() work.
    if hasattr(cluster_transformed, "toarray"):
        cluster_transformed = cluster_transformed.toarray()
    if hasattr(user_transformed, "toarray"):
        user_transformed = user_transformed.toarray()

    # user_transformed has one row and broadcasts against every cluster row.
    distances = np.linalg.norm(cluster_transformed - user_transformed, axis=1)
    cluster_restaurants['Distance'] = distances

    # A stable sort keeps equally distant restaurants in their original
    # order, so the top_n cut is deterministic.
    recommendations = cluster_restaurants.sort_values('Distance', kind='mergesort').head(top_n)
    return recommendations[output_columns]

# --- Take input from user ---
def _prompt_float(prompt, error_message, is_valid=None, invalid_message=None):
    """Ask repeatedly until the user enters an acceptable finite number.

    Args:
        prompt: Text shown by ``input``.
        error_message: Printed when the entry is not a finite number.
        is_valid: Optional predicate the parsed value must satisfy.
        invalid_message: Printed when ``is_valid`` rejects the value.

    Returns:
        The accepted value as a float.
    """
    while True:
        try:
            value = float(input(prompt).strip())
        except ValueError:
            print(error_message)
            continue
        # float() also parses "nan", "inf" and "infinity"; such values are
        # not usable as numeric features, so treat them like any other
        # invalid entry.
        if not np.isfinite(value):
            print(error_message)
            continue
        if is_valid is not None and not is_valid(value):
            print(invalid_message)
            continue
        return value


print("Enter your preferences to get restaurant recommendations:")

city = input("City: ").strip()
cuisine = input("Cuisine (e.g., Chinese, Italian): ").strip()
country = input("Country (e.g., India, UAE): ").strip()

cost = _prompt_float(
    "Average cost for two (e.g., 700): ",
    "Please enter a valid number for cost.",
)

# NOTE: recommend_restaurants uses this rating as a value to match by
# distance; it does not filter out restaurants rated below it.
rating = _prompt_float(
    "Preferred minimum rating (0 to 5): ",
    "Please enter a valid number for rating.",
    is_valid=lambda value: 0 <= value <= 5,
    invalid_message="Rating must be between 0 and 5.",
)

user_preferences = {
    'City': city,
    'Cuisines': cuisine,
    'Country': country,
    'Cost': cost,
    'Rating': rating
}

# Get recommendations
result = recommend_restaurants(user_preferences, merged_df, pipeline)
print("\nTop restaurant recommendations based on your preferences:\n")
print(result.to_string(index=False))


Enter your preferences to get restaurant recommendations:

Top restaurant recommendations based on your preferences:

                   Restaurant Name      City Cuisines Country  Cost  Rating
                     Gola Sizzlers New Delhi  Chinese   India  1600     3.0
                          Voda Bar New Delhi  Chinese   India  2200     3.2
                      Lotus Garden New Delhi  Chinese   India  2000     3.2
Fortune Deli -  Fortune Inn Grazia     Noida  Chinese   India  2000     3.2
                           Embassy New Delhi  Chinese   India  1200     3.0
