In [24]:
#Importing Libraries
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore',category=FutureWarning)


In [25]:
df = pd.read_csv("Dataset .csv")
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [26]:
#Checking any null values
df.isnull().sum()

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

In [27]:
#Checking any zero values
(df ==0).sum()


Restaurant ID              0
Restaurant Name            0
Country Code               0
City                       0
Address                    0
Locality                   0
Locality Verbose           0
Longitude                498
Latitude                 498
Cuisines                   0
Average Cost for two      18
Currency                   0
Has Table booking          0
Has Online delivery        0
Is delivering now          0
Switch to order menu       0
Price range                0
Aggregate rating        2148
Rating color               0
Rating text                0
Votes                   1094
dtype: int64

In [28]:
#Dealing with Aggregate rating
df = df[df['Aggregate rating'] > 0 ]

In [29]:
#Dealing with Average Cost for two
df['Average Cost for two'] = df['Average Cost for two'].replace(0,np.nan)
df['Average Cost for two'] = pd.to_numeric(
    df['Average Cost for two'].replace('[₹,]','',regex=True), errors='coerce')
df['Average Cost for two'] = df['Average Cost for two'].fillna(df['Average Cost for two'].median())

In [30]:
#Dealing with cuisine col with its most frequent val

df['Cuisines'] = df['Cuisines'].fillna(df['Cuisines'].mode()[0])

df['Price range'] = pd.to_numeric(df['Price range'],errors='coerce')

In [31]:
#Converting yes/no to 1/0
df['Has Table booking'] = df['Has Table booking'].map({'Yes':1,'No':0}).astype(int)
df['Has Online delivery'] = df['Has Online delivery'].map({'Yes':1,'No':0}).astype(int)

In [32]:
#Creating new dataframe for necessary features
df_features = df[['Cuisines','Average Cost for two','Price range','Has Table booking','Has Online delivery','City','Aggregate rating']]

In [33]:
df_features.head()

Unnamed: 0,Cuisines,Average Cost for two,Price range,Has Table booking,Has Online delivery,City,Aggregate rating
0,"French, Japanese, Desserts",1100.0,3,1,0,Makati City,4.8
1,Japanese,1200.0,3,1,0,Makati City,4.5
2,"Seafood, Asian, Filipino, Indian",4000.0,4,1,0,Mandaluyong City,4.4
3,"Japanese, Sushi",1500.0,4,0,0,Mandaluyong City,4.9
4,"Japanese, Korean",1500.0,4,1,0,Mandaluyong City,4.8


In [34]:
#Making sure no more null, na values present in the df_features
df_features.isna().sum()

Cuisines                0
Average Cost for two    0
Price range             0
Has Table booking       0
Has Online delivery     0
City                    0
Aggregate rating        0
dtype: int64

In [35]:
df_features.isnull().sum()

Cuisines                0
Average Cost for two    0
Price range             0
Has Table booking       0
Has Online delivery     0
City                    0
Aggregate rating        0
dtype: int64

In [36]:
#Encoding cuisines for similarity
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(stop_words='english')
cuisine_matrix = vectorizer.fit_transform(df['Cuisines'].fillna(''))


In [37]:
cuisine_matrix

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 21937 stored elements and shape (7403, 146)>

In [38]:
#Onehot encoding for city
from sklearn.preprocessing import OneHotEncoder, StandardScaler

city_encoder = OneHotEncoder(handle_unknown='ignore',sparse_output=False)
city_encoded = city_encoder.fit_transform(df_features[['City']])

In [39]:
#Preparing final feature matrix by creating numeric+encoded+cuisines
from sklearn.preprocessing import StandardScaler

numeric_features = df_features.drop(columns=['Cuisines','City']).reset_index(drop=True)
scaler = StandardScaler()
numeric_scaled = scaler.fit_transform(numeric_features)


In [40]:

#Combining cuisine matri+numeric+encoded features
combined_features = np.hstack((cuisine_matrix.toarray(), city_encoded,numeric_scaled))

In [41]:
combined_features

array([[ 0.        ,  0.        ,  0.        , ...,  2.37978283,
        -0.68302371,  2.46302149],
       [ 0.        ,  0.        ,  0.        , ...,  2.37978283,
        -0.68302371,  1.91969821],
       [ 0.        ,  0.        ,  0.        , ...,  2.37978283,
        -0.68302371,  1.73859045],
       ...,
       [ 0.        ,  0.        ,  0.        , ..., -0.42020641,
        -0.68302371,  0.47083614],
       [ 0.        ,  0.        ,  0.        , ..., -0.42020641,
        -0.68302371,  1.01415941],
       [ 0.        ,  0.        ,  0.        , ..., -0.42020641,
        -0.68302371,  1.01415941]], shape=(7403, 292))

In [42]:
numeric_features

Unnamed: 0,Average Cost for two,Price range,Has Table booking,Has Online delivery,Aggregate rating
0,1100.0,3,1,0,4.8
1,1200.0,3,1,0,4.5
2,4000.0,4,1,0,4.4
3,1500.0,4,0,0,4.9
4,1500.0,4,1,0,4.8
...,...,...,...,...,...
7398,80.0,3,0,0,4.1
7399,105.0,3,0,0,4.2
7400,170.0,4,0,0,3.7
7401,120.0,4,0,0,4.0


In [43]:
df_features = df_features.copy()
df_features['Restaurant Name'] = df['Restaurant Name']

In [44]:
df_features.columns.tolist()

['Cuisines',
 'Average Cost for two',
 'Price range',
 'Has Table booking',
 'Has Online delivery',
 'City',
 'Aggregate rating',
 'Restaurant Name']

In [45]:
#Recommendation Function

def recommendation_from_preferences(df_features,top_n=5):
    #Default values
    defaults ={
        'city':'Columbus',
        'cuisine':'Pizza',
        'avg_cost':25,
        'price_range':2,
        'has_table':0,
        'has_online':0,

    }

    #Getting user inputs
    city = input(f"Enter your Preferred City: (default :{defaults['city']}) ").strip() or defaults['city']
    cuisine = input(f"What would you like to have?: (default :{defaults['cuisine']}) ").strip() or defaults['cuisine']

    try:
        avg_cost = input(f"What would be your Average Cost?  (default:{defaults['avg_cost']})").strip()
        avg_cost = int(avg_cost) if avg_cost else defaults['avg_cost']
    except:
        avg_cost = defaults['avg_cost']

    try:
        price_range = input(f"What would be your Price Range? (default:{defaults['price_range']})").strip()
        price_range = int(price_range) if price_range else defaults['price_range']
    except:
        price_range = defaults['price_range']

    try:
        has_table = input(f"Has Table Booking?(Yes:1/No:0) (default:{defaults['has_table']}) )").strip()
        has_table = int(has_table) if has_table else defaults['has_table']
    except:
        has_table = defaults['has_table']

    try:
        has_online = input(f"Has Online Delivery?(Yes:1/No:0) (default:{defaults['has_online']}) ").strip()
        has_online = int(has_online) if has_online else defaults['has_online']
    except:
        has_online = defaults['has_online']


    df_features =df_features.copy()
    df_features.loc[:,'City'] = df_features['City'].str.strip().str.lower()
    df_features.loc[:,'Cuisines'] = df_features['Cuisines'].str.strip().str.lower()
    city = city.strip().lower()
    cuisine = cuisine.strip().lower()


    #Hard Filter
    filtered_df = df_features[
        (df_features['City'].str.lower() == city.lower()) &
        (df_features['Price range'].astype(int) <= price_range) &
        (df_features['Has Table booking'] == has_table) &
        (df_features['Has Online delivery'] == has_online) &
        (df_features['Average Cost for two'] <= avg_cost) &
        (df_features['Cuisines'].str.lower().str.strip() == cuisine.lower().strip())
    ].copy()
    if filtered_df.empty:
        print("No Exact matches found.Try Changing your preferences.\n")
        return

    #Computing Similarity with real variation
    cuisine_match = filtered_df['Cuisines'].apply(lambda x: 1.0 if cuisine in x else 0.5)
     # Cost match: linear decay based on difference
    cost_diff = abs(filtered_df['Average Cost for two'] - avg_cost)
    max_diff = cost_diff.max() if cost_diff.max() != 0 else 1
    cost_match = 1 - (cost_diff / (max_diff * 2))  # normalize to 0.5~1.0
    cost_match = cost_match.clip(0.5,1.0)


    #Fianl Similarity Score with weights
    filtered_df['Similarity Score'] =(cuisine_match * 0.6 + cost_match * 0.4) * 5

    #Sorting and display
    recommendations = filtered_df.sort_values(by='Similarity Score', ascending=False).head(top_n)


    #Truncating long strings for neat output
    recommendations['Restaurant Name'] = recommendations['Restaurant Name'].str.slice(0,25)
    recommendations['Cuisines']= recommendations['Cuisines'].str.slice(0,30)

    if recommendations.empty:
        print("No Recommendations found based on your given preferences. \n")
    else:
        print("\n Top Recommendations:\n")
        print(recommendations[['Restaurant Name', 'Cuisines', 'Average Cost for two',
                               'Has Table booking', 'Price range', 'City','Aggregate rating', 'Similarity Score']]
              .to_string(index=False,
                         formatters={'Average Cost for two':'{:,.0f}'.format,
                                     'Similarity Score':'{:.2f}'.format,
                                     'Aggregate rating': '{:.1f}'.format
                                     }
                         )
              )


In [46]:
recommendation_from_preferences(df_features)

Enter your Preferred City: (default :Columbus)  Bhubaneshwar
What would you like to have?: (default :Pizza)  North Indian 
What would be your Average Cost?  (default:25) 300
What would be your Price Range? (default:2) 1
Has Table Booking?(Yes:1/No:0) (default:0) ) 0
Has Online Delivery?(Yes:1/No:0) (default:0)  0


No Exact matches found.Try Changing your preferences.



In [47]:
recommendation_from_preferences(df_features)

Enter your Preferred City: (default :Columbus)  
What would you like to have?: (default :Pizza)  
What would be your Average Cost?  (default:25) 
What would be your Price Range? (default:2) 
Has Table Booking?(Yes:1/No:0) (default:0) ) 
Has Online Delivery?(Yes:1/No:0) (default:0)  



 Top Recommendations:

Restaurant Name Cuisines Average Cost for two  Has Table booking  Price range     City Aggregate rating Similarity Score
Mellow Mushroom    pizza                   25                  0            2 columbus              4.1             5.00


In [53]:
recommendation_from_preferences(df_features)


 Top Recommendations:

   Restaurant Name            Cuisines Average Cost for two  Has Table booking  Price range           City Aggregate rating Similarity Score
 Garota de Ipanema brazilian, bar food                  120                  0            4 rio de janeiro              4.9             5.00
Braseiro da G��vea brazilian, bar food                  100                  0            4 rio de janeiro              4.9             4.50
 Aconchego Carioca brazilian, bar food                   85                  0            3 rio de janeiro              4.6             4.12
 Garota de Ipanema brazilian, bar food                   80                  0            3 rio de janeiro              4.3             4.00


In [52]:
recommendation_from_preferences(df_features)


Enter your Preferred City: (default :Columbus)  Rio De Janeiro
What would you like to have?: (default :Pizza)  Brazilian
What would be your Average Cost?  (default:25) 40
What would be your Price Range? (default:2) 2
Has Table Booking?(Yes:1/No:0) (default:0) ) 0
Has Online Delivery?(Yes:1/No:0) (default:0)  0



 Top Recommendations:

Restaurant Name  Cuisines Average Cost for two  Has Table booking  Price range           City Aggregate rating Similarity Score
     Leme Light brazilian                   40                  0            2 rio de janeiro              4.2             5.00
