In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Read the restaurant dataset from its CSV file and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="C:/DATASETS/restaurent.csv")
df.head(n=5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [3]:
# Summarise the frame: number of entries, column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9551 entries, 0 to 9550
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         9551 non-null   int64  
 1   Restaurant Name       9551 non-null   object 
 2   Country Code          9551 non-null   int64  
 3   City                  9551 non-null   object 
 4   Address               9551 non-null   object 
 5   Locality              9551 non-null   object 
 6   Locality Verbose      9551 non-null   object 
 7   Longitude             9551 non-null   float64
 8   Latitude              9551 non-null   float64
 9   Cuisines              9542 non-null   object 
 10  Average Cost for two  9551 non-null   int64  
 11  Currency              9551 non-null   object 
 12  Has Table booking     9551 non-null   object 
 13  Has Online delivery   9551 non-null   object 
 14  Is delivering now     9551 non-null   object 
 15  Switch to order menu 

In [4]:
# Count the missing values in each column.
df.isna().sum()

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                9
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

In [5]:
# Discard every row that contains at least one missing value.
df = df.dropna(axis="index", how="any")

In [6]:
# Remove the columns that are treated as irrelevant for the rest of the notebook.
df = df.drop(
    [
        'Restaurant ID',
        'Country Code',
        'Address',
        'Locality',
        'Locality Verbose',
        'Rating color',
        'Rating text',
    ],
    axis="columns",
)

In [7]:
# The aggregate rating is the regression target; all remaining columns form the feature frame.
y = df["Aggregate rating"]
x = df.drop("Aggregate rating", axis="columns")

In [8]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the shuffled split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
    shuffle=True,
)

In [9]:
# Columns routed to the numeric preprocessing branch.
numeric_features = [
    'Longitude',
    'Latitude',
    'Average Cost for two',
    'Price range',
    'Votes',
]
# Columns routed to the categorical (one-hot) preprocessing branch.
categorical_features = [
    'Cuisines',
    'Has Table booking',
    'Has Online delivery',
    'Is delivering now',
    'Switch to order menu',
]

In [10]:
# Numeric branch: fill gaps with the column mean, then standardise.
numeric_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode. Categories not seen during fitting are ignored at transform time
# instead of raising.
categorical_transformer = Pipeline([
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('onehot', OneHotEncoder(handle_unknown='ignore')),
])

# Apply each branch to its own group of columns.
preprocessor = ColumnTransformer([
    ('num', numeric_transformer, numeric_features),
    ('cat', categorical_transformer, categorical_features),
])


In [11]:
# Chain the preprocessing step and a seeded 100-tree random forest into one
# estimator, then fit it on the training split.
model = Pipeline([
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(random_state=42, n_estimators=100)),
])

model.fit(X_train, Y_train)


In [12]:
# Score the fitted pipeline on the held-out split.
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(Y_test, y_pred), r2_score(Y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

Mean Squared Error: 0.09433143424729307
R-squared: 0.9587997711997785


In [13]:
# One-hot encode the four flag columns; every other column is carried over.
data_encoded = pd.get_dummies(
    df,
    columns=[
        'Has Table booking',
        'Has Online delivery',
        'Is delivering now',
        'Switch to order menu',
    ],
)

# Content-based filtering: turn each restaurant's cuisine text into a TF-IDF
# vector, skipping English stop words.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data_encoded['Cuisines'])

In [25]:
def recommend_restaurants(user_preferences, top_n=10):
    """Recommend restaurants whose cuisines best match a free-text preference.

    The preference text is vectorised with the notebook-level ``tfidf``
    vectoriser and compared against every row of ``tfidf_matrix``.

    Parameters
    ----------
    user_preferences : str
        Free-text description of the wanted cuisine(s), e.g. ``"cafe"``.
    top_n : int, default 10
        Maximum number of restaurants to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``df`` with the columns 'Restaurant Name',
        'City', 'Aggregate rating' and 'Cuisines', most similar first.
        Restaurants with zero similarity are never returned, so the frame is
        empty when nothing matches or when ``top_n`` is not positive.
    """
    output_columns = ['Restaurant Name', 'City', 'Aggregate rating', 'Cuisines']

    # A non-positive request would turn the ranking slice into nonsense;
    # answer it with an empty frame of the usual shape instead.
    if top_n <= 0:
        return df.iloc[[]][output_columns]

    # Transform user preferences into a TF-IDF vector.
    user_preferences_vector = tfidf.transform([user_preferences])

    # TfidfVectorizer L2-normalises rows by default, so this dot product is
    # the cosine similarity between the query and each restaurant.
    cosine_similarities = linear_kernel(user_preferences_vector, tfidf_matrix).flatten()

    # Row positions ordered from most to least similar.
    ranked_positions = cosine_similarities.argsort()[::-1]

    # Keep only real matches: without this, a query sharing no term with any
    # restaurant would return arbitrary rows that all have similarity 0.
    matching_positions = ranked_positions[cosine_similarities[ranked_positions] > 0]
    top_indices = matching_positions[:top_n]

    # tfidf_matrix rows line up with df by position (not by index label),
    # hence iloc rather than loc.
    recommended_restaurants = df.iloc[top_indices]
    return recommended_restaurants[output_columns]

In [27]:
# Ask how many restaurants to list and which column to rank them by.
a = int(input("Enter the value of top restaurents : "))
if a < 0:
    raise ValueError("The number of restaurants must not be negative.")
print(pd.Series(df.columns))

# Resolve the typed name case-insensitively. str.capitalize() lower-cases
# everything after the first letter, so it could never match mixed-case
# columns such as 'Has Table booking' or 'Average Cost for two'.
requested_column = input("Enter the recommended coloumns : ").strip()
columns_by_folded_name = {name.casefold(): name for name in df.columns}
if requested_column.casefold() not in columns_by_folded_name:
    raise KeyError(
        f"Unknown column {requested_column!r}; choose one of {list(df.columns)}"
    )
b = columns_by_folded_name[requested_column.casefold()]

# Rank every row by the chosen column, highest value first, and keep the top `a`.
Y = df[b]
top_rated_restaurants = df.loc[Y.sort_values(ascending=False).index[:a]]

# Report the count that was actually requested rather than a fixed "10".
print(f"Top {a} Recommended Restaurants:")
# dict.fromkeys drops the duplicate when `b` is already one of the fixed columns.
display_columns = list(dict.fromkeys(['Restaurant Name', 'City', 'Aggregate rating', b]))
top_rated_restaurants[display_columns]

Enter the value of top restaurents : 5
0          Restaurant Name
1                     City
2                Longitude
3                 Latitude
4                 Cuisines
5     Average Cost for two
6                 Currency
7        Has Table booking
8      Has Online delivery
9        Is delivering now
10    Switch to order menu
11             Price range
12        Aggregate rating
13                   Votes
dtype: object
Enter the recommended coloumns : cuisines
Top 10 Recommended Restaurants:


Unnamed: 0,Restaurant Name,City,Aggregate rating,Cuisines
9547,Ceviz A��ac۱,��stanbul,4.2,"World Cuisine, Patisserie, Cafe"
9525,Cafemiz,Ankara,4.4,"World Cuisine, Mexican, Italian"
9524,Gaga Manjero,Ankara,4.9,World Cuisine
471,I Am,Singapore,3.2,"Western, Fusion, Fast Food"
9285,MONKS,Jakarta,4.2,"Western, Asian, Cafe"


In [29]:
# Ask the user which dish or cuisine they are looking for.
user_preferences = input("Enter the recommended dish : ")

# Look up the restaurants whose cuisines best match that text and show them.
recommended_restaurants = recommend_restaurants(user_preferences)
print("Recommended Restaurants based on User Preferences:")
recommended_restaurants


Enter the recommended dish : cafe
Recommended Restaurants based on User Preferences:


Unnamed: 0,Restaurant Name,City,Aggregate rating,Cuisines
9550,Walter's Coffee Roastery,��stanbul,4.0,Cafe
7699,Vintro,New Delhi,3.9,Cafe
1586,Cafe Coffee Day,Gurgaon,3.3,Cafe
7764,Cafe Coffee Day,New Delhi,0.0,Cafe
7758,Costa Coffee,New Delhi,3.6,Cafe
1640,Starbucks,Gurgaon,3.5,Cafe
7740,Barista,New Delhi,3.3,Cafe
7724,The Cafe,New Delhi,3.8,Cafe
1697,Barista,Gurgaon,3.3,Cafe
1561,Starbucks,Gurgaon,3.5,Cafe
