In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

In [2]:
# Read the restaurant table from the CSV file (path is relative to the
# notebook's working directory).
dataset = pd.read_csv(filepath_or_buffer='Dataset.csv')
# Show the first five rows as this cell's output.
dataset.head(n=5)

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,Le Petit Souffle,162,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...","Century City Mall, Poblacion, Makati City","Century City Mall, Poblacion, Makati City, Mak...",121.027535,14.565443,"French, Japanese, Desserts",...,Botswana Pula(P),Yes,No,No,No,3,4.8,Dark Green,Excellent,314
1,6304287,Izakaya Kikufuji,162,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...","Little Tokyo, Legaspi Village, Makati City","Little Tokyo, Legaspi Village, Makati City, Ma...",121.014101,14.553708,Japanese,...,Botswana Pula(P),Yes,No,No,No,3,4.5,Dark Green,Excellent,591
2,6300002,Heat - Edsa Shangri-La,162,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...","Edsa Shangri-La, Ortigas, Mandaluyong City","Edsa Shangri-La, Ortigas, Mandaluyong City, Ma...",121.056831,14.581404,"Seafood, Asian, Filipino, Indian",...,Botswana Pula(P),Yes,No,No,No,4,4.4,Green,Very Good,270
3,6318506,Ooma,162,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.056475,14.585318,"Japanese, Sushi",...,Botswana Pula(P),No,No,No,No,4,4.9,Dark Green,Excellent,365
4,6314302,Sambo Kojin,162,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...","SM Megamall, Ortigas, Mandaluyong City","SM Megamall, Ortigas, Mandaluyong City, Mandal...",121.057508,14.58445,"Japanese, Korean",...,Botswana Pula(P),Yes,No,No,No,4,4.8,Dark Green,Excellent,229


In [6]:
# Columns removed from the frame before one-hot encoding.
# 'Cuisines' is in this list as well: it is the prediction target and is
# re-attached to the encoded frame afterwards, so it must not be turned
# into dummy columns itself.
features_to_drop_cls = [
    'Restaurant ID',
    'Address',
    'Locality',
    'Locality Verbose',
    'Currency',
    'Rating color',
    'Rating text',
    'Cuisines',
]

In [7]:
# Build the modelling frame in one non-mutating chain:
#   1. drop the excluded columns and one-hot encode what is left
#      (get_dummies expands the object/category-typed columns),
#   2. put the raw 'Cuisines' strings back as the last column,
#   3. discard rows whose cuisine is missing.
df_model = (
    pd.get_dummies(dataset.drop(columns=features_to_drop_cls))
    .assign(Cuisines=dataset['Cuisines'])
    .dropna(subset=['Cuisines'])
)

In [8]:
# Separate the modelling frame into target and predictors.
# y: the raw cuisine strings; X: every other column.
y = df_model['Cuisines']
X = df_model.drop(columns=['Cuisines'])

In [9]:
# Map each distinct cuisine string to an integer class id. The fitted
# encoder is kept in `le` so ids can be translated back to strings.
cuisine_strings = df_model['Cuisines']
le = LabelEncoder()
y = le.fit_transform(cuisine_strings)

# Plain-text dump of the feature matrix (pandas truncates it for display).
print(X)

      Country Code   Longitude   Latitude  Average Cost for two  Price range  \
0              162  121.027535  14.565443                  1100            3   
1              162  121.014101  14.553708                  1200            3   
2              162  121.056831  14.581404                  4000            4   
3              162  121.056475  14.585318                  1500            4   
4              162  121.057508  14.584450                  1500            4   
...            ...         ...        ...                   ...          ...   
9546           208   28.977392  41.022793                    80            3   
9547           208   29.041297  41.009847                   105            3   
9548           208   29.034640  41.055817                   170            4   
9549           208   29.036019  41.057979                   120            4   
9550           208   29.026016  40.984776                    55            2   

      Aggregate rating  Votes  Restaura

In [10]:
# Show the integer-encoded target array produced by the label encoder.
print(y)

[ 920 1111 1671 ... 1110 1657  331]


In [11]:
# Hold out 20% of the rows for evaluation. The fixed random_state makes
# the row assignment identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [12]:
# Fit a random forest with default hyperparameters on the training split.
# random_state is pinned (same seed as the train/test split) because the
# forest draws bootstrap samples and random feature subsets; without a
# seed the fitted model, and the accuracies reported from it, change on
# every kernel restart.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [13]:
# Accuracy on the training data.
# accuracy_score's signature is (y_true, y_pred): ground truth goes first.
# Plain accuracy gives the same number either way, but keeping the
# documented order avoids silent errors if the metric is later swapped for
# an asymmetric one (precision, recall, ...).
X_train_prediction = model.predict(X_train)
training_data_accuracy = accuracy_score(y_train, X_train_prediction)
print(f"Training Accuracy: {training_data_accuracy}")

# Accuracy on the held-out test data. A large gap between this value and
# the training accuracy indicates the model is overfitting.
X_test_prediction = model.predict(X_test)
testing_data_accuracy = accuracy_score(y_test, X_test_prediction)
print(f"Testing Accuracy: {testing_data_accuracy}")

Training Accuracy: 1.0
Testing Accuracy: 0.24829753797799894


In [15]:
# Look up every restaurant whose cuisine list mentions the requested cuisine.
# The search term is lower-cased so it can be compared against the
# lower-cased 'Cuisines' column.
input_cuisine = "Italian".lower()

# Case-insensitive *literal* substring match:
#   - regex=False: str.contains treats the pattern as a regular expression
#     by default, so a term containing metacharacters such as '(' or '+'
#     would raise or match the wrong rows.
#   - na=False: rows with a missing cuisine count as non-matches.
cuisine_mask = dataset['Cuisines'].str.lower().str.contains(
    input_cuisine, na=False, regex=False
)
matched_restaurants = dataset[cuisine_mask]

if not matched_restaurants.empty:
    print(f"\nRestaurants serving '{input_cuisine.title()}':\n")
    print(matched_restaurants[['Restaurant Name', 'Cuisines']].to_string(index=False))
else:
    print("No restaurants found with the given cuisine.")


Restaurants serving 'Italian':

                                       Restaurant Name                                                                            Cuisines
                                         Cafe Arabelle                                                   Cafe, American, Italian, Filipino
                              Nonna's Pasta & Pizzeria                                                                      Italian, Pizza
                               Wildflour Cafe + Bakery                                                     Cafe, Bakery, American, Italian
                         The Food Hall by Todd English                                                   American, Asian, Italian, Seafood
                                          Villa Tevere                                                                             Italian
                                                  Gero                                                                             It