In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

In [2]:
# Load the restaurant dataset; note the file name contains a space before ".csv".
dataset_path = 'Dataset .csv'
data = pd.read_csv(dataset_path)

# Data preprocessing

In [3]:
# Columns removed from the frame before modelling.
unused_columns = [
    'Restaurant ID',
    'Country Code',
    'Locality',
    'Locality Verbose',
    'Longitude',
    'Latitude',
    'Currency',
    'Has Table booking',
    'Has Online delivery',
    'Is delivering now',
    'Switch to order menu',
    'Rating color',
    'Rating text',
    'Votes',
]
data.drop(columns=unused_columns, inplace=True)

In [4]:
# Count the missing values in each remaining column (shown as the cell output).
data.isna().sum()

Restaurant Name         0
City                    0
Address                 0
Cuisines                9
Average Cost for two    0
Price range             0
Aggregate rating        0
dtype: int64

In [5]:
# Number of distinct values in the 'Cuisines' column.
cuisine_count = data['Cuisines'].nunique()
print("Total unique cuisines:", cuisine_count)

Total unique cuisines: 1825


In [6]:
# Remove every row that has a missing value in any column.
data.dropna(axis=0, how='any', inplace=True)

###### Categorical variables encoding

In [7]:
# Integer-encode the categorical columns, keeping ONE fitted encoder per column.
# A single shared LabelEncoder is re-fitted by every fit_transform call and ends
# up remembering only the last column's classes, so encoded cuisines (and the
# model's predictions) could never be mapped back to their original names.
categorical_columns = ['Restaurant Name', 'Cuisines', 'Price range']

encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# Backward-compatible alias: previously `label_encoder` ended the cell fitted
# on 'Price range'. Use encoders['Cuisines'].inverse_transform(...) to decode
# cuisine labels.
label_encoder = encoders['Price range']

In [8]:
# Display the frame after encoding as this cell's output.
data

Unnamed: 0,Restaurant Name,City,Address,Cuisines,Average Cost for two,Price range,Aggregate rating
0,3742,Makati City,"Third Floor, Century City Mall, Kalayaan Avenu...",920,1100,2,4.8
1,3167,Makati City,"Little Tokyo, 2277 Chino Roces Avenue, Legaspi...",1111,1200,2,4.5
2,2892,Mandaluyong City,"Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...",1671,4000,3,4.4
3,4700,Mandaluyong City,"Third Floor, Mega Fashion Hall, SM Megamall, O...",1126,1500,3,4.9
4,5515,Mandaluyong City,"Third Floor, Mega Atrium, SM Megamall, Ortigas...",1122,1500,3,4.8
...,...,...,...,...,...,...,...
9546,4436,��stanbul,"Kemanke�� Karamustafa Pa��a Mahallesi, R۱ht۱m ...",1813,80,2,4.1
9547,1310,��stanbul,"Ko��uyolu Mahallesi, Muhittin ��st�_nda�� Cadd...",1824,105,2,4.2
9548,3063,��stanbul,"Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N...",1110,170,3,3.7
9549,512,��stanbul,"Kuru�_e��me Mahallesi, Muallim Naci Caddesi, N...",1657,120,3,4.0


# Applying Random Forest Classifier  

In [9]:
# Features: the encoded restaurant name and the price range.
feature_columns = ['Restaurant Name', 'Price range']
X = data[feature_columns]

# Target: the encoded cuisine label.
y = data['Cuisines']


###### Splitting the Data

In [10]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [11]:
# Learn the scaling statistics from the training features only, then apply
# the same transformation to both splits.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [12]:
# Random forest with 10 trees and the entropy split criterion; seeded for reproducibility.
forest_params = {'n_estimators': 10, 'criterion': 'entropy', 'random_state': 0}
classifier = RandomForestClassifier(**forest_params)
classifier.fit(X_train, y_train)

In [13]:
# Predict on the held-out features and print the (actual, predicted) label pairs.
y_pred = classifier.predict(X_test)
actual_vs_predicted = [(actual, predicted) for actual, predicted in zip(y_test, y_pred)]
print(actual_vs_predicted)

[(1520, 1514), (177, 186), (834, 828), (1306, 186), (331, 331), (1306, 1275), (828, 1306), (331, 331), (1329, 1306), (1329, 450), (1387, 1387), (1705, 177), (730, 1329), (1329, 1306), (1334, 327), (400, 400), (1444, 1444), (1373, 1306), (181, 177), (186, 186), (1617, 1745), (1749, 1699), (1753, 331), (1329, 1306), (1329, 331), (986, 986), (1288, 1183), (1348, 1406), (683, 1001), (982, 982), (1514, 1514), (1626, 1626), (691, 705), (549, 1514), (525, 1306), (1306, 1306), (890, 201), (1306, 1235), (1031, 177), (99, 1329), (1554, 1306), (331, 331), (1323, 1323), (834, 1306), (497, 1306), (1454, 438), (1329, 1106), (518, 1573), (1306, 1306), (1275, 1288), (221, 1306), (837, 837), (497, 315), (177, 177), (487, 487), (497, 1655), (1401, 331), (1329, 1329), (1212, 1212), (233, 1514), (1298, 1348), (1306, 1514), (186, 186), (331, 1520), (1329, 191), (595, 331), (35, 549), (1306, 1275), (1816, 871), (1329, 1306), (969, 549), (1329, 1514), (1110, 905), (758, 774), (1306, 1306), (734, 1329), (173,

### Evaluation of model

In [19]:
# Per-class precision / recall / F1 report for the test split.
classification_metrics = classification_report(y_true=y_test, y_pred=y_pred)
print("Classification Metrics:", classification_metrics, sep="\n")

Classification Metrics:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           2       0.00      0.00      0.00         0
           6       0.10      0.33      0.15         3
           7       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         0
          12       0.00      0.00      0.00         0
          15       0.00      0.00      0.00         1
          16       1.00      0.50      0.67         2
          18       0.00      0.00      0.00         0
          21       0.00      0.00      0.00         0
          23       0.00      0.00      0.00         0
          24       0.00      0.00      0.00         1
          25       0.00      0.00      0.00         1
          28       0.00      0.00      0.00         0
          29       0.00      0.00      0.00         3
          35       0.00      0.00      0.00         1
          36       0.00      0.00      0.00         1
   

### Exact accuracy

In [20]:
# Share of test rows whose predicted label matches exactly, as a percentage.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
accuracy_percentage = 100 * accuracy
print(f"Accuracy: {accuracy_percentage:.2f}%")

Accuracy: 26.56%
