###**IMPORT LIBRARIES**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix


###**IMPORT DATASET**

In [None]:
# Load the restaurant dataset from a CSV file located in the working directory.
# NOTE(review): the file name contains a space before ".csv" — confirm it
# matches the file on disk exactly.
data = pd.read_csv("Dataset .csv")



###**PREPROCESS DATASET**

In [None]:
# Print a structural summary of the frame: index range, column names,
# non-null counts and dtypes.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4411 entries, 0 to 4410
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         4411 non-null   int64  
 1   Restaurant Name       4411 non-null   object 
 2   Country Code          4411 non-null   int64  
 3   City                  4411 non-null   object 
 4   Address               4411 non-null   object 
 5   Locality              4411 non-null   object 
 6   Locality Verbose      4411 non-null   object 
 7   Longitude             4411 non-null   float64
 8   Latitude              4411 non-null   float64
 9   Cuisines              4401 non-null   object 
 10  Average Cost for two  4410 non-null   float64
 11  Currency              4410 non-null   object 
 12  Has Table booking     4410 non-null   object 
 13  Has Online delivery   4410 non-null   object 
 14  Is delivering now     4410 non-null   object 
 15  Switch to order menu 

In [None]:
# Count the missing values in every column before any cleaning is applied.
print(data.isna().sum())

Restaurant ID            0
Restaurant Name          0
Country Code             0
City                     0
Address                  0
Locality                 0
Locality Verbose         0
Longitude                0
Latitude                 0
Cuisines                10
Average Cost for two     1
Currency                 1
Has Table booking        1
Has Online delivery      1
Is delivering now        1
Switch to order menu     1
Price range              1
Aggregate rating         1
Rating color             1
Rating text              1
Votes                    1
dtype: int64


In [None]:
# Remove, in place, every row whose 'Cuisines' value is missing; 'Cuisines'
# is one-hot encoded further down, so rows without it cannot be used.
data.dropna(subset=['Cuisines'], inplace=True)

In [None]:
# Re-check the per-column missing-value counts after dropping rows.
data.isna().sum()

Restaurant ID           0
Restaurant Name         0
Country Code            0
City                    0
Address                 0
Locality                0
Locality Verbose        0
Longitude               0
Latitude                0
Cuisines                0
Average Cost for two    0
Currency                0
Has Table booking       0
Has Online delivery     0
Is delivering now       0
Switch to order menu    0
Price range             0
Aggregate rating        0
Rating color            0
Rating text             0
Votes                   0
dtype: int64

###**ENCODING CATEGORICAL DATA**

In [None]:
# Impute any remaining gaps in the numeric (float64 / int64) columns with the
# column mean. The null counts printed after the dropna step are already zero,
# so this acts as a safeguard rather than changing the current data.
numerical_features = data.select_dtypes(include=['float64', 'int64']).columns
for feature in numerical_features:
    # Assign the filled column back instead of calling fillna(inplace=True) on
    # data[feature]: the in-place call operates on a temporary selection, which
    # pandas warns about and which does not update `data` under Copy-on-Write.
    data[feature] = data[feature].fillna(data[feature].mean())

In [None]:
# Impute any remaining gaps in the text (object) columns with the most
# frequent value of each column.
categorical_features = data.select_dtypes(include=['object']).columns
for feature in categorical_features:
    # mode() returns a Series of the most frequent values; [0] takes the first.
    # The result is assigned back rather than using fillna(inplace=True) on
    # data[feature], which would modify a temporary selection and is not
    # guaranteed to update `data` (it is a no-op under Copy-on-Write).
    data[feature] = data[feature].fillna(data[feature].mode()[0])

In [None]:
# Columns carried forward into modelling. 'Restaurant Name' is later split off
# as the target and 'Cuisines' is expanded into indicator columns.
selected_features = [
    'Restaurant Name',
    'Country Code',
    'Average Cost for two',
    'Price range',
    'Aggregate rating',
    'Votes',
    'Cuisines',
]

In [None]:
# Restrict the frame to the modelling columns and preview the first rows.
df = data.loc[:, selected_features]
df.head()

Unnamed: 0,Restaurant Name,Country Code,Average Cost for two,Price range,Aggregate rating,Votes,Cuisines
0,Le Petit Souffle,162,1100.0,3.0,4.8,314.0,"French, Japanese, Desserts"
1,Izakaya Kikufuji,162,1200.0,3.0,4.5,591.0,Japanese
2,Heat - Edsa Shangri-La,162,4000.0,4.0,4.4,270.0,"Seafood, Asian, Filipino, Indian"
3,Ooma,162,1500.0,4.0,4.9,365.0,"Japanese, Sushi"
4,Sambo Kojin,162,1500.0,4.0,4.8,229.0,"Japanese, Korean"


In [None]:
# Expand the comma-separated 'Cuisines' string into one 0/1 indicator column
# per cuisine, then swap the raw text column for those indicators.
cuisines_encoded = df['Cuisines'].str.get_dummies(sep=', ')

df_encoded = pd.concat(
    [df.drop(columns=['Cuisines']), cuisines_encoded],
    axis=1,
)

In [None]:
# Display the encoded frame (pandas truncates the rendering of large frames).
df_encoded

Unnamed: 0,Restaurant Name,Country Code,Average Cost for two,Price range,Aggregate rating,Votes,Afghani,African,American,Andhra,...,Tapas,Tea,Teriyaki,Tex-Mex,Thai,Tibetan,Turkish,Vegetarian,Vietnamese,Western
0,Le Petit Souffle,162,1100.0,3.0,4.8,314.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Izakaya Kikufuji,162,1200.0,3.0,4.5,591.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Heat - Edsa Shangri-La,162,4000.0,4.0,4.4,270.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Ooma,162,1500.0,4.0,4.9,365.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Sambo Kojin,162,1500.0,4.0,4.8,229.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4405,G's Patisserie,1,600.0,2.0,2.8,8.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4406,Giani,1,400.0,1.0,3.2,17.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4407,Gopals 56,1,200.0,1.0,3.2,8.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4408,Green Chick Chop,1,350.0,1.0,3.2,26.0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


###**SPLITTING INTO TRAIN AND TEST**

In [None]:
# The restaurant name is the label; every other column is a feature.
# NOTE(review): this makes each distinct restaurant name its own class —
# confirm that this is the intended prediction target.
y = df_encoded.loc[:, 'Restaurant Name']
X = df_encoded.drop(labels='Restaurant Name', axis='columns')

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

###**USING RANDOM FOREST CLASSIFIER**

In [None]:
# Fit a random forest with a fixed seed (fit() returns the estimator itself),
# then predict on the training rows. y_train_pred is not read by any cell
# visible in this notebook.
clf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_train_pred = clf.predict(X_train)

###**EVALUATION OF MODEL**

In [None]:
# Score the classifier on the held-out rows.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Weighted averaging combines per-class scores in proportion to class support.
# zero_division=0 states explicitly that a class with an undefined score
# (no predicted or no true samples) contributes 0; this is the value the
# default "warn" setting already uses, but without emitting the warning.
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
recall = recall_score(y_test, y_pred, average='weighted', zero_division=0)
conf_matrix = confusion_matrix(y_test, y_pred)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [None]:
# Report the scalar metrics one per line, followed by the confusion matrix.
for label, value in (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
):
    print(label, value)
print("Confusion Matrix:", conf_matrix, sep="\n")


Accuracy: 0.10556186152099886
Precision: 0.09385856292780242
Recall: 0.10556186152099886
Confusion Matrix:
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]]
