In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Read the dataset from disk and keep only the rows that have a 'Cuisines' value.
# The frame is reassigned rather than mutated in place, so re-running this cell
# always starts again from the file contents.
df = pd.read_csv('Dataset .csv').dropna(subset=['Cuisines'])

In [2]:
# Step 1 - Primary cuisine: the first entry of the comma-separated 'Cuisines' string,
# with surrounding whitespace removed.
# e.g. "North Indian, Mughlai" becomes "North Indian"
df['Primary_Cuisine'] = df['Cuisines'].str.split(',').str[0].str.strip()

# Step 2 - Restrict the problem to the 10 most frequent primary cuisines.
# Keeping every cuisine would leave many classes with very few examples.
cuisine_counts = df['Primary_Cuisine'].value_counts()
top_cuisines = cuisine_counts.head(10).index
print("Top 10 Cuisines:", top_cuisines)

# Work on an independent copy so the encoding below does not touch `df`.
is_top_cuisine = df['Primary_Cuisine'].isin(top_cuisines)
final_df = df.loc[is_top_cuisine].copy()

# Step 3 - Replace the categorical flag columns with integer codes, in place.
# A single encoder is reused; each fit_transform call refits it, so after this
# cell `le` only remembers the mapping of the last column it was fitted on.
le = LabelEncoder()
for column in (
    'Has Table booking',
    'Has Online delivery',
    'Is delivering now',
    'Switch to order menu',
):
    final_df[column] = le.fit_transform(final_df[column])

# 'City' is encoded into a new column (the original text column is kept) because
# location may be a strong predictor (e.g., "New Delhi" -> "North Indian").
final_df['City_Code'] = le.fit_transform(final_df['City'])

Top 10 Cuisines: Index(['North Indian', 'Chinese', 'Fast Food', 'Bakery', 'Cafe', 'American',
       'South Indian', 'Mithai', 'Street Food', 'Continental'],
      dtype='object', name='Primary_Cuisine')


In [3]:
# Model inputs: cost, booking/delivery flags, price range, rating, votes and the
# encoded city.
feature_columns = [
    'Average Cost for two',
    'Has Table booking',
    'Has Online delivery',
    'Price range',
    'Aggregate rating',
    'Votes',
    'City_Code',
]
X = final_df[feature_columns]

# Prediction target: the primary cuisine label.
y = final_df['Primary_Cuisine']

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified although class sizes differ a lot in the
# report below (support 48 vs 598) - consider stratify=y; doing so changes the metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Build a 100-tree random forest with a fixed seed and fit it on the training split.
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predicted cuisine label for every row of the held-out test split.
y_pred = model.predict(X_test)

In [5]:
# Compute both evaluation artefacts first, then print them.
# `accuracy` is the fraction of test rows whose predicted cuisine matches the truth;
# `report` is the per-cuisine precision / recall / f1 table as text.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Model Accuracy: {accuracy:.2f}")
print("\nClassification Report:\n")
print(report)

Model Accuracy: 0.39

Classification Report:

              precision    recall  f1-score   support

    American       0.56      0.62      0.59        48
      Bakery       0.20      0.17      0.18       115
        Cafe       0.30      0.28      0.29       127
     Chinese       0.17      0.08      0.11       165
 Continental       0.21      0.08      0.12        50
   Fast Food       0.15      0.12      0.14       137
      Mithai       0.30      0.20      0.24        56
North Indian       0.50      0.69      0.58       598
South Indian       0.00      0.00      0.00        57
 Street Food       0.31      0.22      0.26        50

    accuracy                           0.39      1403
   macro avg       0.27      0.25      0.25      1403
weighted avg       0.34      0.39      0.36      1403

