<a href="https://colab.research.google.com/github/Thrishulgoud/Restaurant-ML-Project-task/blob/main/Cuisine_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🍽️ Task 3: Cuisine Classification (Google Colab Project)
This notebook builds and compares two **classification models** (Logistic Regression and Random Forest) that predict a restaurant's cuisine from its other attributes.

In [None]:
# Step 1: Upload Dataset
import io

import pandas as pd
from google.colab import files

# files.upload() opens the Colab file picker and returns a dict that maps each
# uploaded file name to that file's raw bytes.
uploaded = files.upload()
if not uploaded:
    raise ValueError("No file was uploaded; please select the dataset CSV.")

# Parse the CSV directly from the uploaded bytes instead of a hard-coded path.
# Colab may rename a file that is uploaded again (e.g. "name (1).csv"), so a
# fixed file name can silently read a stale copy or fail to match at all.
# If several files are selected, the first one is used.
dataset_name = next(iter(uploaded))
data = pd.read_csv(io.BytesIO(uploaded[dataset_name]))

print(data.head())
print(data.columns)
# info() writes its report to stdout itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
data.info()
print(data.isnull().sum())

Saving  Datasetml.csv to  Datasetml.csv
   Restaurant ID         Restaurant Name  Country Code              City  \
0        6317637        Le Petit Souffle           162       Makati City   
1        6304287        Izakaya Kikufuji           162       Makati City   
2        6300002  Heat - Edsa Shangri-La           162  Mandaluyong City   
3        6318506                    Ooma           162  Mandaluyong City   
4        6314302             Sambo Kojin           162  Mandaluyong City   

                                             Address  \
0  Third Floor, Century City Mall, Kalayaan Avenu...   
1  Little Tokyo, 2277 Chino Roces Avenue, Legaspi...   
2  Edsa Shangri-La, 1 Garden Way, Ortigas, Mandal...   
3  Third Floor, Mega Fashion Hall, SM Megamall, O...   
4  Third Floor, Mega Atrium, SM Megamall, Ortigas...   

                                     Locality  \
0   Century City Mall, Poblacion, Makati City   
1  Little Tokyo, Legaspi Village, Makati City   
2  Edsa Shangri-La,

In [None]:
# Step 2: Preprocessing
from sklearn.preprocessing import LabelEncoder

TARGET_COLUMN = "Cuisines"

# Rows with no cuisine label cannot be used for supervised learning. Filling
# them with the most frequent cuisine would fabricate labels, so they are
# dropped instead of imputed.
data = data.dropna(subset=[TARGET_COLUMN]).reset_index(drop=True)

# Handle missing values in the remaining columns:
# text columns get their most frequent value, numeric columns get their mean.
for col in data.columns:
    if not data[col].isna().any():
        continue  # nothing to fill in this column
    if data[col].dtype == 'object':
        data[col] = data[col].fillna(data[col].mode()[0])
    else:
        data[col] = data[col].fillna(data[col].mean())

# Encode categorical columns as integer codes. The fitted encoder for each
# column is kept so codes can be mapped back to the original strings with
# label_encoders[col].inverse_transform(...).
label_encoders = {}
for col in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

data.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Country Code,City,Address,Locality,Locality Verbose,Longitude,Latitude,Cuisines,...,Currency,Has Table booking,Has Online delivery,Is delivering now,Switch to order menu,Price range,Aggregate rating,Rating color,Rating text,Votes
0,6317637,3748,162,73,8685,171,172,121.027535,14.565443,920,...,0,1,0,0,0,3,4.8,0,1,314
1,6304287,3172,162,73,6055,593,601,121.014101,14.553708,1111,...,0,1,0,0,0,3,4.5,0,1,591
2,6300002,2896,162,75,4684,308,314,121.056831,14.581404,1671,...,0,1,0,0,0,4,4.4,1,5,270
3,6318506,4707,162,75,8690,862,875,121.056475,14.585318,1126,...,0,0,0,0,0,4,4.9,0,1,365
4,6314302,5523,162,75,8689,862,875,121.057508,14.58445,1122,...,0,1,0,0,0,4,4.8,0,1,229


In [None]:
# Step 3: Define Features & Target
from sklearn.model_selection import train_test_split

TARGET_COLUMN = "Cuisines"

# "Restaurant ID" is an arbitrary row identifier. Using it as a numeric
# feature only lets the models memorise individual rows, so it is excluded.
# errors="ignore" keeps the cell working if the column is absent.
# NOTE(review): "Restaurant Name" and "Address" are also close to unique per
# row after label encoding -- consider whether they should be excluded too.
IDENTIFIER_COLUMNS = ["Restaurant ID"]

X = data.drop(columns=[TARGET_COLUMN]).drop(columns=IDENTIFIER_COLUMNS, errors="ignore")
y = data[TARGET_COLUMN]

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Step 4: Train Classification Models
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Logistic Regression
# The feature columns span very different magnitudes (large integer codes
# next to 0/1 flags), which hampers the solver used by logistic regression.
# Standardising inside a pipeline fits the scaler on the training split only
# and applies the same transform automatically at predict time.
# log_reg is now a Pipeline; the fitted classifier itself is log_reg[-1].
log_reg = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
log_reg.fit(X_train, y_train)
y_pred_lr = log_reg.predict(X_test)

# Random Forest
# Tree splits do not depend on feature scale, so the forest is trained on the
# unscaled features; the fixed seed makes the result reproducible.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [None]:
# Step 5: Evaluate Models
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

def evaluate_model(y_true, y_pred, model_name, zero_division=0):
    """Print accuracy, macro precision/recall and the per-class report.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Human-readable name used in the printed header.
        zero_division: Value reported for a metric whose denominator is zero
            (for example, precision of a class that was never predicted).
            Passing it explicitly stops scikit-learn from emitting one
            undefined-metric warning per affected metric call.
    """
    print(f"\n📊 {model_name} Performance")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    print("Precision (macro):", precision_score(y_true, y_pred, average="macro", zero_division=zero_division))
    print("Recall (macro):", recall_score(y_true, y_pred, average="macro", zero_division=zero_division))
    print("\nClassification Report:\n", classification_report(y_true, y_pred, zero_division=zero_division))

evaluate_model(y_test, y_pred_lr, "Logistic Regression")
evaluate_model(y_test, y_pred_rf, "Random Forest")


📊 Logistic Regression Performance
Accuracy: 0.10099424385138671
Precision (macro): 0.00023326367756585427
Recall (macro): 0.0016917541427345349

Classification Report:
               precision    recall  f1-score   support

           6       0.00      0.00      0.00         5
           8       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1
          16       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         1
          27       0.00      0.00      0.00         1
          29       0.00      0.00      0.00         1
          35       0.00      0.00      0.00         1
          38       0.00      0.00      0.00         1
          40       0.00      0.00      0.00         1
          45       0.00      0.00      0.00         1
          46       

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Step 5: Evaluate Models
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

def evaluate_model(y_true, y_pred, model_name):
    """Print a performance summary for one set of predictions.

    Prints a header containing ``model_name``, then accuracy, macro-averaged
    precision and recall, and finally the full classification report.
    Metrics with a zero denominator are reported as 0 (``zero_division=0``).
    """
    print(f"\n📊 {model_name} Performance")

    macro_options = {"average": "macro", "zero_division": 0}
    # Each metric is computed lazily, right before its line is printed.
    headline_metrics = (
        ("Accuracy:", lambda: accuracy_score(y_true, y_pred)),
        ("Precision (macro):", lambda: precision_score(y_true, y_pred, **macro_options)),
        ("Recall (macro):", lambda: recall_score(y_true, y_pred, **macro_options)),
    )
    for label, compute in headline_metrics:
        print(label, compute())

    report = classification_report(y_true, y_pred, zero_division=0)
    print("\nClassification Report:\n", report)

# Report both models against the same held-out labels.
for predictions, title in ((y_pred_lr, "Logistic Regression"), (y_pred_rf, "Random Forest")):
    evaluate_model(y_test, predictions, title)


📊 Logistic Regression Performance
Accuracy: 0.10099424385138671
Precision (macro): 0.00023326367756585427
Recall (macro): 0.0016917541427345349

Classification Report:
               precision    recall  f1-score   support

           6       0.00      0.00      0.00         5
           8       0.00      0.00      0.00         1
          11       0.00      0.00      0.00         1
          12       0.00      0.00      0.00         1
          15       0.00      0.00      0.00         1
          16       0.00      0.00      0.00         1
          22       0.00      0.00      0.00         1
          24       0.00      0.00      0.00         1
          27       0.00      0.00      0.00         1
          29       0.00      0.00      0.00         1
          35       0.00      0.00      0.00         1
          38       0.00      0.00      0.00         1
          40       0.00      0.00      0.00         1
          45       0.00      0.00      0.00         1
          46       

In [None]:
# Step 6: Feature Importance Analysis (Random Forest)
feature_importances = rf.feature_importances_

# Pair every importance score with its column name. Sorting the (score, name)
# tuples in descending order ranks by score first and by name on ties.
ranked_pairs = list(zip(feature_importances, X.columns))
ranked_pairs.sort(reverse=True)
sorted_features = ranked_pairs[:10]  # keep only the ten highest-scoring features

print("\n🔥 Top 10 Influential Features for Cuisine Classification:")
for score, feature in sorted_features:
    print(f"{feature}: {score:.4f}")


🔥 Top 10 Influential Features for Cuisine Classification:
Restaurant Name: 0.1238
Restaurant ID: 0.1085
Address: 0.0993
Latitude: 0.0959
Longitude: 0.0942
Votes: 0.0853
Locality: 0.0803
Locality Verbose: 0.0801
Average Cost for two: 0.0740
Aggregate rating: 0.0555
