# 🍽️ Task 3: Cuisine Classification (Google Colab Project)
This notebook trains and compares two **classification models** (Logistic Regression and Random Forest) to predict restaurant cuisines.

In [None]:
# Step 1: Upload Dataset
# Opens the Colab upload dialog; `uploaded` holds whatever files.upload() returns.
from google.colab import files
uploaded = files.upload()

import pandas as pd
# NOTE: the path is hard-coded, so the uploaded file is expected to be named
# exactly "Datasetml.csv" and to be readable from the working directory.
data = pd.read_csv("Datasetml.csv")

# Quick look at the data: first rows, column names, dtypes and missing counts.
print(data.head())
print(data.columns)
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
data.info()
print(data.isnull().sum())

In [None]:
# Step 2: Preprocessing
from sklearn.preprocessing import LabelEncoder

# Handle missing values: text (object) columns get their most frequent value,
# every other column gets its mean.
# The filled column is assigned back to the frame instead of calling
# fillna(..., inplace=True) on `data[col]`: that chained in-place form operates
# on a temporary selection, triggers a FutureWarning on pandas 2.x and leaves
# `data` unchanged once Copy-on-Write is active.
for col in data.columns:
    if data[col].dtype == 'object':
        data[col] = data[col].fillna(data[col].mode()[0])
    else:
        data[col] = data[col].fillna(data[col].mean())

# Encode categorical columns: every object column (this includes the target
# column if it is text) is replaced by integer codes. The fitted encoder is
# kept per column so the codes can be mapped back to the original labels.
label_encoders = {}
for col in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[col] = le.fit_transform(data[col])
    label_encoders[col] = le

# Last expression of the cell: the notebook renders the encoded preview.
data.head()

In [None]:
# Step 3: Define Features & Target
from sklearn.model_selection import train_test_split

# Every column except the target is used as a feature.
X = data.drop("Cuisines", axis=1)
y = data["Cuisines"]

# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
# A stratified split keeps the class proportions equal in both parts, but
# train_test_split raises ValueError when stratification is impossible (for
# example when a class has a single row). In that case fall back to a plain
# random split rather than aborting the notebook.
try:
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )
except ValueError as err:
    print(f"Stratified split not possible ({err}); using a non-stratified split instead.")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

In [None]:
# Step 4: Train Classification Models
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Logistic Regression: build and fit in one expression (fit returns the
# estimator itself), then predict on the held-out rows.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred_lr = log_reg.predict(X_test)

# Random Forest: 100 trees with a fixed seed, fitted and evaluated the same way.
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf.predict(X_test)

In [None]:
# Step 5: Evaluate Models
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

def evaluate_model(y_true, y_pred, model_name):
    """Print accuracy, macro precision/recall and a per-class report.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Display name used in the printed header.

    Returns:
        None. All results are written to standard output.
    """
    print(f"\n📊 {model_name} Performance")
    print("Accuracy:", accuracy_score(y_true, y_pred))
    # zero_division=0 scores a class with no predicted (or no true) samples as
    # 0, which is the value the default setting produces as well, but without
    # emitting an UndefinedMetricWarning for every such class.
    print(
        "Precision (macro):",
        precision_score(y_true, y_pred, average="macro", zero_division=0),
    )
    print(
        "Recall (macro):",
        recall_score(y_true, y_pred, average="macro", zero_division=0),
    )
    print(
        "\nClassification Report:\n",
        classification_report(y_true, y_pred, zero_division=0),
    )

# Report both models against the same held-out labels.
evaluate_model(y_true=y_test, y_pred=y_pred_lr, model_name="Logistic Regression")
evaluate_model(y_true=y_test, y_pred=y_pred_rf, model_name="Random Forest")

In [None]:
# Step 6: Feature Importance Analysis (Random Forest)
# Pair each importance score with its column name, rank the pairs from highest
# to lowest (equal scores are ordered by column name), and keep the first ten.
feature_importances = rf.feature_importances_
sorted_features = list(zip(feature_importances, X.columns))
sorted_features.sort(reverse=True)
sorted_features = sorted_features[:10]

print("\n🔥 Top 10 Influential Features for Cuisine Classification:")
for score, feature in sorted_features:
    print(f"{feature}: {score:.4f}")