In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's "whitegrid" theme to every plot drawn after this point.
sns.set(style="whitegrid")
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load the dataset; the relative path is resolved against the kernel's working directory.
df = pd.read_csv("Zomato-data-.csv")

# Sanity check: print (rows, columns), then show the first five rows as the cell output.
print("Shape:", df.shape)
df.head()


In [None]:
# Remove fully duplicated rows in place, keeping the first occurrence of each.
df.drop_duplicates(keep='first', inplace=True)

# Clean the 'rate' column
def handle_rate(value):
    """Convert a raw rating entry such as ``"4.1/5"`` into a float.

    Only the text before the first ``/`` is parsed, after trimming
    surrounding whitespace.

    Args:
        value: Raw cell value. It is passed through ``str()`` first, so
            strings, numbers, ``None`` and NaN are all accepted.

    Returns:
        The rating as a ``float``, or ``np.nan`` when the text is not a
        finite number (this covers placeholders such as ``'NEW'``, ``'-'``
        and ``'nan'``).
    """
    text = str(value).split('/')[0].strip()
    try:
        rating = float(text)
    except ValueError:
        # float() rejects every non-numeric placeholder ('NEW', '-', 'None', '').
        # Only ValueError is caught so that unrelated failures (or a
        # KeyboardInterrupt) are not silently turned into NaN.
        return np.nan
    # float() also parses 'nan' and 'inf'; neither is a usable rating.
    return rating if np.isfinite(rating) else np.nan

# Parse every raw rating into a float (NaN where handle_rate yields no number).
df['rate'] = df['rate'].apply(handle_rate)

# Cost is handled as text first: drop commas (presumably thousands
# separators), trim whitespace, then parse; anything still non-numeric
# becomes NaN.
df['approx_cost(for two people)'] = pd.to_numeric(
    df['approx_cost(for two people)'].astype(str).str.replace(',', '').str.strip(),
    errors='coerce',
)

# Impute the remaining gaps in both numeric columns with that column's mean.
df.fillna(
    {column: df[column].mean() for column in ('rate', 'approx_cost(for two people)')},
    inplace=True,
)

# Summarise dtypes and non-null counts after cleaning.
df.info()


## **Online vs Offline Delivery**

In [None]:
# Count of restaurants per value of `online_order`; the title is set on the
# Axes that seaborn returns.
sns.countplot(
    data=df,
    x='online_order',
    palette='coolwarm',
).set_title("Online vs Offline Delivery")
plt.show()


## **Most Favored Restaurant Types**

In [None]:
# Horizontal bars, one per listing type, sorted from most to least frequent.
plt.figure(figsize=(12, 6))
sns.countplot(
    data=df,
    y='listed_in(type)',
    order=df['listed_in(type)'].value_counts().index,
    palette='Set2',
).set(title="Most Popular Restaurant Types", xlabel="Count", ylabel="Type")
plt.show()


## **Couples’ Preferred Price Range**

In [None]:
# Keep rows whose listing type contains "Caf", "Casual" or "Fine"
# (case-insensitive); rows with a missing type are excluded.
romantic_df = df.loc[
    df['listed_in(type)'].str.contains("Caf|Casual|Fine", case=False, na=False)
]

# Distribution of cost-for-two within each of the selected listing types.
plt.figure(figsize=(12, 6))
sns.boxplot(
    data=romantic_df,
    x='listed_in(type)',
    y='approx_cost(for two people)',
).set_title("Price Range Preferred by Couples")
plt.xticks(rotation=45)
plt.show()


## **Average Rating by Restaurant Type**

In [None]:
# Mean rating per listing type, drawn without confidence-interval bars.
plt.figure(figsize=(12, 6))
sns.barplot(
    data=df,
    x='rate',
    y='listed_in(type)',
    estimator=np.mean,
    ci=None,
    palette='viridis',
).set_title("Average Rating by Restaurant Type")
plt.show()


## **Votes vs Cost Correlation**

In [None]:
# Scatter of votes against cost-for-two, with marginal distributions.
sns.jointplot(
    data=df,
    x='approx_cost(for two people)',
    y='votes',
    kind='scatter',
    color='green',
)
# jointplot creates its own figure, which is now the current one; place the
# title slightly above it so it clears the top marginal plot.
plt.gcf().suptitle("Votes vs Cost", y=1.02)
plt.show()

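## **Conclusion**

The charts above suggest that online delivery, casual dining, and moderately priced restaurants are the most preferred by customers, with North Indian and Chinese cuisines cited as the most popular (note that no cuisine-level chart is included above to support this last point). These insights can guide strategic decisions in the food industry.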
