# Tips Dataset — Analysis & Visualizations
This notebook contains the code to analyze the `tips.csv` dataset and generate visualizations.

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Apply one shared look (white grid background, pastel colours) to every plot below.
sns.set(style='whitegrid', palette='pastel')

# Load the dataset and normalise its headers in a single step: surrounding
# whitespace is trimmed, names are lower-cased, and inner spaces become
# underscores so later cells can use plain keys such as 'total_bill'.
df = pd.read_csv('tips.csv').rename(
    columns=lambda name: name.strip().lower().replace(' ', '_')
)

# Preview the first rows to confirm the load and the cleaned column names.
df.head()


## Scatter: Total bill vs Tip


In [None]:
# Draw each bill as one semi-transparent point so overlapping points stay visible.
fig, ax = plt.subplots()
ax.scatter(df['total_bill'], df['tip'], alpha=0.6)
ax.set_xlabel('Total bill ($)')
ax.set_ylabel('Tip ($)')
ax.set_title('Total bill vs Tip')
plt.show()


## Average Tip by Day


In [None]:
# Rank the days by their mean tip so the bars read from highest to lowest.
order = (
    df.groupby('day')['tip']
    .mean()
    .sort_values(ascending=False)
    .index
)

# Use an explicit figure/axes pair so this chart never draws onto a figure
# left over from another cell.
fig, ax = plt.subplots()

# sns.barplot aggregates 'tip' per day itself (mean by default), so the raw
# frame is passed and only the bar order comes from the ranking above.
sns.barplot(x='day', y='tip', data=df, order=order, ax=ax)

# Label the axes explicitly: the bar height is an average, not a raw tip value.
ax.set_xlabel('Day')
ax.set_ylabel('Average tip ($)')
ax.set_title('Average Tip by Day')
plt.show()


## Tip Distribution by Gender and Smoking Status


In [None]:
# One box plot of tip amount per grouping column, each shown as its own figure.
# Each pair is (column to group by, title shown above the plot).
boxplot_specs = (
    ('sex', 'Tip by Gender'),
    ('smoker', 'Tip by Smoker'),
)

for group_col, plot_title in boxplot_specs:
    sns.boxplot(x=group_col, y='tip', data=df)
    plt.title(plot_title)
    plt.show()


## Tip Percentage vs Total Bill


In [None]:
# Tip expressed as a percentage of the bill. The column is stored on df itself,
# so any later cell that reads df's numeric columns will include it too.
# NOTE(review): a total_bill of 0 would produce inf/NaN here - confirm the data has none.
df['tip_pct'] = (df['tip'] / df['total_bill']) * 100

# Explicit figure/axes pair keeps this chart independent of earlier cells.
fig, ax = plt.subplots()
sns.scatterplot(x='total_bill', y='tip_pct', hue='sex', data=df, ax=ax)

# Replace the raw column names with readable labels, matching the wording
# used by the total-bill-vs-tip scatter plot.
ax.set_xlabel('Total bill ($)')
ax.set_ylabel('Tip (% of total bill)')
ax.set_title('Tip % vs Total Bill')
plt.show()


## Correlation Heatmap


In [None]:
# Select every numeric column regardless of bit width. Listing only
# 'float64'/'int64' would silently drop int32, float32 or nullable-integer
# columns from the correlation matrix.
num_cols = df.select_dtypes(include='number').columns

# Pairwise correlation of the numeric columns, annotated to two decimals.
# The diverging 'coolwarm' palette separates negative from positive values.
fig, ax = plt.subplots()
sns.heatmap(df[num_cols].corr(), annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap')
plt.show()
