# Tips

### Step 1: Import the necessary libraries

In [None]:
# Install necessary dependencies
# !pip install pandas seaborn matplotlib

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

### Step 2: Import the dataset from the given address

In [None]:
# Raw CSV of the tips dataset, served from the mwaskom/seaborn-data repository.
url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/tips.csv"

# Fetch and parse the CSV into the DataFrame that the later cells plot from.
tips = pd.read_csv(filepath_or_buffer=url)

### Step 3: Assign it to a variable called `tips`

In [None]:
# Preview the first five rows to check that the data loaded as expected.
tips.head(n=5)

### Step 4: Delete the `Unnamed: 0` column

In [None]:
# Flag every column whose name starts with "Unnamed" (typically a saved index)
# and keep only the remaining columns.
is_unnamed = tips.columns.str.contains('^Unnamed')
tips = tips.loc[:, ~is_unnamed]

### Step 5: Plot the `total_bill` column histogram

In [None]:
# Histogram of the bill amounts with a kernel-density curve drawn on top.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(data=tips, x='total_bill', kde=True, color='blue', ax=ax)
ax.set_title('Total Bill Histogram')
ax.set_xlabel('Total Bill')
ax.set_ylabel('Frequency')
plt.show()

### Step 6: Create a scatter plot presenting the relationship between `total_bill` and `tip`

In [None]:
# One green point per row: bill amount on x, tip on y.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=tips, x='total_bill', y='tip', color='green', ax=ax)
ax.set(title='Total Bill vs Tip', xlabel='Total Bill', ylabel='Tip')
plt.show()

### Step 7: Create one image with the relationship of `total_bill`, `tip`, and `size`

In [None]:
# Bill vs tip, with the `size` column encoded twice (marker size and color)
# so all three variables are visible in one image.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=tips,
    x='total_bill',
    y='tip',
    hue='size',
    size='size',
    palette='viridis',
    alpha=0.6,
    ax=ax,
)
ax.set(title='Total Bill vs Tip by Size', xlabel='Total Bill', ylabel='Tip')
# Rebuild the legend so it carries an explicit title.
ax.legend(title='Size')
plt.show()

### Step 8: Present the relationship between `day` and `total_bill` value

In [None]:
# Box plot of the bill amount for each day of the week.
#
# `pd.read_csv` attaches no weekday ordering to `day`, so without an explicit
# `order` the boxes follow the order in which the labels first appear in the
# file. Pin the weekday order instead (assumes the labels are
# Thur/Fri/Sat/Sun, as in seaborn's tips data -- adjust if the data differs).
day_order = ['Thur', 'Fri', 'Sat', 'Sun']

plt.figure(figsize=(10, 6))
# Passing `palette` without `hue` is deprecated in seaborn 0.13+, so `day` is
# also mapped to hue; dodge=False keeps each box centered on its own tick.
ax = sns.boxplot(
    x='day',
    y='total_bill',
    hue='day',
    order=day_order,
    hue_order=day_order,
    data=tips,
    palette='pastel',
    dodge=False,
)
# A hue legend would only repeat the x tick labels; drop it if one was drawn
# (whether one appears depends on the seaborn version).
if ax.get_legend() is not None:
    ax.get_legend().remove()
plt.title('Total Bill by Day')
plt.xlabel('Day')
plt.ylabel('Total Bill')
plt.show()

### Step 9: Create a scatter plot with `day` on the y-axis and `tip` on the x-axis, differentiating the points by `sex`

In [None]:
# Tip amount on x against the day label on y, colored by the `sex` column.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=tips,
    x='tip',
    y='day',
    hue='sex',
    palette='coolwarm',
    alpha=0.7,
    ax=ax,
)
ax.set(title='Tip vs Day by Sex', xlabel='Tip', ylabel='Day')
# Rebuild the legend so it carries an explicit title.
ax.legend(title='Sex')
plt.show()

### Step 10: Create a box plot presenting the `total_bill` per day, differentiating by `time` (Dinner or Lunch)

In [None]:
# Box plot of the bill amount per day, split into one box per `time` value.
#
# `pd.read_csv` attaches no weekday ordering to `day`, so without an explicit
# `order` the x-axis follows the order in which the labels first appear in the
# file. Pin the weekday order instead (assumes the labels are
# Thur/Fri/Sat/Sun, as in seaborn's tips data -- adjust if the data differs).
day_order = ['Thur', 'Fri', 'Sat', 'Sun']

plt.figure(figsize=(10, 6))
sns.boxplot(
    x='day',
    y='total_bill',
    hue='time',
    order=day_order,
    data=tips,
    palette='muted',
)
plt.title('Total Bill by Day and Time')
plt.xlabel('Day')
plt.ylabel('Total Bill')
# Rebuild the legend so it carries an explicit title.
plt.legend(title='Time')
plt.show()

### Step 11: Create two histograms of the `tip` value for Dinner and Lunch side by side

In [None]:
# Two tip histograms side by side: Dinner on the left, Lunch on the right.
fig, (dinner_ax, lunch_ax) = plt.subplots(1, 2, figsize=(12, 6))

# (axes, value of `time` to select, bar color, panel title)
panels = (
    (dinner_ax, 'Dinner', 'blue', 'Tip Distribution (Dinner)'),
    (lunch_ax, 'Lunch', 'orange', 'Tip Distribution (Lunch)'),
)

for ax, meal, shade, heading in panels:
    # Tips for the rows that belong to this meal time only.
    meal_tips = tips.loc[tips['time'] == meal, 'tip']
    sns.histplot(meal_tips, kde=True, color=shade, ax=ax)
    ax.set_title(heading)
    ax.set_xlabel('Tip')
    ax.set_ylabel('Frequency')

# Keep the labels of the two panels from overlapping.
plt.tight_layout()
plt.show()

### Step 12: Create two scatterplots, one for Male and one for Female, showing `total_bill` vs `tip`, differentiated by `smoker`

In [None]:
# Two scatter plots side by side (Male on the left, Female on the right),
# each showing bill vs tip colored by the `smoker` column.

# Each scatterplot call infers its hue levels from the subset it receives, so
# the same smoker value could get different colors in the two panels. Take the
# levels once from the full table and pass them to both calls.
smoker_levels = list(tips['smoker'].unique())

fig, (male_ax, female_ax) = plt.subplots(1, 2, figsize=(12, 6))

# (axes, value of `sex` to select, panel title)
panels = (
    (male_ax, 'Male', 'Male: Total Bill vs Tip by Smoker'),
    (female_ax, 'Female', 'Female: Total Bill vs Tip by Smoker'),
)

for ax, sex, heading in panels:
    sns.scatterplot(
        x='total_bill',
        y='tip',
        hue='smoker',
        hue_order=smoker_levels,
        data=tips[tips['sex'] == sex],
        palette='coolwarm',
        alpha=0.7,
        ax=ax,
    )
    ax.set_title(heading)
    ax.set_xlabel('Total Bill')
    ax.set_ylabel('Tip')
    # Rebuild the legend so it carries an explicit title.
    ax.legend(title='Smoker')

# Keep the labels of the two panels from overlapping.
plt.tight_layout()
plt.show()

### BONUS: Create your own question and answer it using a graph

In [None]:
# BONUS: What is the distribution of total_bill for smokers and non-smokers?
plt.figure(figsize=(10, 6))
# common_norm=False normalizes each group's curve on its own, so the two
# curves compare the shape of the distributions. With the default
# (common_norm=True) each curve is scaled by its group's share of the rows,
# which mixes group size into the comparison.
sns.kdeplot(
    data=tips,
    x='total_bill',
    hue='smoker',
    fill=True,
    common_norm=False,
    palette='muted',
    alpha=0.6,
)
plt.title('Distribution of Total Bill by Smoking Status')
plt.xlabel('Total Bill')
plt.ylabel('Density')
plt.show()