# 🚬 Smoking Behavior Analysis

This Jupyter Notebook explores demographic and behavioral trends in smoking using a real-world UK dataset. It includes:
- Exploratory Data Analysis
- Visualizations by age, gender, income, region, and more
- Real-world implications and public health insights


In [None]:
# 📥 Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply the white-grid seaborn theme to every figure drawn below.
# `set_theme` is the current name of the `sns.set` alias.
sns.set_theme(style='whitegrid')


In [None]:
# 📂 Load Data
# Read the CSV and normalise three columns in one chained step:
#   - `smoke`: surrounding whitespace removed so comparisons with 'Yes' match
#   - `amt_weekends` / `amt_weekdays`: forced to numeric; values that cannot
#     be parsed become NaN (errors='coerce')
df = pd.read_csv("smoking.csv").assign(
    smoke=lambda frame: frame['smoke'].str.strip(),
    amt_weekends=lambda frame: pd.to_numeric(frame['amt_weekends'], errors='coerce'),
    amt_weekdays=lambda frame: pd.to_numeric(frame['amt_weekdays'], errors='coerce'),
)

# Consumption columns for rows flagged as smokers, keeping only rows where
# both amounts are present. Used by the consumption histogram further down.
smokers_df = df.loc[df['smoke'].eq('Yes'), ['amt_weekends', 'amt_weekdays']].dropna()

df.head()

## 📊 Smoking Status by Gender

In [None]:
# Grouped bar chart: number of respondents per gender, split by smoking status.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='gender', hue='smoke', palette='Set2', ax=ax)
ax.set(title="Smoking Status by Gender", xlabel="Gender", ylabel="Count")
fig.tight_layout()
plt.show()

## 🎓 Smoking by Education Level

In [None]:
# Grouped bar chart of smoking status per qualification level.
# Categories are laid out from most to least frequent (value_counts order).
qual_order = df['highest_qualification'].value_counts().index

fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(
    data=df,
    x='highest_qualification',
    hue='smoke',
    order=qual_order,
    palette='coolwarm',
    ax=ax,
)
ax.set(title="Smoking by Education", xlabel="Highest Qualification", ylabel="Count")
# Tilt the category labels so long qualification names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

## 🧓 Age Distribution by Smoking Status

In [None]:
# Stacked histogram of respondent age (30 bins), coloured by smoking status.
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(
    data=df,
    x='age',
    hue='smoke',
    multiple='stack',
    palette='Set1',
    bins=30,
    ax=ax,
)
ax.set(title="Age Distribution by Smoking Status", xlabel="Age", ylabel="Count")
fig.tight_layout()
plt.show()

## 🚬 Cigarette Consumption (Smokers Only)

In [None]:
# Overlay the weekend and weekday cigarette counts for smokers.
#
# Both histograms are drawn over ONE shared bin range. Without it, each call
# would split its own min..max into 25 bins, giving the two layers different
# bin edges and widths, so overlapping bars would not be comparable.
fig, ax = plt.subplots(figsize=(10, 6))

# smokers_df holds only the two amount columns, so the frame-wide min/max is
# the combined range of both series. An empty frame has no range to share;
# fall back to seaborn's default (None) in that case.
if smokers_df.empty:
    shared_range = None
else:
    shared_range = (smokers_df.min().min(), smokers_df.max().max())

sns.histplot(
    smokers_df['amt_weekends'],
    color='blue',
    label='Weekends',
    kde=True,
    bins=25,
    binrange=shared_range,
    ax=ax,
)
sns.histplot(
    smokers_df['amt_weekdays'],
    color='green',
    label='Weekdays',
    kde=True,
    bins=25,
    binrange=shared_range,
    ax=ax,
)

ax.set(
    title="Cigarette Consumption (Smokers Only)",
    xlabel="Number of Cigarettes",
    ylabel="Frequency",
)
ax.legend()
fig.tight_layout()
plt.show()

## 💼 Smoking Status by Gross Income

In [None]:
# Grouped bar chart of smoking status per gross-income category.
# Categories are laid out from most to least frequent (value_counts order).
# NOTE(review): if the income categories are ordered bands, a low-to-high
# order may read better than frequency order — confirm against the data.
income_order = df['gross_income'].value_counts().index

fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(
    data=df,
    x='gross_income',
    hue='smoke',
    order=income_order,
    palette='Paired',
    ax=ax,
)
ax.set(title="Smoking Status by Gross Income", xlabel="Gross Income", ylabel="Count")
# Tilt the category labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()