# 🔍 Exploratory Data Analysis (EDA)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

## 1. Load and Preview Data

In [None]:
# Small in-memory sample used by every later cell. The `None` entries in
# `age` and `salary` are deliberate: they give the missing-value checks
# something to report.
data = pd.DataFrame.from_dict(
    {
        'age': [25, 30, 45, None, 40, 35, 30],
        'salary': [50000, 60000, 80000, 75000, None, 62000, 61000],
        'department': ['HR', 'IT', 'IT', 'HR', 'Finance', 'Finance', 'HR'],
    }
)
# Kept as the final bare expression so the notebook renders the preview.
data.head(5)

## 2. Basic Info & Statistics

In [None]:
# Overview of the frame: dimensions first, then the per-column report from
# `info()`, then descriptive statistics for every column.
frame_shape = data.shape
print("📐 Shape:", frame_shape)

print("\n📋 Info:")
data.info()  # prints its own report, so it is not wrapped in print()

print("\n📊 Summary:")
summary = data.describe(include='all')
# Final bare expression so the notebook renders the summary table.
summary

## 3. Missing Values

In [None]:
# Count the null cells in each column and print the per-column totals.
null_mask = data.isnull()
missing_per_column = null_mask.sum()
print("🧩 Missing values per column:")
print(missing_per_column)

## 4. Duplicates

In [None]:
# `duplicated()` flags each row that repeats an earlier row; summing the
# boolean flags gives the number of such rows.
duplicate_flags = data.duplicated()
print("🔁 Number of duplicates:", duplicate_flags.sum())

## 5. Histograms

In [None]:
# Histogram grid for the frame, 5 bins per plot, on an 8x4-inch figure.
data.hist(
    bins=5,
    figsize=(8, 4),
)
# Tighten subplot spacing before showing the figure.
plt.tight_layout()
plt.show()

## 6. Boxplots (Outliers)

In [None]:
# Box plot of the salary column; points beyond the whiskers are the
# outlier candidates this section is looking for.
salary_axes = sns.boxplot(x=data['salary'])
salary_axes.set_title('Boxplot: Salary')
plt.show()

## 7. Grouped Analysis

In [None]:
# Average salary within each department.
print("📂 Mean salary by department:")
salary_by_department = data.groupby('department')['salary']
print(salary_by_department.mean())