# EDA: Sample Customers

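This notebook loads a small customer CSV, imputes missing `income` values with the column median, and produces seven charts: three histograms, three boxplots, and a correlation heatmap.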

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Show every column when a DataFrame is displayed (None removes the column limit).
pd.set_option("display.max_columns", None)
# Load the sample customers CSV; the relative path is resolved against the
# kernel's current working directory.
df = pd.read_csv("../data/sample_customers.csv")
# Preview the first rows as the cell's output.
df.head()

In [None]:
# Print a concise summary of the frame: column names, non-null counts, and dtypes.
df.info()

In [None]:
# Basic cleaning: impute missing `income` values with the column median
income_median = df["income"].median()
df["income"] = df["income"].fillna(income_median)

# Remaining missing-value count per column (shown as the cell output)
df.isna().sum()

In [None]:
# Summary stats for every column; include="all" also covers non-numeric columns,
# not just the numeric ones that describe() reports by default.
df.describe(include="all")

In [None]:
# Histogram of the `age` column (20 bins), drawn on an explicit Axes
fig, ax = plt.subplots()
df["age"].plot.hist(bins=20, edgecolor="black", ax=ax)
ax.set(title="Age Distribution", xlabel="Age", ylabel="Count")
plt.show()

In [None]:
# Histogram of the `income` column (20 bins), drawn on an explicit Axes
fig, ax = plt.subplots()
df["income"].plot.hist(bins=20, edgecolor="black", ax=ax)
ax.set(title="Income Distribution", xlabel="Income", ylabel="Count")
plt.show()

In [None]:
# Histogram of the `purchases` column (15 bins), drawn on an explicit Axes
fig, ax = plt.subplots()
df["purchases"].plot.hist(bins=15, edgecolor="black", ax=ax)
ax.set(title="Purchases Distribution", xlabel="Purchases", ylabel="Count")
plt.show()

In [None]:
# One boxplot per numeric column, each rendered in its own figure
numeric_cols = ["age", "income", "purchases"]
for col in numeric_cols:
    fig, ax = plt.subplots()
    df.boxplot(column=col, ax=ax)
    ax.set_title(f"Boxplot of {col}")
    ax.set_ylabel(col)
    plt.show()

In [None]:
# Correlation heatmap for numeric features
# (DataFrame.corr() uses its default method, Pearson correlation.)
corr = df[["age", "income", "purchases"]].corr()

fig, ax = plt.subplots()
# Pin the colour scale to the full [-1, 1] correlation range and use a diverging
# colormap so that 0 always maps to the neutral midpoint. With the default
# autoscaling the colours only span this matrix's own min/max, which makes the
# sign and strength of a correlation impossible to read off consistently.
im = ax.imshow(corr, interpolation="nearest", cmap="coolwarm", vmin=-1, vmax=1)

tick_positions = list(range(len(corr.columns)))
ax.set_xticks(tick_positions)
ax.set_xticklabels(corr.columns, rotation=45, ha="right")
ax.set_yticks(tick_positions)
ax.set_yticklabels(corr.columns)

# Write the coefficient into each cell so exact values do not have to be
# estimated from the colour alone.
for row_idx in range(corr.shape[0]):
    for col_idx in range(corr.shape[1]):
        ax.text(col_idx, row_idx, f"{corr.iloc[row_idx, col_idx]:.2f}",
                ha="center", va="center")

fig.colorbar(im, ax=ax, label="Correlation coefficient")
ax.set_title("Correlation Heatmap")
fig.tight_layout()
plt.show()

**Takeaways**

- **TODO:** Add 3–5 bullet-point insights here after reviewing the figures above.