# 01 — Exploration & EDA
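Loads a transactions CSV and performs a quick exploratory data analysis (EDA). The active dataset is expected at `../data/raw/transactions.csv`; if that file does not exist, the notebook falls back to the tiny demo file `../data/raw/tiny_transactions.csv`.
To switch the active dataset (for example to a Kaggle `creditcard.csv`), run `python scripts/switch_dataset.py --use kaggle --kaggle-path /path/to/creditcard.csv`.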

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Candidate CSV locations in order of preference: the active dataset first,
# then the tiny demo file used when the switch script has not been run.
_active_csv = Path("../data/raw/transactions.csv")
_demo_csv = Path("../data/raw/tiny_transactions.csv")
DATA_PATH = _active_csv if _active_csv.exists() else _demo_csv

# Load the chosen file and preview its first rows as the cell output.
df = pd.read_csv(DATA_PATH)
df.head()


## Basic Info & Class Imbalance

In [None]:

# Print the frame's column overview (dtypes and non-null counts).
df.info()

# Rows per target class, ordered by class label, and each class's share of
# all rows in the frame (the denominator is len(df)).
n_rows = len(df)
class_counts = df['Class'].value_counts().sort_index()
class_ratio = class_counts / n_rows
for label, per_class in (("Counts:", class_counts), ("Ratio:", class_ratio)):
    print(label, per_class.to_dict())

# Bar chart of the number of rows in each class.
sns.countplot(data=df, x='Class')
plt.title('Target Distribution')
plt.show()


## Numeric Summaries

In [None]:

# Summary statistics from DataFrame.describe(); the bare name on the last
# line makes the table the cell's displayed output.
summary_stats = df.describe()
summary_stats


## Amount Distribution

In [None]:

# Histogram of the 'Amount' column using 50 bins; the title is set on the
# Axes that histplot drew on.
amounts = df['Amount']
ax = sns.histplot(amounts, bins=50)
ax.set_title('Amount Distribution')
plt.show()


## Correlations (features only; `Class` excluded)

In [None]:

# Pairwise correlation heatmap of the feature columns.
#
# Only numeric/boolean columns are kept: under pandas >= 2.0,
# DataFrame.corr() defaults to numeric_only=False and raises on string-like
# columns (IDs, raw timestamps, ...). The target 'Class' is left out so the
# heatmap compares features only. For an all-numeric dataset this selects
# exactly the same columns as before.
feature_frame = df.select_dtypes(include=['number', 'bool'])
num_cols = [c for c in feature_frame.columns if c != 'Class']
corr = df[num_cols].corr()

plt.figure(figsize=(6, 5))
# annot=False keeps the cells unlabelled; colour alone encodes the value.
sns.heatmap(corr, cmap='coolwarm', annot=False)
plt.title('Feature Correlation Heatmap')
plt.show()
