# Pipeline Interactive Notebook
This notebook demonstrates each stage of the function-based pipeline one cell at a time, then runs the full pipeline end to end with `run_all`.

In [None]:
from pipeline import (
    ingest,
    inspect,
    split,
    impute,
    scale_transform,
    detect_outliers,
    encode,
    run_all
)


## Stage 0: Data Ingestion

In [None]:
# Stage 0: ingest the raw CSV in "file" mode, passing "outputs" as output_dir.
df = ingest("data/raw/input.csv", mode="file", output_dir="outputs")
# Preview the result as the cell's final expression so the notebook displays it
# (assumes ingest returns a pandas DataFrame -- TODO confirm).
df.head()

## Stage 1: Data Inspection

In [None]:
# Stage 1: inspect the ingested frame, naming "TARGET" as the target column.
# Kept as the cell's final expression so any return value is still displayed.
inspect(
    df,
    target_column="TARGET",
    output_dir="outputs",
)

## Stage 2: Train/Val/Test Split

In [None]:
# Stage 2: split the ingested frame into train / validation / test parts.
# random_state is pinned to 42 (presumably for reproducible splits -- confirm
# against the pipeline module).
train, val, test = split(
    df,
    target_column='TARGET',
    test_size=0.2,
    val_size=0.1,
    random_state=42,
    output_dir='outputs',
)
# Show the shape of each part as the cell output.
tuple(part.shape for part in (train, val, test))

## Stage 3: Imputation

In [None]:
# Stage 3: run the imputation stage over the three splits from Stage 2.
train_i, val_i, test_i = impute(
    train,
    val,
    test,
    random_state=42,
    output_dir='outputs',
)

## Stage 4: Scaling & Transformation

In [None]:
# Stage 4: run scaling/transformation on the imputed splits from Stage 3.
train_s, val_s, test_s = scale_transform(
    train_i,
    val_i,
    test_i,
    output_dir='outputs',
)

## Stage 5: Outlier Detection

In [None]:
# Stage 5: run outlier detection on the scaled splits from Stage 4.
# NOTE(review): whether outliers are dropped or only flagged is decided inside
# the pipeline module and is not visible from this notebook.
train_o, val_o, test_o = detect_outliers(
    train_s,
    val_s,
    test_s,
    output_dir='outputs',
)

## Stage 6: Encoding

In [None]:
# Stage 6: run the encoding stage on the Stage 5 outputs; the results are the
# final per-stage splits produced by this walkthrough.
train_e, val_e, test_e = encode(
    train_o,
    val_o,
    test_o,
    output_dir='outputs',
)

## Run the Full Pipeline

In [None]:
# Run every stage in one call instead of cell by cell.
# This rebinds train_e / val_e / test_e, replacing the Stage 6 results above.
# NOTE(review): test_size is 0.1 here but 0.2 in the Stage 2 cell, so the two
# paths presumably yield different splits -- confirm this is intentional.
train_e, val_e, test_e = run_all(
    source='data/raw/input.csv',
    mode='file',
    target_column='TARGET',
    test_size=0.1,
    val_size=0.1,
    random_state=42,
    output_dir='outputs',
)