# Week 10 — Day 7: Packaging the Pipeline

### Imports and Loads

In [9]:
import sys
import joblib
import numpy as np
import pandas as pd
from pathlib import Path

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline

# Put the repository root (the parent of the current working directory) on the
# import path so the local `src` package can be imported further down.
PROJECT_ROOT = Path("..").resolve()
_root_entry = str(PROJECT_ROOT)
if _root_entry not in sys.path:
    sys.path.append(_root_entry)

print("Project root added to path:", PROJECT_ROOT)

Project root added to path: C:\Users\saifu_y\ML-Projects\Fraud Detection System


In [2]:
# Folder with the serialized model artifacts, addressed relative to the
# current working directory; it is created if it is not there yet.
ARTIFACTS_DIR = Path("..", "models")
ARTIFACTS_DIR.mkdir(exist_ok=True)

# Name the two artifact files first, then deserialize them.
rf_model_path = ARTIFACTS_DIR / "rf_fe_tuned_v1.joblib"
rf_threshold_path = ARTIFACTS_DIR / "rf_threshold_v1.joblib"

best_rf = joblib.load(rf_model_path)
threshold = joblib.load(rf_threshold_path)  # 0.01

print("Loaded tuned RF + threshold:", threshold)

Loaded tuned RF + threshold: 0.01


### Feature Engineering Transformer

In [10]:
from src.feature_engineering import FeatureEngineer

### Building Pipeline

In [4]:
# Chain the feature-engineering transformer and the loaded random forest into
# one estimator, so a single call runs both stages in order.
pipeline_steps = [
    ("feature_engineering", FeatureEngineer()),
    ("model", best_rf),
]
final_pipeline = Pipeline(steps=pipeline_steps)

print(final_pipeline)

Pipeline(steps=[('feature_engineering', FeatureEngineer()),
                ('model',
                 RandomForestClassifier(class_weight='balanced_subsample',
                                        min_samples_leaf=2, n_estimators=200,
                                        n_jobs=-1, random_state=42))])


**Quick Check on the Existing Test Split**

In [5]:
# Reload the persisted train/test split; only the test part is scored below.
split_path = ARTIFACTS_DIR / "split_v1.joblib"
X_train, X_test, y_train, y_test = joblib.load(split_path)

# Take the second probability column of the pipeline output.
# NOTE(review): presumably the positive (fraud) class — confirm via classes_.
class_probabilities = final_pipeline.predict_proba(X_test)
y_prob = class_probabilities[:, 1]

# Flag every row whose score reaches the loaded threshold, encoded as 0/1.
meets_threshold = y_prob >= threshold
y_pred = meets_threshold.astype(int)

print("Example probabilities:", y_prob[:5])
print("Example predictions:", y_pred[:5])

Example probabilities: [0.         0.         0.00998234 0.         0.00496342]
Example predictions: [0 0 0 0 0]


**Save the Pipeline**

In [11]:
# joblib, Pipeline and Path are already imported in the notebook's first cell,
# so they are not imported a second time here.

# paths
ARTIFACTS_DIR = Path("..") / "models"

# load best tuned model and threshold
best_rf = joblib.load(ARTIFACTS_DIR / "rf_fe_tuned_v1.joblib")
threshold = joblib.load(ARTIFACTS_DIR / "rf_threshold_v1.joblib")

# joblib/pickle stores a custom class as a reference to its module, not as a
# copy of its code. If FeatureEngineer were defined inside the notebook
# (module "__main__"), the saved pipeline could not be loaded by any other
# process, so stop before writing an unusable artifact.
if FeatureEngineer.__module__ == "__main__":
    raise RuntimeError(
        "FeatureEngineer is defined in the notebook (__main__); import it from "
        "src.feature_engineering before saving the pipeline."
    )

# rebuild pipeline using IMPORTED FeatureEngineer
final_pipeline = Pipeline(steps=[
    ("feature_engineering", FeatureEngineer()),
    ("model", best_rf)
])

# save deployable artifacts
joblib.dump(final_pipeline, ARTIFACTS_DIR / "fraud_pipeline_rf_v1.joblib")
joblib.dump(threshold, ARTIFACTS_DIR / "fraud_threshold_v1.joblib")

print("Re-saved pipeline using importable FeatureEngineer.")

Re-saved pipeline using importable FeatureEngineer.


In [7]:
# Export a small, reproducible slice of the test features as a CSV file
# (index column omitted).
SAMPLE_ROWS = 200
sample_path = Path("..") / "data" / "sample_input.csv"

# to_csv does not create missing folders, so make sure the target exists.
sample_path.parent.mkdir(parents=True, exist_ok=True)

# DataFrame.sample raises when asked for more rows than are available
# (sampling without replacement), so cap the request at the split size.
sample = X_test.sample(min(SAMPLE_ROWS, len(X_test)), random_state=42)
sample.to_csv(sample_path, index=False)

### Packaging Checkpoint (Week 10 Day 7)

- Packaged final pipeline (Feature Engineering + Tuned Random Forest) and saved as `models/fraud_pipeline_rf_v1.joblib`
- Saved tuned threshold as `models/fraud_threshold_v1.joblib`
- Created CLI inference script: `src/predict.py`
- Successfully ran inference on full dataset:

Command:
`python -m src.predict --input data/raw/creditcard.csv --output reports/predictions_full.csv`

Output:
- Threshold: 0.01
- Predicted fraud: 1,843 / 284,807 (~0.65%)