# Deployment Model

In [27]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.base import BaseEstimator, TransformerMixin, OneToOneFeatureMixin
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
import joblib

Train on the whole dataset; no hold-out set is kept.

In [28]:
# Source CSV for the heart-failure dataset.
url = "https://raw.githubusercontent.com/CarsonShively/Heart-Failure/refs/heads/main/data/heart_failure.csv"

# Read the file and discard exact duplicate rows in a single expression.
df = pd.read_csv(url).drop_duplicates()

# Target is the DEATH_EVENT column; every other column is a feature.
y = df['DEATH_EVENT']
X = df.drop(columns='DEATH_EVENT')


# Feature Engineering

In [29]:
class FeatureEngineer(TransformerMixin, BaseEstimator):
    """Stateless transformer that adds ratio/interaction features.

    Columns added by ``transform``:

    * ``ef_to_creatinine``  = ejection_fraction / serum_creatinine
    * ``ef_drop_per_age``   = (100 - ejection_fraction) / age
    * ``ef_per_time``       = ejection_fraction / time
    * ``time_x_ef``         = time * ejection_fraction
    * ``creatinine_x_ef``   = serum_creatinine * ejection_fraction
    * ``time_x_creatinine`` = time * serum_creatinine

    The columns ``diabetes``, ``anaemia``, ``smoking``, ``sex`` and
    ``high_blood_pressure`` are dropped when present, and any +/-inf value in
    the resulting frame is replaced with NaN.

    Mixins are listed before ``BaseEstimator`` so that the mixin hooks take
    part in the MRO, as scikit-learn recommends for custom estimators.
    """

    def __init__(self):
        # Remembered only so that ``set_output`` has somewhere to store the
        # requested container type; ``transform`` always returns a DataFrame.
        self._transform_output = None

    def fit(self, X, y=None):
        """Learn nothing; present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the engineered columns added.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain ``ejection_fraction``, ``serum_creatinine``, ``age``
            and ``time``; a ``KeyError`` is raised otherwise.

        Returns
        -------
        pandas.DataFrame
            New frame; the caller's ``X`` is not modified.
        """
        X = X.copy()

        ejection_fraction = X['ejection_fraction']
        serum_creatinine = X['serum_creatinine']
        follow_up_time = X['time']

        X['ef_to_creatinine'] = ejection_fraction / serum_creatinine
        X['ef_drop_per_age'] = (100 - ejection_fraction) / X['age']
        X['ef_per_time'] = ejection_fraction / follow_up_time
        X['time_x_ef'] = follow_up_time * ejection_fraction
        X['creatinine_x_ef'] = serum_creatinine * ejection_fraction
        X['time_x_creatinine'] = follow_up_time * serum_creatinine

        # errors='ignore' keeps this usable on inputs that already lack some
        # of these columns.
        X = X.drop(
            columns=['diabetes', 'anaemia', 'smoking', 'sex', 'high_blood_pressure'],
            errors='ignore',
        )

        # Division by zero can produce +/-inf; map those to NaN so a
        # downstream imputer can handle them like any other missing value.
        X = X.replace([np.inf, -np.inf], np.nan)

        # X is already a DataFrame carrying its own columns and index, so the
        # same object is returned whatever output container was requested.
        return X

    def set_output(self, transform=None):
        """Record the requested output container and return ``self``.

        Kept so that callers such as ``Pipeline.set_output`` can configure
        this step like any other transformer.
        """
        self._transform_output = transform
        return self

# Single-step pipeline wrapping the feature-engineering transformer,
# configured to emit pandas output (set_output returns the pipeline itself).
FE_pipeline = Pipeline(steps=[('FE', FeatureEngineer())]).set_output(transform="pandas")

# Throwaway assignment so the cell ends without displaying the pipeline.
_ = FE_pipeline

# Numeric Features

In [30]:
# Numeric columns taken directly from the input data.
raw_numeric_features = [
    'age',
    'creatinine_phosphokinase',
    'ejection_fraction',
    'platelets',
    'serum_creatinine',
    'serum_sodium',
    'time',
]

# Columns added by the feature-engineering step.
engineered_numeric_features = [
    'ef_to_creatinine',
    'ef_drop_per_age',
    'ef_per_time',
    'time_x_ef',
    'creatinine_x_ef',
    'time_x_creatinine',
]

# Full ordered list of numeric columns: raw columns first, engineered second.
numeric_features = [*raw_numeric_features, *engineered_numeric_features]

class CoerceToFloat(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
    """Cast the configured columns to ``float``; other columns are untouched.

    Mixins are listed before ``BaseEstimator`` so that the mixin hooks take
    part in the MRO, as scikit-learn recommends for custom estimators.

    Parameters
    ----------
    columns : list
        Names of the columns to cast.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Learn nothing; present for scikit-learn API compatibility."""
        return self

    def transform(self, X):
        """Return a new frame in which every configured column is ``float``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain every name in ``columns``; a ``KeyError`` is raised
            otherwise.

        Returns
        -------
        pandas.DataFrame
            New frame; the caller's ``X`` is not modified.
        """
        # astype with a column->dtype mapping returns a new frame, so no
        # explicit copy or per-column loop is needed.
        return X.astype({col: float for col in self.columns})

class NumericImputer(OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
    """Fill missing values in the configured columns with per-column medians.

    Medians are computed in ``fit`` and reused in ``transform``. A column that
    is entirely NaN at fit time has a NaN median, so its missing values stay
    NaN after ``transform``.

    Mixins are listed before ``BaseEstimator`` so that the mixin hooks take
    part in the MRO, as scikit-learn recommends for custom estimators.

    Parameters
    ----------
    columns : list
        Names of the columns to impute.

    Attributes
    ----------
    medians_ : dict
        Maps each configured column name to the median observed in ``fit``.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Compute the median of each configured column, ignoring NaN.

        ``X`` is only read, so it is not copied.

        Returns
        -------
        self
        """
        self.medians_ = {
            col: X[col].median(skipna=True)
            for col in self.columns
        }
        return self

    def transform(self, X):
        """Return a copy of ``X`` with NaN in the configured columns filled.

        Returns
        -------
        pandas.DataFrame
            New frame; the caller's ``X`` is not modified.
        """
        X = X.copy()
        for col in self.columns:
            X[col] = X[col].fillna(self.medians_[col])
        return X

# Numeric preprocessing: cast to float first, then median-impute missing values.
numeric_steps = [
    ('float', CoerceToFloat(columns=numeric_features)),
    ('imputer', NumericImputer(columns=numeric_features)),
]

# set_output returns the pipeline itself, so construction and configuration chain.
numeric_pipeline = Pipeline(numeric_steps).set_output(transform="pandas")

# Throwaway assignment so the cell ends without displaying the pipeline.
_ = numeric_pipeline

# Pipeline

In [31]:
# Route the numeric columns through the numeric pipeline and emit pandas output.
preprocessor = ColumnTransformer(
    transformers=[('num', numeric_pipeline, numeric_features)],
).set_output(transform='pandas')

# Fixed hyperparameters for the random-forest classifier.
best_params = dict(
    n_estimators=586,
    max_depth=None,
    min_samples_split=4,
    min_samples_leaf=10,
    max_features=0.46769332346688064,
    bootstrap=False,
    criterion='gini',
    class_weight='balanced',
    random_state=42,
    n_jobs=-1,
)

# Without bootstrapping, make sure no 'max_samples' entry is passed along.
uses_bootstrap = best_params.get('bootstrap', True)
if not uses_bootstrap:
    best_params.pop('max_samples', None)

best_rf = RandomForestClassifier(**best_params)

# End-to-end model: feature engineering -> column preprocessing -> classifier.
pipeline_steps = [
    ('feature_engineering', FeatureEngineer()),
    ('preprocessor', preprocessor),
    ('classifier', best_rf),
]
full_pipeline = Pipeline(pipeline_steps).set_output(transform='pandas')

# Throwaway assignment so the cell ends without displaying the pipeline.
_ = full_pipeline

# Deploy

In [32]:
# Fit the complete pipeline on every available row (fit returns the pipeline).
fitted_pipeline = full_pipeline.fit(X, y)

# Persist the fitted pipeline to disk.
# NOTE(review): the custom transformer classes are defined in this notebook,
# so the pickle stores references to them rather than their code; whatever
# loads this file presumably needs the same definitions importable -- confirm.
joblib.dump(fitted_pipeline, "model_pipeline.pkl")

['model_pipeline.pkl']