# 6. Saving and Loading Trained Machine Learning Models

There are two common ways to save and load a trained model:
1. Python's `pickle` module
2. Python's `joblib` module

In [1]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


def evaluate_preds(y_true, y_pred):
    """Print and return classification metrics for a set of predictions.

    Computes accuracy, precision, recall and F1 by calling the corresponding
    ``sklearn.metrics`` functions with their default arguments.

    NOTE(review): the precision/recall/F1 calls rely on the library defaults,
    which presumably expect binary labels -- confirm before reusing this on a
    multiclass problem.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned element-wise with ``y_true``.

    Returns:
        dict: Keys ``"accuracy"``, ``"precision"``, ``"recall"`` and ``"f1"``,
        each mapped to the raw score rounded to two decimal places.
    """
    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    metric_dict = {"accuracy": round(accuracy, 2), "precision": round(precision, 2), "recall": round(recall, 2),
                   "f1": round(f1, 2)}

    # Accuracy is scaled by 100, so it is the only line that carries a "%".
    # The other three are printed as raw scores; a "%" suffix on an unscaled
    # value (e.g. "0.77%") would misreport the result by a factor of 100.
    print(f"Accuracy: {accuracy * 100:.2f}%")
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1: {f1:.2f}")

    return metric_dict

In [2]:
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Pin NumPy's global RNG so any unseeded randomness repeats between runs
np.random.seed(42)

# Read the heart-disease table from disk
heart_disease = pd.read_csv("resources/heart-disease.csv")

# Randomise the row order; frac=1 keeps every row
heart_disease_shuffled = heart_disease.sample(frac=1, random_state=42)

# Features are every column except the label; the label is "target"
X = heart_disease_shuffled.drop(columns="target")
y = heart_disease_shuffled["target"]

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Configure the random forest, then fit it on the training split
clf = RandomForestClassifier(
    n_estimators=100,
    max_depth=30,
    min_samples_split=6,
    min_samples_leaf=2,
    random_state=42,
)
clf.fit(X_train, y_train)

# Predict labels for the held-out rows
y_preds = clf.predict(X_test)

# Print the scores and keep the rounded metric dict for later reference
metrics = evaluate_preds(y_test, y_preds)

Accuracy: 81.97%
Precision: 0.77%
Recall: 0.86%
F1: 0.81%


## Pickle

### Saving

In [3]:
import pickle

# Serialize the trained classifier to disk. The context manager guarantees the
# file handle is flushed and closed even if pickling raises, instead of leaving
# an anonymous open() handle for the garbage collector.
# NOTE(review): the extension is "pk1" (digit one), presumably a typo for
# "pkl"; the loading cell reads the same path, so rename both together.
with open("models/clf.pk1", "wb") as model_file:
    pickle.dump(clf, model_file)

### Loading

In [4]:
# Restore the classifier from the pickle file. Opening it in a `with` block
# closes the handle deterministically once the object has been read.
# Security: unpickling can execute arbitrary code, so only load pickle files
# that come from a source you trust (here, the file this notebook wrote).
with open("models/clf.pk1", "rb") as model_file:
    loaded_model = pickle.load(model_file)

### Making predictions

In [5]:
# Score the unpickled model on the same held-out split used for `clf`;
# metrics matching the original run show the save/load round trip kept the model intact.
y_preds = loaded_model.predict(X_test)
# Last expression in the cell, so the returned metric dict is displayed as well as printed.
evaluate_preds(y_test, y_preds)

Accuracy: 81.97%
Precision: 0.77%
Recall: 0.86%
F1: 0.81%


{'accuracy': 0.82, 'precision': 0.77, 'recall': 0.86, 'f1': 0.81}

## Joblib (Preferable)

### Saving

In [8]:
from joblib import dump, load

# Persist the trained classifier with joblib. The call's return value (a list
# containing the written path) is the cell's displayed output.
dump(clf, filename="models/clf.joblib")

['models/clf.joblib']

### Loading

In [9]:
# Restore the classifier from the file written by the joblib dump() call.
joblib_loaded_model = load("models/clf.joblib")

In [12]:
# Score the joblib-restored model on the same held-out split; metrics matching
# the original run show the round trip preserved the trained model.
y_preds = joblib_loaded_model.predict(X_test)

# Last expression in the cell, so the returned metric dict is displayed as well as printed.
evaluate_preds(y_test, y_preds)

Accuracy: 81.97%
Precision: 0.77%
Recall: 0.86%
F1: 0.81%


{'accuracy': 0.82, 'precision': 0.77, 'recall': 0.86, 'f1': 0.81}