In [3]:
!pip install onnx onnxruntime tf2onnx skl2onnx joblib scikit-learn tensorflow

Collecting onnx
  Downloading onnx-1.17.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting tf2onnx
  Downloading tf2onnx-1.16.1-py3-none-any.whl.metadata (1.3 kB)
Collecting skl2onnx
  Downloading skl2onnx-1.17.0-py2.py3-none-any.whl.metadata (3.2 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting protobuf>=3.20.2 (from onnx)
  Downloading protobuf-3.20.3-cp310-cp310-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (679 bytes)
Collecting onnxconverter-common>=1.7.0 (from skl2onnx)
  Downloading onnxconverter_common-1.14.0-py2.py3-none-any.whl.metadata (4.2 kB)
INFO: pip is looking at multiple versions of onnxconverter-common to determine which version is compatible with other requirements. This could take a while.
  Downloading onnxco

## Import library

In [4]:
import os
import time
import pickle
import joblib
import onnx
import tf2onnx
import tensorflow as tf
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

## Persiapan Data dan Model

In [6]:
# Fetch the breast cancer dataset and hold out 20% of the samples for testing.
# The fixed random_state keeps the split identical between runs.
data = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.2,
    random_state=42,
)

# Build and fit a 100-tree random forest in one step (fit() returns the estimator itself).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the fitted forest on the held-out split.
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))

Model Accuracy: 0.9649122807017544


## Serialization dengan Pickle

In [7]:
# Benchmark pickle: time one save and one load of the trained model, then record the file size.
pickle_file = "model.pkl"

# Save model.
# perf_counter() is a monotonic, high-resolution clock intended for measuring short
# intervals; time.time() is wall-clock time and can be too coarse (or jump) for
# millisecond-scale measurements like these.
start = time.perf_counter()
with open(pickle_file, "wb") as f:
    pickle.dump(model, f)
pickle_save_time = time.perf_counter() - start

# Load model.
# NOTE: pickle.load can execute arbitrary code; it is acceptable here only because
# the file was written by this very cell a few lines above.
start = time.perf_counter()
with open(pickle_file, "rb") as f:
    pickle_model = pickle.load(f)
pickle_load_time = time.perf_counter() - start

# Sanity check (outside the timed regions): the restored model must predict exactly
# like the in-memory one, otherwise the timings describe a broken round trip.
assert (pickle_model.predict(X_test) == model.predict(X_test)).all(), \
    "pickle round trip changed the model's predictions"

# File size in bytes
pickle_size = os.path.getsize(pickle_file)

## Serialization dengan Joblib

In [8]:
# Benchmark joblib: time one save and one load of the trained model, then record the file size.
joblib_file = "model.joblib"

# Save model.
# perf_counter() is a monotonic, high-resolution clock intended for measuring short
# intervals; time.time() is wall-clock time and can be too coarse (or jump) for
# millisecond-scale measurements like these.
start = time.perf_counter()
joblib.dump(model, joblib_file)
joblib_save_time = time.perf_counter() - start

# Load model
start = time.perf_counter()
joblib_model = joblib.load(joblib_file)
joblib_load_time = time.perf_counter() - start

# Sanity check (outside the timed regions): the restored model must predict exactly
# like the in-memory one, otherwise the timings describe a broken round trip.
assert (joblib_model.predict(X_test) == model.predict(X_test)).all(), \
    "joblib round trip changed the model's predictions"

# File size in bytes
joblib_size = os.path.getsize(joblib_file)

## Serialization dengan ONNX

In [9]:
# Benchmark ONNX: convert the forest, time writing it to disk and loading it into
# ONNX Runtime, then record the file size.
onnx_file = "model.onnx"

# Convert model to ONNX.
# The input is declared as a float tensor with one column per training feature;
# the first (row) dimension is left as None rather than fixed to a batch size.
initial_type = [("float_input", FloatTensorType([None, X_train.shape[1]]))]
onnx_model = convert_sklearn(model, initial_types=initial_type)

# Save model.
# Only serialization + file write is timed; the sklearn -> ONNX conversion above is not.
# perf_counter() is a monotonic, high-resolution clock intended for measuring short
# intervals; time.time() is wall-clock time and can be too coarse for these durations.
start = time.perf_counter()
with open(onnx_file, "wb") as f:
    f.write(onnx_model.SerializeToString())
onnx_save_time = time.perf_counter() - start

# Load model.
# "Loading" here means creating an inference session from the file. The execution
# provider is named explicitly because onnxruntime builds that ship additional
# providers refuse to create a session without a `providers` argument.
start = time.perf_counter()
onnx_session = rt.InferenceSession(onnx_file, providers=["CPUExecutionProvider"])
onnx_load_time = time.perf_counter() - start

# File size in bytes
onnx_size = os.path.getsize(onnx_file)

## Serialization dengan TensorFlow

In [10]:
# Benchmark Keras/HDF5: train a tiny network, time saving and loading it, record the file size.
# NOTE: this is a different and much smaller model (one sigmoid unit on the raw features)
# than the 100-tree random forest used for the other formats, so its file size and
# timings describe the Keras HDF5 format itself rather than a like-for-like comparison.

# Define and compile model.
# tf.keras.Input(shape=...) is used instead of InputLayer(input_shape=...): the
# `input_shape` keyword is deprecated in Keras 3, while Input(shape=...) works in both
# Keras 2 and Keras 3.
tf_model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(1, activation='sigmoid')
])
tf_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train model (silently; the fit quality is irrelevant to the serialization benchmark)
tf_model.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)

# File path — the ".h5" suffix is what selects the HDF5 format on save
tf_file = "model_tf.h5"

# Save model.
# The format is inferred from the file extension; the explicit `save_format` argument
# is deprecated in Keras 3 and therefore omitted.
# perf_counter() is a monotonic, high-resolution clock intended for measuring short
# intervals; time.time() is wall-clock time and can be too coarse for these durations.
start = time.perf_counter()
tf_model.save(tf_file)
tf_save_time = time.perf_counter() - start

# Load model
start = time.perf_counter()
loaded_tf_model = tf.keras.models.load_model(tf_file)
tf_load_time = time.perf_counter() - start

# File size in bytes
tf_size = os.path.getsize(tf_file)



## Perbandingan Hasil

In [11]:
# Gather one (save seconds, load seconds, size in bytes) triple per serialization format.
# Insertion order determines the order of the printed report.
results = {}
results["Pickle"] = (pickle_save_time, pickle_load_time, pickle_size)
results["Joblib"] = (joblib_save_time, joblib_load_time, joblib_size)
results["ONNX"] = (onnx_save_time, onnx_load_time, onnx_size)
results["TensorFlow"] = (tf_save_time, tf_load_time, tf_size)

# Report: one line per format, times with 4 decimals and size converted from bytes to KB.
print("\nComparison Results (Save Time, Load Time, File Size):")
for method in results:
    save_time, load_time, size = results[method]
    print(f"{method}: Save Time={save_time:.4f}s, Load Time={load_time:.4f}s, File Size={size / 1024:.2f} KB")


Comparison Results (Save Time, Load Time, File Size):
Pickle: Save Time=0.0146s, Load Time=0.0052s, File Size=309.79 KB
Joblib: Save Time=0.0507s, Load Time=0.0384s, File Size=317.68 KB
ONNX: Save Time=0.0024s, Load Time=0.0065s, File Size=128.19 KB
TensorFlow: Save Time=0.0774s, Load Time=0.1332s, File Size=17.54 KB
