In [6]:
%%writefile measure_model_size.py
import os
from pathlib import Path

MODEL_PATH = Path("models/rf_smote70_classweight.joblib")

if not MODEL_PATH.exists():
    raise SystemExit(f"Model file not found: {MODEL_PATH}")

size_bytes = MODEL_PATH.stat().st_size
size_mb = size_bytes / (1024**2)

print(f"Model path: {MODEL_PATH}")
print(f"Size: {size_bytes} bytes ({size_mb:.2f} MB)")


Overwriting measure_model_size.py


In [8]:
!python measure_model_size.py


Model path: models\rf_smote70_classweight.joblib
Size: 16735513 bytes (15.96 MB)


In [10]:
%%writefile measure_inference_latency.py
import time
from pathlib import Path

import joblib
import numpy as np
import pandas as pd

MODEL_PATH = Path("models/rf_smote70_classweight.joblib")
TEST_INPUT_PATH = Path("X_test_proc_for_app.csv")  # adjust if different

# Load model
if not MODEL_PATH.exists():
    raise SystemExit(f"Model file not found: {MODEL_PATH}")
rf = joblib.load(MODEL_PATH)

# Load test data
if not TEST_INPUT_PATH.exists():
    raise SystemExit(f"Test input file not found: {TEST_INPUT_PATH}")

print(f"Loading test data from: {TEST_INPUT_PATH}")
df = pd.read_csv(TEST_INPUT_PATH)

# If your CSV contains a label column, drop it here, e.g.:
# df = df.drop(columns=["label"])
# or df = df.iloc[:, :-1]

X = df.values
n_samples, n_features = X.shape
print(f"Loaded test batch: {n_samples} samples, {n_features} features")

# Warm-up to stabilise timing
for _ in range(3):
    _ = rf.predict_proba(X[:100])

# Time a full-batch predict_proba
start = time.perf_counter()
probs = rf.predict_proba(X)
end = time.perf_counter()

total_s = end - start
avg_ms = (total_s / n_samples) * 1000
throughput = n_samples / total_s

print("\n=== Inference timing ===")
print(f"Total time: {total_s:.4f} s for {n_samples} samples")
print(f"Average inference time per flow: {avg_ms:.3f} ms")
print(f"Throughput: {throughput:.1f} flows/s")


Writing measure_inference_latency.py


In [12]:
!python measure_inference_latency.py


Loading test data from: X_test_proc_for_app.csv
Loaded test batch: 103784 samples, 81 features

=== Inference timing ===
Total time: 0.1953 s for 103784 samples
Average inference time per flow: 0.002 ms
Throughput: 531496.3 flows/s


In [14]:
%%writefile measure_memory_inference.py
import os
import time
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import psutil

MODEL_PATH = Path("models/rf_smote70_classweight.joblib")
TEST_INPUT_PATH = Path("X_test_proc_for_app.csv")  # adjust if needed

proc = psutil.Process(os.getpid())
print(f"PID: {proc.pid}")

# Load model
if not MODEL_PATH.exists():
    raise SystemExit(f"Model file not found: {MODEL_PATH}")
rf = joblib.load(MODEL_PATH)
print("Model loaded")

# Load test data
if not TEST_INPUT_PATH.exists():
    raise SystemExit(f"Test input file not found: {TEST_INPUT_PATH}")

df = pd.read_csv(TEST_INPUT_PATH)

# If CSV includes label column, drop it:
# df = df.drop(columns=["label"])
# or df = df.iloc[:, :-1]

X = df.values
print(f"Loaded X: {X.shape}")

# Measure RSS before inference
mem_before = proc.memory_info().rss / (1024**2)

start = time.perf_counter()
_ = rf.predict_proba(X)
end = time.perf_counter()

mem_after = proc.memory_info().rss / (1024**2)

print("\n=== Memory during inference ===")
print(f"Memory before: {mem_before:.2f} MB")
print(f"Memory after:  {mem_after:.2f} MB")
print(f"Delta memory:  {mem_after - mem_before:.2f} MB")
print(f"Inference time: {end - start:.4f} s for {X.shape[0]} samples")


Writing measure_memory_inference.py


In [16]:
!python measure_memory_inference.py


PID: 3180
Model loaded
Loaded X: (103784, 81)

=== Memory during inference ===
Memory before: 233.02 MB
Memory after:  233.59 MB
Delta memory:  0.57 MB
Inference time: 0.1969 s for 103784 samples


In [18]:
%%writefile measure_shap_time.py
import time
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import shap

MODEL_PATH = Path("models/rf_smote70_classweight.joblib")
TEST_INPUT_PATH = Path("X_test_proc_for_app.csv")  # adjust if needed
N = 200  # number of samples for SHAP

# Load model
if not MODEL_PATH.exists():
    raise SystemExit(f"Model file not found: {MODEL_PATH}")
rf = joblib.load(MODEL_PATH)

# Load data
if not TEST_INPUT_PATH.exists():
    raise SystemExit(f"Test input file not found: {TEST_INPUT_PATH}")

df = pd.read_csv(TEST_INPUT_PATH)

# If CSV includes label column, drop it:
# df = df.drop(columns=["label"])
# or df = df.iloc[:, :-1]

X = df.values

if X.shape[0] < N:
    N = X.shape[0]
    print(f"Warning: only {N} samples available; using all of them.")

Xsub = X[:N]
print(f"Computing SHAP values for N = {N} samples, {Xsub.shape[1]} features")

explainer = shap.TreeExplainer(rf)

start = time.perf_counter()
shap_values = explainer.shap_values(Xsub)
end = time.perf_counter()

print("\n=== SHAP timing ===")
print(f"Time to compute SHAP for {N} samples: {end - start:.3f} s")


Writing measure_shap_time.py


In [20]:
!python measure_shap_time.py


Computing SHAP values for N = 200 samples, 81 features

=== SHAP timing ===
Time to compute SHAP for 200 samples: 8.202 s


In [22]:
%%writefile profile_all.py
import os
import time
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import psutil

MODEL_PATH = Path("models/rf_smote70_classweight.joblib")
TEST_INPUT_PATH = Path("X_test_proc_for_app.csv")  # adjust if needed

proc = psutil.Process(os.getpid())

print("=== Model size ===")
if not MODEL_PATH.exists():
    raise SystemExit(f"Model file not found: {MODEL_PATH}")

size_mb = MODEL_PATH.stat().st_size / (1024**2)
print(f"Path: {MODEL_PATH}")
print(f"Size: {size_mb:.2f} MB")

# Load model
rf = joblib.load(MODEL_PATH)

# Load data
if not TEST_INPUT_PATH.exists():
    raise SystemExit(f"Test input file not found: {TEST_INPUT_PATH}")

df = pd.read_csv(TEST_INPUT_PATH)

# If CSV includes label column, drop it:
# df = df.drop(columns=["label"])
# or df = df.iloc[:, :-1]

X = df.values
n_samples, n_features = X.shape
print(f"\nTest batch: {n_samples} samples, {n_features} features")

# Warm-up
for _ in range(2):
    _ = rf.predict_proba(X[:100])

mem_before = proc.memory_info().rss / (1024**2)
t0 = time.perf_counter()
_ = rf.predict_proba(X)
t1 = time.perf_counter()
mem_after = proc.memory_info().rss / (1024**2)

total_s = t1 - t0
avg_ms = total_s / n_samples * 1000
throughput = n_samples / total_s

print("\n=== Inference profile ===")
print(f"Time for full batch: {total_s:.4f} s")
print(f"Average per-sample latency: {avg_ms:.3f} ms")
print(f"Throughput: {throughput:.1f} flows/s")
print(f"Memory before: {mem_before:.2f} MB")
print(f"Memory after:  {mem_after:.2f} MB")
print(f"Delta memory:  {mem_after - mem_before:.2f} MB")


Writing profile_all.py


In [24]:
!python profile_all.py


=== Model size ===
Path: models\rf_smote70_classweight.joblib
Size: 15.96 MB

Test batch: 103784 samples, 81 features

=== Inference profile ===
Time for full batch: 0.1793 s
Average per-sample latency: 0.002 ms
Throughput: 578720.3 flows/s
Memory before: 233.84 MB
Memory after:  234.70 MB
Delta memory:  0.86 MB
