In [1]:
"""
Run predictions simultaneously on X threads and measure wall-clock time.

Assumes:
 - You already created "SVM.pkl" as in your snippet (pickle-dumped sklearn SVC).
 - You already have pandas and scikit-learn installed.

Usage:
  python threaded_svm_test.py
"""

'\nRun predictions simultaneously on X threads and measure wall-clock time.\n\nAssumes:\n - You already created "SVM.pkl" as in your snippet (pickle-dumped sklearn SVC).\n - You already have pandas and scikit-learn installed.\n\nUsage:\n  python threaded_svm_test.py\n'

In [2]:

import os
import time
import pickle
import threading
from typing import List
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris

In [3]:

# ---- Tweak these for experiments ----
MODEL_PATH = "SVM.pkl"
THREAD_COUNTS = [1, 50, 100, 500, 1000, 2000]     # numbers of threads to test
REPEATS_PER_THREAD = 5          # how many times each thread calls predict (to amplify time)
WARMUP = True                    # do a warmup predict to avoid first-call overhead in timing

In [4]:
def load_data() -> np.ndarray:
    """Load iris dataset into numpy array (same as your train_set)."""
    iris = load_iris()
    X = iris["data"]  # shape (150, 4)
    return X

def load_model(path: str):
    """Load pickled sklearn model."""
    with open(path, "rb") as fh:
        model = pickle.load(fh)
    return model

def worker_predict(thread_id: int, model, X: np.ndarray, barrier: threading.Barrier, repeats: int, results: List[float]):
    """
    Each thread waits on the barrier (ensuring near-simultaneous start),
    then runs predict() repeats times. It records its own elapsed time into results[thread_id].
    """
    # Wait until main thread and all workers are ready
    barrier.wait()

    t0 = time.perf_counter()
    for _ in range(repeats):
        _ = model.predict(X)   # we discard predictions; focus is on timing
    t1 = time.perf_counter()

    results[thread_id] = t1 - t0

def run_threads(num_threads: int, model_path: str, repeats: int, warmup: bool = True):
    """Spawn num_threads threads and measure wall time from simultaneous start to all threads finished."""
    X = load_data()
    model = load_model(model_path)

    if warmup:
        # Do a single warmup predict to avoid one-time overheads being included in timings
        _ = model.predict(X)

    # Prepare synchronization barrier: main thread + worker threads
    barrier = threading.Barrier(num_threads + 1)

    # Container to hold per-thread timings
    results = [None] * num_threads
    threads = []

    # Create threads
    for i in range(num_threads):
        th = threading.Thread(target=worker_predict, args=(i, model, X, barrier, repeats, results), daemon=True)
        threads.append(th)
        th.start()

    # All threads now started and waiting on barrier -> record time then release them
    t_start = time.perf_counter()
    barrier.wait()   # releases all worker threads to start predict at nearly the same instant

    # Wait for all threads to finish
    for th in threads:
        th.join()
    t_end = time.perf_counter()

    wall_time = t_end - t_start
    per_thread_times = results

    return {
        "num_threads": num_threads,
        "repeats_per_thread": repeats,
        "wall_time": wall_time,
        "per_thread_times": per_thread_times,
        "avg_per_thread_time": np.mean(per_thread_times),
        "max_per_thread_time": np.max(per_thread_times),
        "min_per_thread_time": np.min(per_thread_times),
    }

In [5]:

print("Threaded SVM predict test\n")
print(f"Model file: {MODEL_PATH}")
print(f"Repeats per thread: {REPEATS_PER_THREAD}")

print()

Threaded SVM predict test

Model file: SVM.pkl
Repeats per thread: 5



In [6]:

results_list = []
for n in THREAD_COUNTS:
    print(f"Running with {n} thread(s)...", end=" ", flush=True)
    res = run_threads(n, MODEL_PATH, REPEATS_PER_THREAD, warmup=WARMUP)
    results_list.append(res)
    print("done")
    print(f"  Wall time: {res['wall_time']:.6f} s")
    print(f"  Avg per-thread time (mean of threads): {res['avg_per_thread_time']:.6f} s")
    print(f"  Per-thread times: {[round(x,6) for x in res['per_thread_times']]}")
    print()

Running with 1 thread(s)... done
  Wall time: 0.002162 s
  Avg per-thread time (mean of threads): 0.002131 s
  Per-thread times: [0.002131]

Running with 50 thread(s)... done
  Wall time: 0.032885 s
  Avg per-thread time (mean of threads): 0.023889 s
  Per-thread times: [0.025048, 0.028677, 0.024905, 0.022973, 0.027686, 0.029775, 0.025394, 0.020955, 0.022278, 0.022726, 0.020063, 0.025019, 0.02598, 0.02524, 0.027624, 0.019001, 0.024047, 0.022552, 0.031027, 0.017039, 0.024453, 0.029468, 0.022056, 0.009364, 0.028997, 0.019778, 0.028126, 0.028156, 0.028253, 0.018419, 0.02756, 0.027746, 0.018688, 0.028596, 0.015666, 0.026918, 0.018938, 0.024521, 0.024657, 0.015785, 0.021282, 0.017577, 0.027648, 0.023596, 0.026197, 0.025259, 0.028191, 0.017167, 0.025383, 0.02799]

Running with 100 thread(s)... done
  Wall time: 0.063900 s
  Avg per-thread time (mean of threads): 0.043259 s
  Per-thread times: [0.05433, 0.017632, 0.052825, 0.051087, 0.029332, 0.038251, 0.043298, 0.045765, 0.051417, 0.026927, 

In [7]:

# Summary
print("Summary (wall-time vs threads):")
for r in results_list:
    print(f"  Threads={r['num_threads']:2d}  wall_time={r['wall_time']:.6f}s  avg_thread={r['avg_per_thread_time']:.6f}s")

# #compute avarage time per model
# for r in results_list:
#     time_per_model = r['wall_time'] / r['repeats_per_thread'] * r['num_threads']
#     print("time_per_model:", time_per_model)
#

#compute avarage time per model
for r in results_list:
    time_per_model = r['wall_time'] / (r['num_threads'] * r['repeats_per_thread'])
    print(f"Threads={r['num_threads']:2d} | Avg time per model: {time_per_model*1000:.3f} ms")


Summary (wall-time vs threads):
  Threads= 1  wall_time=0.002162s  avg_thread=0.002131s
  Threads=50  wall_time=0.032885s  avg_thread=0.023889s
  Threads=100  wall_time=0.063900s  avg_thread=0.043259s
  Threads=500  wall_time=0.398612s  avg_thread=0.282902s
  Threads=1000  wall_time=7.293591s  avg_thread=6.479115s
  Threads=2000  wall_time=1.280431s  avg_thread=0.005537s
Threads= 1 | Avg time per model: 0.432 ms
Threads=50 | Avg time per model: 0.132 ms
Threads=100 | Avg time per model: 0.128 ms
Threads=500 | Avg time per model: 0.159 ms
Threads=1000 | Avg time per model: 1.459 ms
Threads=2000 | Avg time per model: 0.128 ms
