# Measure Performance

In [2]:
import requests
import time
import numpy as np

In [3]:
import base64
image_path = "image.jpg"
with open(image_path, 'rb') as f:
    image_bytes = f.read()
encoded_str =  base64.b64encode(image_bytes).decode("utf-8")
print('"' + encoded_str + '"')

"/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAMCAgMCAgMDAwMEAwMEBQgFBQQEBQoHBwYIDAoMDAsKCwsNDhIQDQ4RDgsLEBYQERMUFRUVDA8XGBYUGBIUFRQBAwQEBQQFCQUFCRQNCw0UFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFP/CABEIBAAEAAMBIgACEQEDEQH/xAA3AAABBAMBAQEAAAAAAAAAAAAGAQQFBwADCAIJCgEAAwEBAQEBAAAAAAAAAAAAAAECAwQFBgf/2gAMAwEAAhADEAAAAPmMuZ18eZmDVFQMVMBfK4GKmBmYoJmYGZihmJgZioCoqBmZgYqKCZiAqooJiKCpmAqZgZmKGZigiZgKmIC5igi5gYmYGZmBmYgLmYGKmBmL5D1iKGIuAmZgKmYGYqBmYoImYCouBiZg8zFGmZgsXMGmYoecXBZi4LPPpB4qYLMzB4i4LMRR5mKHlcQSouBi5ipF8459ZiAqLgJioC4mB68qgLiKGIuBipgLiKGJ6QMTMBcT0HnMUMxMBcTAXMQFVMBMzAVMwPSZgYvn0GIvkFTFBFzARUUExUDFTAXEUMVMDEVAxUwFRcBMXARcQPXnMDMzAzMwFzFDMzA8riAuIoIuYGIuBmIoZioGZiBmJgKvlSsxUGiqgkVFDMTAXMwMxUDMRQzyqiTF8gqKgYuKCKmDzFwFT15HmZgsVMSzMVpE9IGZmAuIoJi+QXEUMzMDFTAzFwMxFDEXAxc8guJgKioCpmBiooIuIGLigiZgIuKhMzGYqYGYiB6zyoesTAVPXkPXnMBV8oC4ihiKqPPrMZmIoZmYGJmAvn1gJmbg051VZfbw8Er0lz9nrHr49c3TmZgIqKCouB5XMBFRQRVQEVEDFRQRcwrF8qPMVAzMwEXMEqZgekTB4qYLMTAzFQWL4wPS+fYeVzB4qYGIuAi55FmZ6Fi4gZiKCou

## Pytorch - Fast API - RegNet - Human Vs AI

In [6]:
FASTAPI_URL = "http://fastapi_server:8000/predict"

# FASTAPI_URL = "http://localhost:8000/predict"

payload = {"image": encoded_str}
num_requests = 100
inference_times = []

for _ in range(num_requests):
    start_time = time.time()
    response = requests.post(FASTAPI_URL, json=payload)
    end_time = time.time()

    if response.status_code == 200:
        inference_times.append(end_time - start_time)
    else:
        print(f"Error: {response.status_code}, Response: {response.text}")

inference_times = np.array(inference_times)
median_time = np.median(inference_times)
percentile_95 = np.percentile(inference_times, 95)
percentile_99 = np.percentile(inference_times, 99)
throughput = num_requests / inference_times.sum()  

print(f"Median inference time: {1000*median_time:.4f} ms")
print(f"95th percentile: {1000*percentile_95:.4f} ms")
print(f"99th percentile: {1000*percentile_99:.4f} seconds")
print(f"Throughput: {throughput:.2f} requests/sec")

Median inference time: 861.2221 ms
95th percentile: 1079.5787 ms
99th percentile: 1290.1483 seconds
Throughput: 1.12 requests/sec


## Concurrent requests - Pytorch - Fast API - RegNet - Human VS AI

In [7]:
import concurrent.futures

FASTAPI_URL = "http://fastapi_server:8000/predict"

def send_request(payload):
    start_time = time.time()
    response = requests.post(FASTAPI_URL, json=payload)
    end_time = time.time()
    
    if response.status_code == 200:
        return end_time - start_time
    else:
        print(f"Error: {response.status_code}, Response: {response.text}")
        return None

def run_concurrent_tests(num_requests, payload, max_workers=10):
    inference_times = []
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(send_request, payload) for _ in range(num_requests)]
        
        for future in concurrent.futures.as_completed(futures):
            result = future.result()
            if result is not None:
                inference_times.append(result)
    
    return inference_times

num_requests = 1000
start_time = time.time()
inference_times = run_concurrent_tests(num_requests, payload, max_workers=16)
total_time = time.time() - start_time

inference_times = np.array(inference_times)
median_time = np.median(inference_times)
percentile_95 = np.percentile(inference_times, 95)
percentile_99 = np.percentile(inference_times, 99)
throughput = num_requests / total_time

print(f"Median inference time: {1000*median_time:.4f} ms")
print(f"95th percentile: {1000*percentile_95:.4f} ms")
print(f"99th percentile: {1000*percentile_99:.4f} seconds")
print(f"Throughput: {throughput:.2f} requests/sec")

Median inference time: 6354.0373 ms
95th percentile: 7806.5891 ms
99th percentile: 8986.2466 seconds
Throughput: 2.47 requests/sec


## Pytorch - Fast API - ViT - Human Vs AI

In [None]:
FASTAPI_URL = "http://fastapi_server:8000/predict"

# FASTAPI_URL = "http://localhost:8000/predict"

payload = {"image": encoded_str}
num_requests = 100
inference_times = []

for _ in range(num_requests):
    start_time = time.time()
    response = requests.post(FASTAPI_URL, json=payload)
    end_time = time.time()

    if response.status_code == 200:
        inference_times.append(end_time - start_time)
    else:
        print(f"Error: {response.status_code}, Response: {response.text}")

inference_times = np.array(inference_times)
median_time = np.median(inference_times)
percentile_95 = np.percentile(inference_times, 95)
percentile_99 = np.percentile(inference_times, 99)
throughput = num_requests / inference_times.sum()  

print(f"Median inference time: {1000*median_time:.4f} ms")
print(f"95th percentile: {1000*percentile_95:.4f} ms")
print(f"99th percentile: {1000*percentile_99:.4f} seconds")
print(f"Throughput: {throughput:.2f} requests/sec")

## ONNX - Fast API - ViT - Human Vs AI

## Pytorch - Triton Server - BLIP - Image Captioning

## VLLM - LLM - Tags Generation