In [2]:
import requests
import time
import numpy as np

# Latency sweep: for every target QPS and payload size, POST a fixed-size body
# to each server for `test_duration_seconds` and record the P99 latency.

# Server endpoints
url1 = "http://server-service.client-server.svc.cluster.local"  # Client pod and server pod are on different nodes
url2 = "http://server-service2.client-server.svc.cluster.local"  # Client pod and server pod are on the same node

urls = [url1, url2]
qps_values = [1000000, 5000000]  # Target queries per second (upper bound on the send rate)
test_duration_seconds = 10  # Test duration in seconds for each message size and QPS level

message_sizes = [256, 1024, 4096, 16384, 65536, 262144, 1048576]  # Message sizes in bytes

# results[qps][message_size] -> [P99 for url1, P99 for url2], in milliseconds
results = {qps: {ms: [] for ms in message_sizes} for qps in qps_values}

for qps in qps_values:
    print(f"Testing at QPS: {qps}")
    for message_size in message_sizes:
        # The payload depends only on the size, so build it once for both URLs.
        message = 'x' * message_size
        summary = []
        for url in urls:
            response_times = []
            start_test = time.time()

            while (time.time() - start_test) < test_duration_seconds:
                start_time = time.time()
                try:
                    # Use POST to send data
                    response = requests.post(url + "/post", data=message)
                    response.raise_for_status()  # Check for HTTP errors
                    end_time = time.time()
                    response_time_ms = (end_time - start_time) * 1000  # Convert response time to milliseconds
                    response_times.append(response_time_ms)
                except requests.RequestException as e:
                    # Failed requests are reported but excluded from the latency samples.
                    print(f"Request failed: {e}")
                finally:
                    # Sleep to maintain the QPS rate. A `finally:` clause needs a real
                    # statement as its body; with only comments the cell does not parse.
                    # When a request already took longer than 1/qps the sleep is zero.
                    end_time = time.time()
                    sleep_time = max(0, (1 / qps) - (end_time - start_time))
                    time.sleep(sleep_time)

            # Calculate P99 latency for the current URL and message size
            if response_times:
                p99_latency = np.percentile(response_times, 99)
                summary.append(p99_latency)
            else:
                # No successful request: mark the measurement as unavailable.
                summary.append(float('inf'))

        # Store results for this message size
        results[qps][message_size] = summary
        print(f"Message Size: {message_size} bytes, URL1 P99: {summary[0]:.2f} ms, URL2 P99: {summary[1]:.2f} ms")

# Print all collected data at the end
for qps, data in results.items():
    print(f"QPS: {qps}")
    for ms, times in data.items():
        print(f"Message Size: {ms} bytes, URL1 P99 and URL2 P99 are: [{times[0]:.2f}, {times[1]:.2f}]")


Testing at QPS: 1000000
Message Size: 256 bytes, URL1 P99: 43.99 ms, URL2 P99: 28.69 ms
Message Size: 1024 bytes, URL1 P99: 28.59 ms, URL2 P99: 44.48 ms
Message Size: 4096 bytes, URL1 P99: 27.61 ms, URL2 P99: 40.94 ms
Message Size: 16384 bytes, URL1 P99: 35.11 ms, URL2 P99: 34.79 ms
Message Size: 65536 bytes, URL1 P99: 41.72 ms, URL2 P99: 50.59 ms
Message Size: 262144 bytes, URL1 P99: 55.90 ms, URL2 P99: 60.63 ms
Message Size: 1048576 bytes, URL1 P99: 77.90 ms, URL2 P99: 74.63 ms
Testing at QPS: 5000000
Message Size: 256 bytes, URL1 P99: 66.31 ms, URL2 P99: 25.71 ms
Message Size: 1024 bytes, URL1 P99: 44.05 ms, URL2 P99: 33.67 ms
Message Size: 4096 bytes, URL1 P99: 40.54 ms, URL2 P99: 50.36 ms
Message Size: 16384 bytes, URL1 P99: 51.28 ms, URL2 P99: 74.19 ms
Message Size: 65536 bytes, URL1 P99: 60.49 ms, URL2 P99: 34.64 ms
Message Size: 262144 bytes, URL1 P99: 54.97 ms, URL2 P99: 29.89 ms
Message Size: 1048576 bytes, URL1 P99: 44.83 ms, URL2 P99: 51.51 ms
QPS: 1000000
Message Size: 256

In [None]:
import requests
import time
import numpy as np
import matplotlib.pyplot as plt

# Quick QPS sweep against a single server: measure GET latency for a few
# seconds per rate, then report P50/P90/P99 and persist them as CSV.

# Service under test
url = "http://server-service.client-server.svc.cluster.local"

# Request rates to try, and how long to hold each one (seconds)
qps_values = [1000,100000]
test_duration_seconds = 5
percentiles = {'p50': [], 'p90': [], 'p99': []}

for qps in qps_values:
    response_times = []
    start_test = time.time()

    while (time.time() - start_test) < test_duration_seconds:
        start_time = time.time()
        try:
            response = requests.get(url)
            response.raise_for_status()  # HTTP error statuses count as failures
        except requests.RequestException as e:
            # A failed request is reported and retried immediately:
            # no latency sample is recorded and no pacing sleep is taken.
            print(f"Request failed: {e}")
            continue
        else:
            end_time = time.time()
            response_time_ms = (end_time - start_time) * 1000  # seconds -> milliseconds
            response_times.append(response_time_ms)

            # Wait out whatever is left of this request's 1/qps time slot.
            sleep_time = max(0, (1 / qps) - (end_time - start_time))
            time.sleep(sleep_time)

    # One entry per percentile for this rate; +inf when nothing succeeded.
    for label, q in (('p50', 50), ('p90', 90), ('p99', 99)):
        if response_times:
            percentiles[label].append(np.percentile(response_times, q))
        else:
            percentiles[label].append(float('inf'))

    print(f"QPS: {qps}, P50: {percentiles['p50'][-1]:.2f} ms, P90: {percentiles['p90'][-1]:.2f} ms, P99: {percentiles['p99'][-1]:.2f} ms")

# Persist one CSV row per tested rate
with open('response_times.csv', 'w') as f:
    f.write('QPS,P50,P90,P99\n')
    for i in range(len(qps_values)):
        row = (
            f"{qps_values[i]},"
            f"{percentiles['p50'][i]:.2f},"
            f"{percentiles['p90'][i]:.2f},"
            f"{percentiles['p99'][i]:.2f}\n"
        )
        f.write(row)

# Show the [qps, p99] pairs on a single line
all_data = []
for i, qps in enumerate(qps_values):
    all_data.append([qps, percentiles['p99'][i]])
print(all_data)


In [None]:
# run the load_test.py from client pod

# run inside the client pod
import requests
import time
import numpy as np
import matplotlib.pyplot as plt

# QPS sweep against a single server: hold each target rate for a short window,
# collect GET latencies, then save P50/P90/P99 to a file and plot P99 vs. QPS.

# Server endpoint
url = "http://server-service.client-server.svc.cluster.local"

# Different queries per second
qps_values = [10000, 20000, 50000, 100000, 200000, 500000, 1000000, 5000000]
test_duration_seconds = 2  # 2 seconds per QPS level
percentiles = {'p50': [], 'p90': [], 'p99': []}

for qps in qps_values:
    response_times = []
    start_test = time.time()

    while (time.time() - start_test) < test_duration_seconds:
        start_time = time.time()
        try:
            response = requests.get(url)
            response.raise_for_status()  # This will raise an exception for HTTP error codes
            end_time = time.time()
            response_time_ms = (end_time - start_time) * 1000  # Convert response time to milliseconds
            response_times.append(response_time_ms)
        except requests.RequestException as e:
            print(f"Request failed: {e}")
            # Failed requests still consume their time slot so the rate is maintained.
            end_time = time.time()

        # Sleep to maintain the QPS rate (applies to successful and failed requests alike)
        sleep_time = max(0, (1 / qps) - (end_time - start_time))
        time.sleep(sleep_time)

    # Calculate percentiles
    if response_times:
        percentiles['p50'].append(np.percentile(response_times, 50))
        percentiles['p90'].append(np.percentile(response_times, 90))
        percentiles['p99'].append(np.percentile(response_times, 99))
    else:
        # Append a placeholder to ALL three lists. They are indexed in parallel
        # with qps_values below, so they must stay the same length; appending to
        # p99 only makes the print and the file rows misaligned or raise IndexError.
        percentiles['p50'].append(float('inf'))
        percentiles['p90'].append(float('inf'))
        percentiles['p99'].append(float('inf'))

    print(f"QPS: {qps}, P50: {percentiles['p50'][-1]:.2f} ms, P90: {percentiles['p90'][-1]:.2f} ms, P99: {percentiles['p99'][-1]:.2f} ms")

# Save the data to a file
all_data = []  # all [qps, p99] pairs
with open('response_times.csv', 'w') as f:
    for i, qps in enumerate(qps_values):
        p99 = percentiles['p99'][i]
        f.write(f"{qps},{percentiles['p50'][i]:.2f},{percentiles['p90'][i]:.2f},{p99:.2f}\n")
        f.write(f"[QPS, p99]= [{qps}, {p99:.2f}]\n")
        # int(float('inf')) raises OverflowError, so keep the inf placeholder
        # as-is and truncate only real measurements to whole milliseconds.
        all_data.append([qps, int(p99) if np.isfinite(p99) else p99])

# Print all data in a row like [qps, p99]
print(all_data)

# Plotting the response times (P99 only)
plt.figure(figsize=(10, 6))
plt.plot(qps_values, percentiles['p99'], marker='o', label='P99 (ms)')
plt.title("Response Time Percentiles (ms) vs Queries Per Second")
plt.xlabel("Queries Per Second (QPS)")
plt.ylabel("Response Time (milliseconds)")
plt.legend()
plt.grid(True)
plt.savefig("response_times_plot_ms.png")
plt.show()


In [None]:
# run inside the client pod
import requests
import time
import numpy as np
# import matplotlib.pyplot as plt

# Long-running QPS sweep: two minutes per rate, GET latency percentiles
# written to response_times.csv.

# Service under test
url = "http://server-service.client-server.svc.cluster.local"

# Request rates to sweep; each one is held for test_duration_seconds
qps_values = [1000, 3000, 5000, 7000, 10000, 20000, 50000]
test_duration_seconds = 120  # two minutes per rate
percentiles = {'p50': [], 'p90': [], 'p99': []}

for qps in qps_values:
    response_times = []
    start_test = time.time()

    while (time.time() - start_test) < test_duration_seconds:
        start_time = time.time()
        try:
            response = requests.get(url)
            response.raise_for_status()  # treat HTTP error statuses as failures
        except requests.RequestException as e:
            print(f"Request failed: {e}")
            # No sample is recorded, but the time spent still counts for pacing.
            end_time = time.time()
        else:
            end_time = time.time()
            response_time_ms = (end_time - start_time) * 1000  # seconds -> milliseconds
            response_times.append(response_time_ms)

        # Success or failure, wait out the remainder of the 1/qps slot.
        sleep_time = max(0, (1 / qps) - (end_time - start_time))
        time.sleep(sleep_time)

    # Summarise this rate; +inf marks a rate where no request succeeded.
    for label, q in (('p50', 50), ('p90', 90), ('p99', 99)):
        value = np.percentile(response_times, q) if response_times else float('inf')
        percentiles[label].append(value)

    print(f"QPS: {qps}, P50: {percentiles['p50'][-1]:.2f} ms, P90: {percentiles['p90'][-1]:.2f} ms, P99: {percentiles['p99'][-1]:.2f} ms")

# Write a header line followed by one row per tested rate
with open('response_times.csv', 'w') as f:
    f.write('QPS,p50,p90,p99\n')
    for i in range(len(qps_values)):
        qps = qps_values[i]
        f.write(f"{qps},{percentiles['p50'][i]:.2f},{percentiles['p90'][i]:.2f},{percentiles['p99'][i]:.2f}\n")

# Plotting the response times
# plt.figure(figsize=(10, 6))
# plt.plot(qps_values, percentiles['p50'], marker='o', label='P50 (ms)')
# plt.plot(qps_values, percentiles['p90'], marker='o', label='P90 (ms)')
# plt.plot(qps_values, percentiles['p99'], marker='o', label='P99 (ms)')
# plt.title("Response Time Percentiles (ms) vs Queries Per Second")
# plt.xlabel("Queries Per Second (QPS)")
# plt.ylabel("Response Time (milliseconds)")
# plt.legend()
# plt.grid(True)
# plt.savefig("response_times_plot_ms.png")
# plt.show()


In [None]:
'''run bandwidth limit test in client pod'''

import requests
import time
import numpy as np

# Bandwidth-limit run: hold each request rate for 60 seconds and report
# the GET latency percentiles observed from the client pod.

# Service under test
url = "http://server-service.client-server.svc.cluster.local"

# Request rates to test and the duration of each run
qps_values = [5000, 20000]
test_duration_seconds = 60  # 60 seconds (one minute) per rate
percentiles = {'p50': [], 'p90': [], 'p99': []}

for qps in qps_values:
    response_times = []
    start_test = time.time()

    while (time.time() - start_test) < test_duration_seconds:
        start_time = time.time()
        try:
            response = requests.get(url)
            response.raise_for_status()  # HTTP error statuses raise here
        except requests.RequestException as e:
            print(f"Request failed: {e}")
            # The failed attempt still occupies its slot in the schedule.
            end_time = time.time()
        else:
            end_time = time.time()
            response_time_ms = (end_time - start_time) * 1000  # seconds -> milliseconds
            response_times.append(response_time_ms)

        # Pace the loop: sleep for whatever remains of the 1/qps interval.
        elapsed = end_time - start_time
        sleep_time = max(0, (1 / qps) - elapsed)
        time.sleep(sleep_time)

    # Record the three percentiles, or +inf for each when every request failed.
    if not response_times:
        for label in ('p50', 'p90', 'p99'):
            percentiles[label].append(float('inf'))
    else:
        for label, q in (('p50', 50), ('p90', 90), ('p99', 99)):
            percentiles[label].append(np.percentile(response_times, q))

    print(f"QPS: {qps}, P50: {percentiles['p50'][-1]:.2f} ms, P90: {percentiles['p90'][-1]:.2f} ms, P99: {percentiles['p99'][-1]:.2f} ms")

# Final summary: the tested rates and the matching P99 latencies
print("QPS Values:", qps_values)
print("P99 Response Times:", percentiles['p99'])