In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import pytz
import plotly.graph_objects as go

In [None]:
# Directory holding the Locust CSV exports that this notebook reads.
basedir = "../data/2024-07-09/"
locust_data_history_file = f"{basedir}data_stats_history.csv"
locust_data_file = f"{basedir}data_stats.csv"

# Test stages; every entry carries the 'users' value of one stage.
stages = [
    {"users": user_count}
    for user_count in (100, 200, 300, 500, 800, 1300, 2100)
]

# Set of all 'users' values that belong to a configured stage.
valid_users = {stage_cfg['users'] for stage_cfg in stages}

In [None]:
# Read the Locust stats history CSV into a DataFrame
locust_df = pd.read_csv(locust_data_history_file)

# Time zone for Germany, used for all timestamps of the frame
germany_tz = pytz.timezone('Europe/Berlin')

# Interpret 'Timestamp' as seconds since the epoch, mark the values as UTC and
# convert them to the German time zone to make the data easier to analyse
berlin_timestamps = (
    pd.to_datetime(locust_df['Timestamp'], unit='s')
    .dt.tz_localize('UTC')
    .dt.tz_convert(germany_tz)
)

locust_df = (
    locust_df
    .assign(Timestamp=berlin_timestamps)
    .set_index('Timestamp')    # use the timestamp as index
    .drop(columns=['Type'])    # remove the 'Type' column
)

# Keep only rows whose 'Total Request Count' is not 0 and whose 'User Count'
# is contained in the set 'valid_users'
has_requests = locust_df['Total Request Count'] != 0
matches_stage = locust_df['User Count'].isin(valid_users)
locust_df = locust_df[has_requests & matches_stage]

In [None]:
# Display the filtered Locust history DataFrame as the cell output.
locust_df

In [None]:
locust_requests_count_df = pd.read_csv(locust_data_file)

# Filter out the 'Aggregated' row. The explicit copy makes the result an
# independent frame, so the column assignments below do not write into a slice
# of the raw CSV data (avoids pandas' SettingWithCopyWarning).
locust_requests_count_df = locust_requests_count_df[
    locust_requests_count_df['Name'] != 'Aggregated'
].copy()

# Create a new column for services: the first path segment of 'Name'
# (e.g. "/svc/items" -> "svc")
locust_requests_count_df['Service'] = locust_requests_count_df['Name'].apply(lambda x: x.split('/')[1])

# Remove the service prefix from the 'Name' column. replace() is limited to a
# single occurrence so that only the leading "/<service>" is stripped and later
# repetitions of the service name inside the path stay intact.
locust_requests_count_df['Request Path'] = locust_requests_count_df.apply(
    lambda row: row['Name'].replace(f"/{row['Service']}", "", 1), axis=1)

# Calculate the total number of requests
total_requests = locust_requests_count_df['Request Count'].sum()

# Calculate the percentage of each request type
locust_requests_count_df['Request Percentage'] = (locust_requests_count_df['Request Count'] / total_requests) * 100

# Create a new Request Type column with HTTP method and percentage
locust_requests_count_df['Formatted Request Type'] = locust_requests_count_df.apply(
    lambda row: f"{row['Type']} {row['Request Path']} ({row['Request Percentage']:.2f}%)", axis=1)

# Prepare the data for the sunburst plot: one leaf per request type and one
# root-level sector per service.
services = locust_requests_count_df['Service'].unique().tolist()

# groupby() returns the services sorted by name, whereas unique() keeps their
# order of appearance. Reindexing aligns every service total with its label;
# otherwise a service could receive another service's total, which is
# inconsistent with branchvalues='total'.
service_percentages = (
    locust_requests_count_df.groupby('Service')['Request Percentage'].sum().reindex(services)
)

labels = locust_requests_count_df['Formatted Request Type'].tolist() + services
parents = locust_requests_count_df['Service'].tolist() + [''] * len(services)
values = locust_requests_count_df['Request Percentage'].tolist() + service_percentages.tolist()

# Create the sunburst plot
fig = go.Figure(go.Sunburst(
    labels=labels,
    parents=parents,
    values=values,
    insidetextorientation='radial',  # Control text orientation
    branchvalues='total',
))

# Update layout
fig.update_layout(
    margin=dict(l=40, r=40, t=40, b=40),
    width=800,
    height=800,
    title='Distribution of Requests per Services in Percent'
)

# Show the plot
fig.show()

In [None]:
# First and last timestamp of the filtered Locust history
first_ts = locust_df.index.min()
last_ts = locust_df.index.max()

# Express both in Europe/Berlin time and strip the time zone information,
# which leaves naive datetimes
start_time = first_ts.tz_convert("Europe/Berlin").tz_localize(None)
end_time = last_ts.tz_convert("Europe/Berlin").tz_localize(None)
print("Start:", start_time, "; End:", end_time)

In [None]:
fig, ax1 = plt.subplots(figsize=(25, 8))

# Left y-axis: request and failure rates over time
ax1.set(xlabel='Timestamp', ylabel='Request & Failure Rates')
for rate_column in ('Requests/s', 'Failures/s'):
    ax1.plot(locust_df.index, locust_df[rate_column], label=rate_column, marker='.')
ax1.tick_params(axis='y')
ax1.legend(loc='upper left')
ax1.grid(True)

# Right y-axis (sharing the x-axis): user count over time
ax2 = ax1.twinx()
ax2.set_ylabel('User Count')
ax2.plot(
    locust_df.index,
    locust_df['User Count'],
    label='User Count',
    marker='.',
    color='magenta',
)
ax2.tick_params(axis='y', labelcolor='magenta')
ax2.legend(loc='upper right')

plt.title('User Count Performance Metrics over Time')
plt.show()

In [None]:
fig, ax1 = plt.subplots(figsize=(22, 8))

# Left y-axis: median response time over time
ax1.set(xlabel='Timestamp', ylabel='Total Median Response Time')
ax1.plot(
    locust_df.index,
    locust_df['Total Median Response Time'],
    label='Median Response Time',
    marker='.',
)
ax1.tick_params(axis='y')
ax1.grid(True)

# Right y-axis (sharing the x-axis): average response time over time
ax2 = ax1.twinx()
ax2.set_ylabel('Total Average Response Time')
ax2.plot(
    locust_df.index,
    locust_df['Total Average Response Time'],
    label='Avg Response Time',
    color='magenta',
    marker='.',
)
ax2.tick_params(axis='y', labelcolor='magenta')

plt.title('Response Time Metrics over Time')
plt.show()

In [None]:
# Box plots of the average and the median response time, grouped by user count.
#
# DataFrame.boxplot(by=...) opens a figure of its own unless an Axes is passed,
# so a preceding plt.figure(figsize=...) would only leave an empty figure behind
# and its size would never reach the box plot. The Axes is therefore created
# up front and handed to boxplot() explicitly.
for response_time_column in ('Total Average Response Time', 'Total Median Response Time'):
    box_fig, box_ax = plt.subplots(figsize=(12, 6))
    locust_df.boxplot(column=response_time_column, by='User Count', grid=False, ax=box_ax)
    box_ax.set_title(f'{response_time_column} by Stage')
    box_fig.suptitle('')  # clear the figure-level title that the grouped box plot adds
    box_ax.set_xlabel('User Count')
    box_ax.set_ylabel(f'{response_time_column} (ms)')
    box_ax.tick_params(axis='x', labelrotation=45)
    plt.show()

In [None]:
# Analyse the failure rate.
#
# Locust's request counter already includes failed requests, so the failure
# rate is failures / requests. Adding the failure count to the denominator
# would count every failed request twice and understate the rate.
# assign() returns a new frame instead of writing a column into the filtered
# frame, which avoids pandas' SettingWithCopyWarning.
# NOTE(review): both columns are named 'Total ...' and look like running totals;
# if so, each row is the cumulative rate since test start - confirm that this is
# the intended per-stage metric.
locust_df = locust_df.assign(**{
    'Failure Rate': locust_df['Total Failure Count'] / locust_df['Total Request Count']
})

# Bar chart of the mean failure rate per user count
failure_fig, failure_ax = plt.subplots(figsize=(12, 6))
locust_df.groupby('User Count')['Failure Rate'].mean().plot(kind='bar', rot=45, ax=failure_ax)
failure_ax.set_title('Average Failure Rate by Stage')
failure_ax.set_xlabel('User Count')
failure_ax.set_ylabel('Failure Rate')
plt.show()

In [None]:
# Compare the stages: mean, median and standard deviation of every metric,
# computed per user count
summary_metrics = (
    'Total Average Response Time',
    'Total Median Response Time',
    'Failure Rate',
)
aggregations = {metric: ['mean', 'median', 'std'] for metric in summary_metrics}
summary_stats = locust_df.groupby('User Count').agg(aggregations)

print(summary_stats)

# Visualise the summarised statistics, one bar chart per metric/statistic pair
summary_stats.plot(
    kind='bar',
    subplots=True,
    layout=(3, 3),
    figsize=(18, 16),
)
plt.show()

In [None]:
# 95% confidence interval for the mean of 'Total Median Response Time' per user count
response_time_by_users = locust_df.groupby('User Count')['Total Median Response Time']
grouped_stats = response_time_by_users.agg(['mean', 'count', 'std'])

# t quantile for a two-sided 95% interval: 0.975 = 1 - alpha/2, with
# (count - 1) degrees of freedom per group
degrees_of_freedom = grouped_stats['count'] - 1
t_value = stats.t.ppf(0.975, degrees_of_freedom)

# Margin of error: t * std / sqrt(count)
grouped_stats['margin_of_error'] = t_value * grouped_stats['std'] / (grouped_stats['count'] ** 0.5)

# Lower and upper bound of the interval around the group mean
group_mean = grouped_stats['mean']
grouped_stats['ci_low'] = group_mean - grouped_stats['margin_of_error']
grouped_stats['ci_high'] = group_mean + grouped_stats['margin_of_error']

# Plot the group means with the margin of error as error bars
_, ci_ax = plt.subplots(figsize=(10, 6))
ci_ax.errorbar(
    grouped_stats.index,
    grouped_stats['mean'],
    yerr=grouped_stats['margin_of_error'],
    fmt='-o',
    ecolor='r',
    capsize=5,
    capthick=2,
    label='95% CI',
)
ci_ax.set_title('95% Confidence Intervals for Mean Total Median Response Time')
ci_ax.set_xlabel('User Count')
ci_ax.set_ylabel('Total Median Response Time')
ci_ax.legend()
ci_ax.grid(True)
plt.show()

In [None]:
# 95% confidence interval for the mean of 'Failure Rate' per user count
failure_rate_by_users = locust_df.groupby('User Count')['Failure Rate']
grouped_stats = failure_rate_by_users.agg(['mean', 'count', 'std'])

# t quantile for a two-sided 95% interval: 0.975 = 1 - alpha/2, with
# (count - 1) degrees of freedom per group
degrees_of_freedom = grouped_stats['count'] - 1
t_value = stats.t.ppf(0.975, degrees_of_freedom)

# Margin of error: t * std / sqrt(count)
grouped_stats['margin_of_error'] = t_value * grouped_stats['std'] / (grouped_stats['count'] ** 0.5)

# Lower and upper bound of the interval around the group mean
group_mean = grouped_stats['mean']
grouped_stats['ci_low'] = group_mean - grouped_stats['margin_of_error']
grouped_stats['ci_high'] = group_mean + grouped_stats['margin_of_error']

# Plot the group means with the margin of error as error bars
_, ci_ax = plt.subplots(figsize=(10, 6))
ci_ax.errorbar(
    grouped_stats.index,
    grouped_stats['mean'],
    yerr=grouped_stats['margin_of_error'],
    fmt='-o',
    ecolor='r',
    capsize=5,
    capthick=2,
    label='95% CI',
)
ci_ax.set_title('95% Confidence Intervals for Mean Failure Rate')
ci_ax.set_xlabel('User Count')
ci_ax.set_ylabel('Failure Rate')
ci_ax.legend()
ci_ax.grid(True)
plt.show()