In [1]:
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Base time unit; every stage's "duration" below is a multiple of it.
duration = 10

# Load stages, built from (duration multiplier, user count) pairs.
# All stages share the same spawn rate of 50.
stages = [
    {"duration": duration * multiplier, "users": user_count, "spawn_rate": 50}
    for multiplier, user_count in ((1, 100), (2, 200), (3, 400))
]

In [3]:
# Read the CSV data into a DataFrame
raw_stats = pd.read_csv("data_stats_history.csv")

# Convert the 'Timestamp' column (parsed with unit='s') to datetimes so the
# data is easier to analyse, then use it as the index
df = raw_stats.assign(
    Timestamp=pd.to_datetime(raw_stats['Timestamp'], unit='s')
).set_index('Timestamp')

In [4]:

# Keep only the rows whose 'Type' is empty (the aggregated data) and drop the
# then-redundant 'Type' column. The non-inplace chain yields an independent
# frame, so adding the 'stage' column below does not mutate a slice of `df`
# (which is what can trigger SettingWithCopyWarning).
df_aggregated = df.loc[df['Type'].isnull()].drop(columns=['Type'])

# Compute the end timestamp of each stage. Each stage's 'duration' is applied
# as an offset in seconds from the first aggregated sample.
start_time = df_aggregated.index[0]
stage_end_times = [start_time + pd.Timedelta(seconds=stage['duration']) for stage in stages]


# Function that maps a timestamp to its stage
def assign_stage(timestamp):
    """Return the "Stage N" label for `timestamp`.

    N is the 1-based position of the first entry in `stage_end_times` that is
    at or after `timestamp`. Timestamps later than every stage end are
    assigned to the last stage.
    """
    for i, end_time in enumerate(stage_end_times):
        if timestamp <= end_time:
            return f"Stage {i+1}"
    return f"Stage {len(stage_end_times)}"


# Add the 'stage' column
df_aggregated['stage'] = df_aggregated.index.to_series().apply(assign_stage)

df_aggregated

Unnamed: 0_level_0,User Count,Name,Requests/s,Failures/s,50%,66%,75%,80%,90%,95%,...,99.99%,100%,Total Request Count,Total Failure Count,Total Median Response Time,Total Average Response Time,Total Min Response Time,Total Max Response Time,Total Average Content Size,stage
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-07-01 21:05:10,0,Aggregated,0.0,0.0,,,,,,,...,,,0,0,0.0,0.0,0.0,0.0,0.0,Stage 1
2024-07-01 21:05:11,0,Aggregated,0.0,0.0,,,,,,,...,,,0,0,0.0,0.0,0.0,0.0,0.0,Stage 1
2024-07-01 21:05:12,50,Aggregated,0.0,0.0,,,,,,,...,,,0,0,0.0,0.0,0.0,0.0,0.0,Stage 1
2024-07-01 21:05:13,100,Aggregated,0.0,0.0,,,,,,,...,,,0,0,0.0,0.0,0.0,0.0,0.0,Stage 1
2024-07-01 21:05:14,100,Aggregated,0.0,0.0,86.0,150.0,150.0,150.0,180.0,460.0,...,460.0,460.0,12,0,86.0,134.267446,47.549131,464.053316,196.25,Stage 1
2024-07-01 21:05:15,100,Aggregated,25.0,0.0,90.0,110.0,140.0,150.0,210.0,220.0,...,460.0,460.0,41,0,90.0,116.141019,45.024114,464.053316,14213.707317,Stage 1
2024-07-01 21:05:16,100,Aggregated,35.0,0.0,87.0,100.0,120.0,120.0,210.0,220.0,...,460.0,460.0,56,0,86.0,105.329398,39.843755,464.053316,10468.232143,Stage 1
2024-07-01 21:05:17,100,Aggregated,30.5,0.0,83.0,110.0,140.0,150.0,400.0,480.0,...,1100.0,1100.0,125,1,83.0,153.422478,39.843755,1087.790854,9648.848,Stage 1
2024-07-01 21:05:18,100,Aggregated,36.75,1.25,86.0,140.0,280.0,380.0,630.0,1000.0,...,1500.0,1500.0,177,5,86.0,222.914725,39.843755,1545.276004,15817.926554,Stage 1
2024-07-01 21:05:19,100,Aggregated,37.0,1.25,86.0,150.0,300.0,390.0,670.0,1100.0,...,1700.0,1700.0,197,5,86.0,237.918707,39.843755,1697.807414,15511.593909,Stage 1


In [None]:
# Box plots of the response-time metrics, grouped by stage.
# DataFrame.boxplot(by=...) opens a figure of its own unless it is handed an
# Axes, so a preceding plt.figure(figsize=...) would only leave an empty extra
# figure and the requested size would not apply. The Axes is therefore created
# up front and passed in via `ax=`.
for metric in ('Total Average Response Time', 'Total Median Response Time'):
    fig, ax = plt.subplots(figsize=(12, 6))
    df_aggregated.boxplot(column=metric, by='stage', grid=False, ax=ax)
    ax.set_title(f'{metric} by Stage')
    fig.suptitle('')  # clear the super-title pandas adds for grouped box plots
    ax.set_xlabel('Stage')
    ax.set_ylabel(f'{metric} (ms)')
    ax.tick_params(axis='x', labelrotation=45)
    plt.show()

In [None]:
# Analyse the failure rate: the share of all requests that failed.
# NOTE(review): the columns match Locust's stats-history CSV, where
# 'Total Request Count' already includes failed requests -- confirm the data
# source. Under that convention the rate is failures / requests; adding the
# failure count to the denominator again would understate it.
# Rows with no requests yet evaluate to 0/0 = NaN, which the per-stage mean
# below skips.
df_aggregated['Failure Rate'] = (
    df_aggregated['Total Failure Count'] / df_aggregated['Total Request Count']
)

# Bar chart of the mean failure rate per stage
fig, ax = plt.subplots(figsize=(12, 6))
df_aggregated.groupby('stage')['Failure Rate'].mean().plot(kind='bar', rot=45, ax=ax)
ax.set_title('Average Failure Rate by Stage')
ax.set_xlabel('Stage')
ax.set_ylabel('Failure Rate')
plt.show()

In [None]:
# Compare the stages: the same set of summary statistics is computed per
# stage for every metric of interest.
summary_metrics = (
    'Total Average Response Time',
    'Total Median Response Time',
    'Failure Rate',
)
summary_functions = ['mean', 'median', 'std', 'var']

stage_groups = df_aggregated.groupby('stage')
summary_stats = stage_groups.agg(
    {metric: summary_functions for metric in summary_metrics}
)

print(summary_stats)

# Visualise the summary statistics, one bar-chart subplot per column
summary_stats.plot(kind='bar', subplots=True, layout=(4, 4), figsize=(18, 16))
plt.show()