# Import Statements

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Daten einlesen und vorbereiten

In [24]:
def bereinige_und_speichere_csv(input_csv):
    # Lese das CSV ein
    df = pd.read_csv(input_csv)
    
    # Finde alle Spalten, die 'loadgenerator.2' enthalten
    spalten_zu_entfernen = [spalte for spalte in df.columns if '.1' in spalte]
    
    # Entferne diese Spalten aus dem DataFrame
    df_bereinigt = df.drop(spalten_zu_entfernen, axis=1)
    
    # Speichere das bereinigte DataFrame am ursprünglichen Speicherort
    df_bereinigt.to_csv(input_csv, index=False)

# Ersetze "dein_csv_pfad.csv" mit dem Pfad zu deinem CSV
bereinige_und_speichere_csv("./../../timeseries/merged/constant-load/medium/new.csv")


# Hilfscode um mehrere Spalten zu vergleichen

In [30]:
# CSV-Dateien einlesen
df1 = pd.read_csv('./../../timeseries/merged/constant-load/high_low/new.csv')
df2 = pd.read_csv('./../../timeseries/merged/service-penetration/slower/file.csv')

# Spaltennamen der DataFrames extrahieren
spalten_df1 = df1.columns.tolist()
spalten_df2 = df2.columns.tolist()

# Die Länge der längsten Spaltenliste bestimmen
max_len = max(len(spalten_df1), len(spalten_df2))

print("Vergleich der Spalten:")
print(f"{'CSV 1':<50} | {'CSV 2':<50}")
print("-" * 105)

# Durch die Spalten iterieren und vergleichen
for i in range(max_len):
    spalte1 = spalten_df1[i] if i < len(spalten_df1) else ""
    spalte2 = spalten_df2[i] if i < len(spalten_df2) else ""
    if spalte1 != spalte2:  # Nur Unterschiede anzeigen
        print(f"{spalte1:<50} | {spalte2:<50}")

# Hinweis auf zusätzliche Spalten in CSV 2
if len(spalten_df2) > len(spalten_df1):
    print("\nZusätzliche Spalten in CSV 2:")
    for i in range(len(spalten_df1), len(spalten_df2)):
        print(f"- {spalten_df2[i]}")

Vergleich der Spalten:
CSV 1                                              | CSV 2                                             
---------------------------------------------------------------------------------------------------------
high_low_i_o_read                                  | slower_i_o_read                                   
high_low_i_o_write                                 | slower_i_o_write                                  
high_low_memory                                    | slower_memory                                     
pod_restart_{container="main", instance="kube"     | slower_pod_restart_{container="main", instance="kube
pod_restart_{container="redis", instance="kube"    | slower_pod_restart_{container="redis", instance="kube
pod_restart_{container="server", instance="kube"   | slower_pod_restart_{container="server", instance="kube
pod_restart_{container="server", instance="kube.2" | slower_pod_restart_{container="server", instance="kube.2
pod_restart_{container="

# Alle Files zusammen mergen

In [18]:
import glob
path = "./../../timeseries/merged"
all_files = glob.glob(path + '/**/*.csv', recursive=True)
print(all_files)

['./../../timeseries/merged/peak-load/slower/file.csv', './../../timeseries/merged/peak-load/faster/file.csv', './../../timeseries/merged/newer/medium/file.csv', './../../timeseries/merged/newer/long/file.csv', './../../timeseries/merged/constant-load/high_low/file.csv', './../../timeseries/merged/constant-load/medium/file.csv', './../../timeseries/merged/constant-load/long/file.csv', './../../timeseries/merged/long-time/high/file.csv', './../../timeseries/merged/long-time/low/file.csv', './../../timeseries/merged/service-penetration/checkout/file.csv', './../../timeseries/merged/service-penetration/cart/file.csv']


In [19]:
df_list = []
for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header= 0)
    df_list.append(df)

combined_df = pd.concat(df_list, axis=0, ignore_index=True)

combined_df.to_csv('./../../timeseries/merged/combined_timeseries.csv', index=False)

combined_df.head(1000)

Unnamed: 0,Timestamp,Requests je Sekunde,Durchschnittliche Antwortzeitintervalle,slower_i_o_read,slower_i_o_write,slower_memory,network_outgoing_pod_adservice,network_outgoing_pod_cartservice,network_outgoing_pod_checkoutservice,network_outgoing_pod_currencyservice,...,"cart_pod_restart_{container=""redis"", instance=""kube","cart_pod_restart_{container=""server"", instance=""kube","cart_pod_restart_{container=""server"", instance=""kube.2","cart_pod_restart_{container=""server"", instance=""kube.3","cart_pod_restart_{container=""server"", instance=""kube.4","cart_pod_restart_{container=""server"", instance=""kube.5","cart_pod_restart_{container=""server"", instance=""kube.6","cart_pod_restart_{container=""server"", instance=""kube.7","cart_pod_restart_{container=""server"", instance=""kube.8","cart_pod_restart_{container=""server"", instance=""kube.9"
0,44,20,157.000000,0.0,0.0,1.216046e+10,111.0,150.0,179.0,174.0,...,,,,,,,,,,
1,46,25,97.200000,0.0,0.0,1.218717e+10,111.0,150.0,179.0,174.0,...,,,,,,,,,,
2,48,24,107.583333,0.0,0.0,1.243227e+10,111.0,149.0,179.0,174.0,...,,,,,,,,,,
3,50,40,147.100000,0.0,0.0,1.244914e+10,111.0,149.0,179.0,174.0,...,,,,,,,,,,
4,52,43,153.767442,0.0,0.0,1.249924e+10,111.0,149.0,179.0,168.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,334,127,902.181102,,,,5793.0,14275.0,11384.0,23009.0,...,,,,,,,,,,
996,336,111,806.567568,,,,5867.0,14275.0,11384.0,23009.0,...,,,,,,,,,,
997,338,105,650.600000,,,,5907.0,14275.0,11560.0,23601.0,...,,,,,,,,,,
998,340,119,961.000000,,,,6087.0,14275.0,11641.0,23601.0,...,,,,,,,,,,
