# Import everything needed in this notebook

In [29]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
import numpy as np
import sys
import os


In [30]:
# Put the parent of the current working directory on the module search path
# (only once) so that modules located one level up can be imported.
module_path = os.path.abspath('..')
if module_path not in sys.path:
    sys.path.append(module_path)

# Read the CSV at the path below (relative to the working directory)
# into the DataFrame used by the following cells.
input_file_path = "./../../timeseries/combined/scaled_combined_timeseries.csv"
df = pd.read_csv(input_file_path)

In [32]:
# Mean-impute missing values in the numeric (float64/int64) columns of `df`.
#
# NOTE(review): the imputer is fitted on all rows and on every numeric column
# (including the column later used as the target) before the train/test
# split, so the fill values are computed from rows that end up in the test
# set -- confirm this is acceptable for the analysis.
numeric_df = df.select_dtypes(include=['float64', 'int64'])

# By default SimpleImputer discards columns that contain no observed values,
# which would make the transformed array narrower than the column list and
# fail when the labels are attached. Drop such columns up front so the array
# and the labels always agree.
numeric_df = numeric_df.dropna(axis=1, how='all')

imputer = SimpleImputer(strategy='mean')
df_imputed = pd.DataFrame(
    imputer.fit_transform(numeric_df),
    columns=numeric_df.columns,
    # Keep the source index so the Timestamp assignment below aligns row by
    # row even if `df` does not carry a default RangeIndex.
    index=numeric_df.index,
)

# Re-attach the non-numeric Timestamp column, which was excluded from imputation.
df_imputed['Timestamp'] = df['Timestamp']

In [33]:
# Target: the column 'Durchschnittliche Antwortzeitintervalle'.
y = df_imputed['Durchschnittliche Antwortzeitintervalle']

# Features: every remaining column except the target and the Timestamp.
X = df_imputed.drop(columns=['Durchschnittliche Antwortzeitintervalle', 'Timestamp'])

# Hold out 20% of the rows as test data; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [34]:
# Requires X_train and y_train from the train/test split cell.
# Fit a decision-tree regressor with a fixed seed on the training data.
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

# Per-feature importances of the fitted tree, in the column order of X_train.
importances = model.feature_importances_

# Pair each feature name with its importance and rank them from most to
# least important.
features_df = pd.DataFrame(
    {'Feature': X_train.columns, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

# Show the 20 highest-ranked features.
print(features_df.head(20))

                        Feature  Importance
40      pod-restart-count-pod-9    0.316827
0           Requests je Sekunde    0.164157
25                cpu_pod-pod-8    0.159202
10   network_outgoing_pod-pod-7    0.082298
61            memory_log_scaled    0.045663
11   network_outgoing_pod-pod-8    0.041247
57  cpu_pod-pod-1_rolling_avg_5    0.030367
54    Requests je Sekunde_lag_5    0.021953
50    Requests je Sekunde_lag_1    0.017718
51    Requests je Sekunde_lag_2    0.010637
9    network_outgoing_pod-pod-6    0.010326
5    network_outgoing_pod-pod-2    0.009974
59            cpu_system_cumsum    0.008746
19                cpu_pod-pod-2    0.008192
26                cpu_pod-pod-9    0.007735
1                      i_o_read    0.007351
62          i_o_read_log_scaled    0.007351
53    Requests je Sekunde_lag_4    0.006846
3                        memory    0.005715
14  network_outgoing_pod-pod-11    0.004731
