# Import everything

In [9]:
import pandas as pd
from sklearn.feature_selection import VarianceThreshold, SelectFromModel
from sklearn.feature_selection import RFECV
from sklearn.tree import DecisionTreeRegressor
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
import numpy as np
input_file_path = "./../../timeseries/combined/combined_timeseries.csv"
df = pd.read_csv(input_file_path)

## Deal with nan-values

In [10]:
imputer = SimpleImputer(strategy='mean')  # oder eine andere geeignete Strategie
df_imputed = pd.DataFrame(imputer.fit_transform(df), columns=df.columns)

### Prepare and split data

In [11]:
X = df.drop(['Durchschnittliche Antwortzeitintervalle', 'Requests je Sekunde', 'Timestamp'], axis=1)
y = df['Durchschnittliche Antwortzeitintervalle']

# Aufteilung in Trainings- und Testdaten
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


## Remove features with low variance

In [15]:
sel = VarianceThreshold(threshold=(.8 * (1 - .8)))
X_selected = sel.fit_transform(X)

# Welche Features wurden behalten
features_bool = sel.get_support()
features = np.array(X.columns)
print("Behaltene Features:", features[features_bool])

# Welche Features wurden entfernt
print("Entfernte Features:", features[~features_bool])

Behaltene Features: ['three_i_o_read' 'three_i_o_write' 'three_memory' 'system-pod-1'
 'system-pod-2' 'system-pod-3' 'system-pod-4' 'system-pod-5'
 'system-pod-6' 'system-pod-7' 'system-pod-8' 'system-pod-9'
 'system-pod-10' 'system-pod-11' 'system-pod-12' 'system-pod-13'
 'network_outgoing_system' 'one_i_o_read' 'one_i_o_write' 'one_memory'
 'network_outgoing_pod_adservice' 'network_outgoing_pod_cartservice'
 'network_outgoing_pod_checkoutservice'
 'network_outgoing_pod_currencyservice'
 'network_outgoing_pod_emailservice' 'network_outgoing_pod_frontend'
 'network_outgoing_pod_loadgenerator'
 'network_outgoing_pod_loadgenerator.1'
 'network_outgoing_pod_paymentservice'
 'network_outgoing_pod_productcatalogservice'
 'network_outgoing_pod_recommendationservice' 'network_outgoing_pod_redis'
 'network_outgoing_pod_shippingservice' 'two_i_o_read' 'two_i_o_write'
 'two_memory' 'slower_i_o_read' 'slower_i_o_write' 'slower_memory'
 'faster_i_o_read' 'faster_i_o_write' 'faster_memory' 'medium_

## Recursive Feature Selection

In [12]:
# Modell initialisieren
estimator = DecisionTreeRegressor()


# Feature Selection
selector = RFECV(estimator, step=2, cv=3)
selector = selector.fit(X_train, y_train)

print("Optimale Anzahl von Features : %d" % selector.n_features_)

# Du kannst dann die ausgewählten Features anzeigen oder nutzen
selected_features = X_train.columns[selector.support_]
print("Ausgewählte Features:", selected_features)
# Identifiziere die nicht ausgewählten Features
not_selected_features = X_train.columns[~selector.support_]
print("Nicht ausgewählte Features:", not_selected_features)

Optimale Anzahl von Features : 87
Ausgewählte Features: Index(['three_i_o_read', 'three_i_o_write', 'three_memory', 'system-pod-1',
       'system-pod-2', 'system-pod-3', 'system-pod-4', 'system-pod-5',
       'system-pod-6', 'system-pod-7', 'system-pod-8', 'system-pod-9',
       'system-pod-10', 'system-pod-11', 'system-pod-12', 'system-pod-13',
       'network_outgoing_system', 'system-pod-1.1', 'system-pod-2.1',
       'system-pod-3.1', 'system-pod-4.1', 'system-pod-5.1', 'system-pod-6.1',
       'system-pod-7.1', 'system-pod-8.1', 'system-pod-9.1', 'system-pod-10.1',
       'system-pod-11.1', 'system-pod-12.1', 'system-pod-13.1', 'cpu_system',
       'one_i_o_read', 'one_i_o_write', 'one_memory',
       'network_outgoing_pod_adservice', 'network_outgoing_pod_cartservice',
       'network_outgoing_pod_checkoutservice',
       'network_outgoing_pod_currencyservice',
       'network_outgoing_pod_emailservice', 'network_outgoing_pod_frontend',
       'network_outgoing_pod_loadgenerator

## Recursive feature selection