In [1]:
from Features import FeatureColumns
from TrainTestData import loadData
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score

import numpy as np
import pandas as pd
import multiprocessing

In [2]:
# Fetch the train/test split together with the column metadata from the project helper.
# loadData comes from the local TrainTestData module; its internals are not visible here.
(
    X_train,
    X_test,
    y_train,
    y_test,
    data,
    feature_columns,
    categorical_features,
    target_column,
) = loadData()

In [3]:
# Standardize the features. The scaling statistics are learned from the training
# split only and then applied unchanged to both splits, so no information from the
# test set leaks into the preprocessing.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
# Encode the class labels as integers.
# The encoder is fitted on the training labels only; the test labels are mapped with
# that same fitted encoder so both splits share one label -> integer mapping.
# (Calling fit_transform on y_test would re-fit the encoder and could assign different
# integers whenever the set of classes present in y_test differs from y_train.)
# Note: transform() raises ValueError if y_test contains a label never seen in y_train.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

In [5]:
# The dimensionality equals the number of features; in our case X features
# ("X" is a placeholder left by the author -- TODO fill in the actual count).
pca = PCA(n_components=0.95)
# Passing n_components=0.95 to sklearn.decomposition.PCA means that the number of
# principal components is chosen such that at least 95% of the variance of the
# original data is retained. This is a form of dimensionality reduction in which no
# fixed number of principal components is specified; instead the minimal number of
# components needed to capture most of the data's variance is used.
# The projection is fitted on the scaled training data and then applied to the scaled test data.
# NOTE(review): none of the model cells visible in this notebook consume
# X_train_pca / X_test_pca -- confirm whether the PCA output is meant to be used.
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

print(X_test_pca)

[[-3.75107098  1.83021965  0.49008716 ...  0.45851365 -0.21072082
   1.13772842]
 [-2.2004487   0.38333041  1.86107782 ... -0.7384753  -0.72611298
   0.6608206 ]
 [-0.4471966  -0.53257959 -1.55333713 ...  0.08551069  0.07435526
  -0.47742015]
 ...
 [-3.68881163 -1.16318892  1.14384769 ... -0.2268771   0.57354744
   0.76803492]
 [ 1.42621002  0.50040138  0.22298999 ...  0.29616544 -0.3939319
  -0.97609791]
 [-2.74658427  1.47406696  0.8657662  ... -0.51498864 -0.70613121
   1.7565737 ]]


In [None]:
# Kernel: linear
# Number of cross-validation folds.
k = 3
# Run the folds in parallel, requesting one worker per available CPU core.
n_jobs = multiprocessing.cpu_count()
svm_clf_linear = SVC(kernel='linear')

# Cross-validate on the scaled training data; with no explicit `scoring`, the
# estimator's own score method is used for each fold.
cv_scores = cross_val_score(
    estimator=svm_clf_linear,
    X=X_train_scaled,
    y=y_train_encoded,
    cv=k,
    n_jobs=n_jobs,
)

print("Cross-Validation Scores:", cv_scores)
print("Durchschnittliche Genauigkeit:", cv_scores.mean())

In [None]:
# Fit the linear-kernel SVM on the scaled training data and predict the scaled test set.
# fit() returns the estimator itself, so training and prediction can be chained.
y_pred_svm_linear = svm_clf_linear.fit(X_train_scaled, y_train_encoded).predict(X_test_scaled)

# Hold-out metrics; the per-class scores are averaged with average='weighted'.
accuracy_svm_linear = accuracy_score(y_true=y_test_encoded, y_pred=y_pred_svm_linear)
f1_svm_linear = f1_score(
    y_true=y_test_encoded, y_pred=y_pred_svm_linear, average='weighted'
)
precision_svm_linear = precision_score(
    y_true=y_test_encoded, y_pred=y_pred_svm_linear, average='weighted'
)
recall_svm_linear = recall_score(
    y_true=y_test_encoded, y_pred=y_pred_svm_linear, average='weighted'
)

print("Metrics for linear kernel:")
print(f"Accuracy: {accuracy_svm_linear:.4f}, F1-Score: {f1_svm_linear:.4f}, Precision: {precision_svm_linear:.4f}, Recall: {recall_svm_linear:.4f}\n")

In [None]:
# Kernel: poly
# Train and predict on the standardized features (the same feature matrices the
# linear-kernel model uses). SVM kernels are sensitive to feature scale, so fitting
# on the raw X_train would make the kernel comparison inconsistent.
# The raw labels are kept: SVC accepts them directly and the metrics below only
# compare predicted labels against true labels.
svm_clf_poly = SVC(kernel='poly')
svm_clf_poly.fit(X_train_scaled, y_train)
y_pred_svm_poly = svm_clf_poly.predict(X_test_scaled)

# Hold-out metrics; the per-class scores are averaged with average='weighted'.
accuracy_svm_poly = accuracy_score(y_test, y_pred_svm_poly)
f1_svm_poly = f1_score(y_test, y_pred_svm_poly, average='weighted')
precision_svm_poly = precision_score(y_test, y_pred_svm_poly, average='weighted')
recall_svm_poly = recall_score(y_test, y_pred_svm_poly, average='weighted')

# Report
print("Metrics for poly kernel:")
print(f"Accuracy: {accuracy_svm_poly:.4f}, F1-Score: {f1_svm_poly:.4f}, Precision: {precision_svm_poly:.4f}, Recall: {recall_svm_poly:.4f}\n")

In [None]:
# Kernel: rbf
# Train and predict on the standardized features (the same feature matrices the
# linear-kernel model uses). The RBF kernel depends on distances between samples,
# so fitting on the raw X_train would let large-scale features dominate.
# The raw labels are kept: SVC accepts them directly and the metrics below only
# compare predicted labels against true labels.
svm_clf_rbf = SVC(kernel='rbf')
svm_clf_rbf.fit(X_train_scaled, y_train)
y_pred_svm_rbf = svm_clf_rbf.predict(X_test_scaled)

# Hold-out metrics; the per-class scores are averaged with average='weighted'.
accuracy_svm_rbf = accuracy_score(y_test, y_pred_svm_rbf)
f1_svm_rbf = f1_score(y_test, y_pred_svm_rbf, average='weighted')
precision_svm_rbf = precision_score(y_test, y_pred_svm_rbf, average='weighted')
recall_svm_rbf = recall_score(y_test, y_pred_svm_rbf, average='weighted')

# Report
print("Metrics for rbf kernel:")
print(f"Accuracy: {accuracy_svm_rbf:.4f}, F1-Score: {f1_svm_rbf:.4f}, Precision: {precision_svm_rbf:.4f}, Recall: {recall_svm_rbf:.4f}\n")

In [None]:
# Kernel: sigmoid
# Train and predict on the standardized features (the same feature matrices the
# linear-kernel model uses). SVM kernels are sensitive to feature scale, so fitting
# on the raw X_train would make the kernel comparison inconsistent.
# The raw labels are kept: SVC accepts them directly and the metrics below only
# compare predicted labels against true labels.
svm_clf_sigmoid = SVC(kernel='sigmoid')
svm_clf_sigmoid.fit(X_train_scaled, y_train)
y_pred_svm_sigmoid = svm_clf_sigmoid.predict(X_test_scaled)

# Hold-out metrics; the per-class scores are averaged with average='weighted'.
accuracy_svm_sigmoid = accuracy_score(y_test, y_pred_svm_sigmoid)
f1_svm_sigmoid = f1_score(y_test, y_pred_svm_sigmoid, average='weighted')
precision_svm_sigmoid = precision_score(y_test, y_pred_svm_sigmoid, average='weighted')
recall_svm_sigmoid = recall_score(y_test, y_pred_svm_sigmoid, average='weighted')

# Report
print("Metrics for sigmoid kernel:")
print(f"Accuracy: {accuracy_svm_sigmoid:.4f}, F1-Score: {f1_svm_sigmoid:.4f}, Precision: {precision_svm_sigmoid:.4f}, Recall: {recall_svm_sigmoid:.4f}\n")