In [1]:
from Features import FeatureColumns
from TrainTestData import loadData
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [2]:
# loadData() hands back eight values; unpack them in the order it returns them.
(
    X_train,
    X_test,
    y_train,
    y_test,
    data,
    feature_columns,
    categorical_features,
    target_column,
) = loadData()

In [3]:
# Fit the min-max scaler on the training split only, then apply that same
# mapping to both splits, so the test data never influences the fitted
# feature ranges. Training features end up in [-1, 1].
scaling = MinMaxScaler(feature_range=(-1, 1))
scaling.fit(X_train)
X_train, X_test = scaling.transform(X_train), scaling.transform(X_test)

In [3]:
# The input dimensionality of the PCA equals the number of features, i.e. the
# number of columns in X_train.
#
# Passing a float as n_components (here 0.95) tells sklearn's PCA not to use a
# fixed number of principal components. Instead it picks the smallest number of
# components that still retains at least 95% of the variance of the original
# data. This is a form of dimensionality reduction driven by explained
# variance rather than by a preset component count.
#
# NOTE(review): the SVM cells further down fit on X_train / X_test, not on
# X_train_pca / X_test_pca -- confirm whether the projections were meant to be used.
variance_to_keep = 0.95
pca = PCA(n_components=variance_to_keep)

# Learn the projection from the training split and reuse it for the test split.
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

In [None]:
# Kernel: linear
# Fit an SVC with a linear kernel on the scaled training features and
# evaluate it on the test split.
svm_clf_linear = SVC(kernel='linear')
svm_clf_linear.fit(X_train, y_train)
y_pred_svm_linear = svm_clf_linear.predict(X_test)

# Weighted averages are computed per class. If a class is never predicted, its
# precision is undefined; zero_division=0 scores it as 0 explicitly (the same
# value the default "warn" setting uses) without raising an
# UndefinedMetricWarning.
accuracy_svm_linear = accuracy_score(y_test, y_pred_svm_linear)
f1_svm_linear = f1_score(y_test, y_pred_svm_linear, average='weighted', zero_division=0)
precision_svm_linear = precision_score(y_test, y_pred_svm_linear, average='weighted', zero_division=0)
recall_svm_linear = recall_score(y_test, y_pred_svm_linear, average='weighted', zero_division=0)

# Output
print("Metrics for linear kernel:")
print(f"Accuracy: {accuracy_svm_linear:.4f}, F1-Score: {f1_svm_linear:.4f}, Precision: {precision_svm_linear:.4f}, Recall: {recall_svm_linear:.4f}\n")

In [8]:
# Kernel: poly
# Fit an SVC with a polynomial kernel on the scaled training features and
# evaluate it on the test split.
svm_clf_poly = SVC(kernel='poly')
svm_clf_poly.fit(X_train, y_train)
y_pred_svm_poly = svm_clf_poly.predict(X_test)

# Weighted averages are computed per class. If a class is never predicted, its
# precision is undefined; zero_division=0 scores it as 0 explicitly (the same
# value the default "warn" setting uses) without raising an
# UndefinedMetricWarning.
accuracy_svm_poly = accuracy_score(y_test, y_pred_svm_poly)
f1_svm_poly = f1_score(y_test, y_pred_svm_poly, average='weighted', zero_division=0)
precision_svm_poly = precision_score(y_test, y_pred_svm_poly, average='weighted', zero_division=0)
recall_svm_poly = recall_score(y_test, y_pred_svm_poly, average='weighted', zero_division=0)

# Output
print("Metrics for poly kernel:")
print(f"Accuracy: {accuracy_svm_poly:.4f}, F1-Score: {f1_svm_poly:.4f}, Precision: {precision_svm_poly:.4f}, Recall: {recall_svm_poly:.4f}\n")

Metrics for poly kernel:
Accuracy: 0.7107, F1-Score: 0.6027, Precision: 0.5576, Recall: 0.7107



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [9]:
# Kernel: rbf
# Fit an SVC with an RBF kernel on the scaled training features and evaluate
# it on the test split.
svm_clf_rbf = SVC(kernel='rbf')
svm_clf_rbf.fit(X_train, y_train)
y_pred_svm_rbf = svm_clf_rbf.predict(X_test)

# Weighted averages are computed per class. If a class is never predicted, its
# precision is undefined; zero_division=0 scores it as 0 explicitly (the same
# value the default "warn" setting uses) without raising an
# UndefinedMetricWarning.
accuracy_svm_rbf = accuracy_score(y_test, y_pred_svm_rbf)
f1_svm_rbf = f1_score(y_test, y_pred_svm_rbf, average='weighted', zero_division=0)
precision_svm_rbf = precision_score(y_test, y_pred_svm_rbf, average='weighted', zero_division=0)
recall_svm_rbf = recall_score(y_test, y_pred_svm_rbf, average='weighted', zero_division=0)

# Output
print("Metrics for rbf kernel:")
print(f"Accuracy: {accuracy_svm_rbf:.4f}, F1-Score: {f1_svm_rbf:.4f}, Precision: {precision_svm_rbf:.4f}, Recall: {recall_svm_rbf:.4f}\n")

Metrics for rbf kernel:
Accuracy: 0.7147, F1-Score: 0.5976, Precision: 0.6019, Recall: 0.7147



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [11]:
# Kernel: sigmoid
# Fit an SVC with a sigmoid kernel on the scaled training features and
# evaluate it on the test split.
svm_clf_sigmoid = SVC(kernel='sigmoid')
svm_clf_sigmoid.fit(X_train, y_train)
y_pred_svm_sigmoid = svm_clf_sigmoid.predict(X_test)

# Weighted averages are computed per class. If a class is never predicted, its
# precision is undefined; zero_division=0 scores it as 0 explicitly (the same
# value the default "warn" setting uses) without raising an
# UndefinedMetricWarning.
accuracy_svm_sigmoid = accuracy_score(y_test, y_pred_svm_sigmoid)
f1_svm_sigmoid = f1_score(y_test, y_pred_svm_sigmoid, average='weighted', zero_division=0)
precision_svm_sigmoid = precision_score(y_test, y_pred_svm_sigmoid, average='weighted', zero_division=0)
recall_svm_sigmoid = recall_score(y_test, y_pred_svm_sigmoid, average='weighted', zero_division=0)

# Output
print("Metrics for sigmoid kernel:")
print(f"Accuracy: {accuracy_svm_sigmoid:.4f}, F1-Score: {f1_svm_sigmoid:.4f}, Precision: {precision_svm_sigmoid:.4f}, Recall: {recall_svm_sigmoid:.4f}\n")

Metrics for sigmoid kernel:
Accuracy: 0.6044, F1-Score: 0.5883, Precision: 0.5781, Recall: 0.6044



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
