In [2]:
from imblearn.over_sampling import SMOTE
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, cross_val_score
import pandas as pd
from sklearn.metrics import make_scorer, fbeta_score
from sklearn.model_selection import StratifiedKFold

train_data_loaded = pd.read_csv('../data/train_data_2024-08-01.csv')

X = train_data_loaded.drop(columns=['UKATEGORIE'])
y = train_data_loaded['UKATEGORIE']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# KFold-Konfiguration
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Definieren des F-beta-Scores mit beta = 2
beta = 2
fbeta_scorer = make_scorer(fbeta_score, beta=beta)

# Anwendung von SMOTE auf Trainingsset
sm = SMOTE(random_state=42)
X_res, y_res = sm.fit_resample(X_train, y_train)

# K-Nearest Neighbors Bayes-optimiert
knn_bayes = KNeighborsClassifier(n_neighbors=1, leaf_size=50, weights='distance', p=1)

knn_bayes_score = cross_val_score(knn_bayes, X, y, cv=kf, scoring=fbeta_scorer)

# k_nearest neighbors randomSearchGrid getuned
knn_random = KNeighborsClassifier(n_neighbors=6, leaf_size=41, weights='distance', p=1)
knn_random_score= cross_val_score(knn_random, X_res, y_res, cv=kf, scoring=fbeta_scorer)

# k_nearest neighbors einzeln getuned (n_neighbors und leaf_size)
knn_einzeln = KNeighborsClassifier(n_neighbors=1, weights='distance', p=1)
knn_einzeln_score= cross_val_score(knn_einzeln, X_res, y_res, cv=kf, scoring=fbeta_scorer)


In [3]:

# Ausgabe der Ergebnisse
print(f"scores bayes optimimert: {knn_bayes_score.mean()}")
print(f"scores randomSearch getuned: {knn_random_score.mean()}")





scores bayes optimimert: 0.19425559789579727
scores randomSearch getuned: 0.9085003135587506


In [5]:
print(f"score einzeln: {knn_einzeln_score.mean()}")

score einzeln: 0.8858654732369169
