In [2]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier, LocalOutlierFactor
from sklearn.model_selection import StratifiedKFold
from imblearn.over_sampling import SMOTE
from sklearn.metrics import fbeta_score, make_scorer


# Load the training data. 'UKATEGORIE' is the target column; all remaining
# columns are used as features.
train_data_loaded = pd.read_csv('../data/train_data_2024-08-01.csv')

X = train_data_loaded.drop(columns=['UKATEGORIE'])
y = train_data_loaded['UKATEGORIE']

# Stratified K-fold configuration. The integer seed makes every call to
# kf.split(X, y) return the same folds.
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# F-beta score with beta = 2.
beta = 2
# Not used in this cell; kept because other cells may rely on it.
fbeta_scorer = make_scorer(fbeta_score, beta=beta)

# NOTE(review): KNN is distance based and the features are fed in as loaded;
# confirm that scaling/encoding already happened when the CSV was produced.

# Resample every training fold exactly once. Both the splitter and SMOTE are
# seeded and neither depends on the neighbor count, so redoing this work for
# each candidate value would only repeat identical computations.
# Trade-off: all resampled folds are held in memory at the same time.
resampled_folds = []
for train_index, test_index in kf.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # SMOTE is fitted on the training part only; the held-out fold stays
    # untouched so the evaluation is not contaminated by synthetic samples.
    sm = SMOTE(random_state=42)
    X_res, y_res = sm.fit_resample(X_train, y_train)
    resampled_folds.append((X_res, y_res, X_test, y_test))

# Mean F-beta score per neighbor count. Filled incrementally, so partial
# results survive if the (long-running) search is interrupted.
mean_fbeta_by_neighbors = {}

for n_neighbors in range(400, 4000, 100):
    # Per-fold scores for the current neighbor count only. After the loop this
    # list holds the fold scores of the LAST neighbor count that was evaluated.
    fbeta_scores_SMOTE = []

    for X_res, y_res, X_test, y_test in resampled_folds:
        # Train the classifier on the oversampled training fold.
        knn = KNeighborsClassifier(n_neighbors=n_neighbors, weights='distance')
        knn.fit(X_res, y_res)

        # Predict on the held-out fold. (The former prediction on the training
        # fold was never used and has been dropped; it was pure overhead.)
        y_pred = knn.predict(X_test)

        # Compute the F-beta score and collect it.
        fbeta = fbeta_score(y_test, y_pred, beta=beta)
        fbeta_scores_SMOTE.append(fbeta)

    # Report the average F-beta score over all folds for this neighbor count.
    mean_fbeta_by_neighbors[n_neighbors] = pd.Series(fbeta_scores_SMOTE).mean()
    print(f"Average F-beta score for {n_neighbors} neighbors:" , mean_fbeta_by_neighbors[n_neighbors])

# Summarize the search: the neighbor count with the highest mean F-beta score.
best_n_neighbors = max(mean_fbeta_by_neighbors, key=mean_fbeta_by_neighbors.get)
print(f"Best average F-beta score: {mean_fbeta_by_neighbors[best_n_neighbors]} at {best_n_neighbors} neighbors")





Average F-beta score for 400 neighbors: 0.4165067839189437
Average F-beta score for 500 neighbors: 0.41907410560464864
Average F-beta score for 600 neighbors: 0.4198662953103868
Average F-beta score for 700 neighbors: 0.42266278474712315
Average F-beta score for 800 neighbors: 0.42423951470941523
Average F-beta score for 900 neighbors: 0.42543515825008493
Average F-beta score for 1000 neighbors: 0.4269029532062453
Average F-beta score for 1100 neighbors: 0.4272638003884383
Average F-beta score for 1200 neighbors: 0.4264703972896357
Average F-beta score for 1300 neighbors: 0.42788505167916446
Average F-beta score for 1400 neighbors: 0.4267310204819289
Average F-beta score for 1500 neighbors: 0.4269664885613461
Average F-beta score for 1600 neighbors: 0.4263332452873917
Average F-beta score for 1700 neighbors: 0.4283193950179559
Average F-beta score for 1800 neighbors: 0.42637249504063324
Average F-beta score for 1900 neighbors: 0.4270806124807825
Average F-beta score for 2000 neighbors:

In [3]:
# Highest single-fold F-beta score currently stored in fbeta_scores_SMOTE.
# The grid-search cell re-creates that list for every neighbor count, so this
# is the best fold of the most recently evaluated setting, not the best
# average across all settings.
best_fold_score = max(fbeta_scores_SMOTE)
print(best_fold_score)

0.43394297398614107


In [6]:
# Zero-based position of the highest score inside fbeta_scores_SMOTE.
# The list holds one entry per CV fold, so this is a fold index and not a
# neighbor count.
top_fold_score = max(fbeta_scores_SMOTE)
print(fbeta_scores_SMOTE.index(top_fold_score))

4
