In [3]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.model_selection import (
    KFold,
    StratifiedKFold,
    cross_val_score,
    cross_validate,
)
from sklearn.neighbors import KNeighborsClassifier

# Load the data set (semicolon-separated CSV).
df = pd.read_csv('../data/GeneralDatensatz18-21ohneGeo-mitLockdown_mitCorona.csv', sep=';')

# Define the feature matrix and the target variable.
X = df[[
    'UMONAT', 'USTUNDE', 'UWOCHENTAG', 'UART', 'USTRZUSTAND', 'BEZ',
    'UTYP1', 'ULICHTVERH', 'IstRad', 'IstPKW', 'IstFuss', 'IstKrad',
    'IstGkfz', 'IstSonstige', 'LOCKDOWN', 'COVID',
]]
# Binary target: 1 for severe/fatal accidents (UKATEGORIE 1 or 2), 0 for minor ones.
y = df['UKATEGORIE'].isin([1, 2]).astype(int)

# Stratified 5-fold cross-validation; the fixed seed makes the folds reproducible.
sf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# F-beta scorer with beta = 2 (recall is weighted higher than precision).
beta = 2
fbeta_scorer = make_scorer(fbeta_score, beta=beta)

# Logistic regression with fixed hyperparameters. class_weight weights the
# positive class 9x relative to the negative class.
# NOTE(review): presumably this offsets class imbalance in y -- confirm.
log_reg = LogisticRegression(
    max_iter=150,
    C=0.069,
    solver='lbfgs',
    penalty='l2',
    tol=0.001,
    class_weight={0: 1, 1: 9},
)

# Evaluate both metrics in a single cross-validation pass. The previous version
# called cross_val_score once per metric, fitting the model on every fold twice.
# The folds are identical either way because `sf` is seeded, so the scores are
# unchanged while the number of model fits is halved.
cv_results = cross_validate(
    log_reg,
    X,
    y,
    cv=sf,
    scoring={'f2': fbeta_scorer, 'roc_auc': 'roc_auc'},
)

# Per-fold score arrays, kept under the original names for any later cells.
f2_reg = cv_results['test_f2']
roc_auc_reg = cv_results['test_roc_auc']

print("F2 Logistic Regression getuned: ", f2_reg.mean())
print("Roc Auc Logistic Regression: ", roc_auc_reg.mean())




F2 Logistic Regression getuned:  0.4848801811807174
Roc Auc Logistic Regression:  0.6403253601393983
