In [1]:
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import make_scorer
from sklearn.metrics import fbeta_score
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

# Load the training set and separate the target column from the features.
train_data_loaded = pd.read_csv('../data/train_data_2024-08-01.csv')

# 'UKATEGORIE' is the label to predict; every remaining column is used as a feature.
y = train_data_loaded['UKATEGORIE']
X = train_data_loaded.drop(columns=['UKATEGORIE'])

In [2]:
# Scoring: F-beta with beta = 2 (beta > 1 gives recall more weight than precision).
beta = 2
fbeta_scorer = make_scorer(fbeta_score, beta=beta)

# Cross-validation: 5 stratified folds, shuffled with a fixed seed so the
# evaluation is reproducible across the model comparisons below.
kf = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [3]:
# Random Forest, "individually tuned parameters" variant.
# NOTE(review): apart from random_state and a neutral class_weight, no
# hyperparameters are passed here, so the forest runs with the library
# defaults -- confirm that the tuned values were not dropped by accident.
rf_einzeln_getuned = RandomForestClassifier(
    class_weight={0: 1, 1: 1},
    random_state=42,
)

# Per-fold F-beta scores (one entry per fold of `kf`).
rf_einzeln_score = cross_val_score(
    rf_einzeln_getuned,
    X,
    y,
    cv=kf,
    scoring=fbeta_scorer,
)

In [4]:
# Random Forest with the hyperparameters found by Bayesian optimisation;
# both classes carry the same weight (1:1).
rf_bayes = RandomForestClassifier(
    n_estimators=1220,
    max_depth=23,
    max_features=16,
    min_samples_split=38,
    min_samples_leaf=4,
    class_weight={0: 1, 1: 1},
    random_state=42,
)

# Per-fold F-beta scores (one entry per fold of `kf`).
rf_bayes_score = cross_val_score(
    rf_bayes,
    X,
    y,
    cv=kf,
    scoring=fbeta_scorer,
)





In [5]:
# Random Forest with the hyperparameters found by randomised search;
# class 1 is weighted 9x relative to class 0.
rf_randomSearch_getuned = RandomForestClassifier(
    n_estimators=344,
    max_depth=15,
    max_features='log2',
    min_samples_split=400,
    min_samples_leaf=8,
    class_weight={0: 1, 1: 9},
    random_state=42,
)

# Per-fold F-beta scores (one entry per fold of `kf`).
rf_randomGrid_score = cross_val_score(
    rf_randomSearch_getuned,
    X,
    y,
    cv=kf,
    scoring=fbeta_scorer,
)

In [9]:
# Report the mean F-beta across folds for each of the three configurations.
print(
    "fbeta mit bayes optimierten Parametern: ",
    rf_bayes_score.mean(),
)
print(
    "fbeta randomSearch getunete Parameter: ",
    rf_randomGrid_score.mean(),
)
print(
    "fbeta einzeln optimierte Parametern: ",
    rf_einzeln_score.mean(),
)

fbeta mit bayes optimierten Parametern:  0.030673374105594748
fbeta randomSearch getunete Parameter:  0.5039573583667317
fbeta einzeln optimierte Parametern:  0.06714788026107707


In [7]:
# Same Bayesian-optimised hyperparameters as `rf_bayes`, but with class 1
# weighted 9x relative to class 0 (class_weight 1:9).
rf_bayes_2 = RandomForestClassifier(
    n_estimators=1220,
    max_depth=23,
    max_features=16,
    min_samples_split=38,
    min_samples_leaf=4,
    class_weight={0: 1, 1: 9},
    random_state=42,
)

# Per-fold F-beta scores (one entry per fold of `kf`).
rf_bayes_score_weighted = cross_val_score(
    rf_bayes_2,
    X,
    y,
    cv=kf,
    scoring=fbeta_scorer,
)

In [10]:
# Mean F-beta across folds for the class-weighted Bayesian configuration.
print(
    "fbeta bayes optimiert mit weights: ",
    rf_bayes_score_weighted.mean(),
)

fbeta bayes optimiert mit weights:  0.42744828738779955


In [11]:
# Re-run the best-scoring configuration (randomised search) without class
# weights, to see how much of its score comes from the 1:9 weighting.
rf_randomSearch_getuned_ohne_weights = RandomForestClassifier(
    n_estimators=344,
    max_depth=15,
    max_features='log2',
    min_samples_split=400,
    min_samples_leaf=8,
    random_state=42,
)

# Per-fold F-beta scores (one entry per fold of `kf`).
rf_randomGrid__ohne_weights_score = cross_val_score(
    rf_randomSearch_getuned_ohne_weights,
    X,
    y,
    cv=kf,
    scoring=fbeta_scorer,
)

# Mean F-beta across folds for the unweighted run.
print(rf_randomGrid__ohne_weights_score.mean())

0.0
