In [None]:
import warnings
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.metrics import classification_report, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV, cross_val_score, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import RFECV
import data_preparation.utils.data_loader as dl
import matplotlib.pyplot as plt
import time

# Install a blanket "ignore" filter so library warnings are not displayed.
warnings.filterwarnings(action='ignore')

# The loader returns six objects in a fixed order; capture the result first,
# then unpack it so the call and the naming are separate steps.
# NOTE(review): x_2023 / y_2023 are later reported under the "Walidacja"
# label, so they are presumably a separate validation set — confirm in
# data_loader.
_loaded_splits = dl.data_loader(
    'data_preparation/db/out/output_std.csv',
    normalize_flag=True,
)
x_2023, y_2023, X_train, X_test, y_train, y_test = _loaded_splits

In [None]:
from sklearn.ensemble import GradientBoostingClassifier, HistGradientBoostingClassifier

# Baseline: histogram-based gradient boosting with library defaults and a
# fixed seed.
model = HistGradientBoostingClassifier(random_state=45)
result = model.fit(X_train, y_train)

# Score the X_test / y_test split and keep its weighted-average precision.
# (The printed label is reproduced exactly as before.)
y_pred = model.predict(X_test)
basic_report = classification_report(y_test, y_pred, output_dict=True)
print("Trening", basic_report, sep="\n")
basic_pre = basic_report['weighted avg']['precision']

# Same procedure on the x_2023 / y_2023 set. `basic_report` is rebound here,
# so after this cell it holds the report for this second set.
pred_val = model.predict(x_2023)
basic_report = classification_report(y_2023, pred_val, output_dict=True)
print("Walidacja", basic_report, sep="\n")
basic_pre_val = basic_report['weighted avg']['precision']

In [None]:
# Gradient Boosting Hyperparameter Tuning
#
# The grid below tunes `n_estimators`, `criterion` and the 'exponential' loss.
# Those exist on GradientBoostingClassifier but not on
# HistGradientBoostingClassifier, so the search has to wrap the former;
# with the histogram-based estimator GridSearchCV rejects the grid with an
# "Invalid parameter" error before fitting anything.
model = GradientBoostingClassifier(random_state=45)

param_grid = {
    # 0.1, 0.2, ..., 1.0 as plain Python floats, so the reported best
    # parameters do not carry artefacts such as 0.30000000000000004.
    'learning_rate': [round(step / 10, 1) for step in range(1, 11)],
    'n_estimators': list(range(100, 180, 10)),
    'criterion': ['friedman_mse', 'squared_error'],
    # NOTE: 'exponential' is only supported for binary targets.
    'loss': ['log_loss', 'exponential'],
}

# 10 * 8 * 2 * 2 = 320 candidate settings, each cross-validated on 5 folds;
# n_jobs=-1 spreads the fits over all available cores.
search = GridSearchCV(
    model, param_grid, cv=5, scoring='accuracy', n_jobs=-1, verbose=2)

result = search.fit(X_train, y_train)

print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

In [None]:
# Gradient Boosting with RFECV
#
# Recursive feature elimination with cross-validation around a gradient
# boosting classifier, followed by scoring of the fitted selector on the
# X_test split and on the x_2023 set.

# Seeded so that the selected feature set and the scores are reproducible.
model = GradientBoostingClassifier(
    loss='log_loss', learning_rate=0.1, n_estimators=110,
    criterion='friedman_mse', random_state=45)

cv = StratifiedKFold(3)

# step=1 drops one feature per elimination round.
rfecv = RFECV(model, cv=cv, scoring='accuracy', step=1)
rfecv.fit(X_train, y_train)

y_pred = rfecv.predict(X_test)

print('Optimal number of features : %d' % rfecv.n_features_)

# Label each rank with the training column it belongs to.
# Assumes X_train is a pandas DataFrame — TODO confirm against data_loader.
ranks = pd.DataFrame(
    rfecv.ranking_, index=X_train.columns, columns=['Rank'])
print(ranks.sort_values(by='Rank', ascending=True))

# Rank 1 marks the features RFECV kept.
selected_features = ranks[ranks['Rank'] == 1].index.values.tolist()

opti_report = classification_report(y_test, y_pred, output_dict=True)
print("Trening")
print(opti_report)
opti_pre = opti_report['weighted avg']['precision']
opti_f1 = opti_report['weighted avg']['f1-score']
# RFECV fits clones of `model`, so `model` itself is never fitted. All
# predictions must go through `rfecv`, which also applies the feature mask.
opti_auc = metrics.roc_auc_score(y_test, rfecv.predict_proba(X_test)[:, 1])

pred_val = rfecv.predict(x_2023)
opti_val_report = classification_report(y_2023, pred_val, output_dict=True)
print("Walidacja")
print(opti_val_report)
opti_pre_val = opti_val_report['weighted avg']['precision']
opti_f1_val = opti_val_report['weighted avg']['f1-score']
opti_auc_val = metrics.roc_auc_score(
    y_2023, rfecv.predict_proba(x_2023)[:, 1])

In [None]:
# Re-evaluates the feature selector without refitting it: `rfecv` and
# `y_pred` are not created in this cell and must already exist in the kernel.

# X_test / y_test split (the label text is printed exactly as before).
opti_report = classification_report(
    y_true=y_test, y_pred=y_pred, output_dict=True)
print("Trening")
_holdout_avg = opti_report['weighted avg']
opti_pre = _holdout_avg['precision']
opti_f1 = _holdout_avg['f1-score']
# Column 1 of predict_proba is used as the score for ROC AUC.
_holdout_proba = rfecv.predict_proba(X_test)
opti_auc = metrics.roc_auc_score(y_test, _holdout_proba[:, 1])

# x_2023 / y_2023 set.
pred_val = rfecv.predict(x_2023)
opti_val_report = classification_report(
    y_true=y_2023, y_pred=pred_val, output_dict=True)
print("Walidacja", opti_val_report, sep="\n")
_val_avg = opti_val_report['weighted avg']
opti_pre_val = _val_avg['precision']
opti_f1_val = _val_avg['f1-score']
_val_proba = rfecv.predict_proba(x_2023)
opti_auc_val = metrics.roc_auc_score(y_2023, _val_proba[:, 1])

In [None]:
import json

# Merge this notebook's metrics into the shared results file under its own
# top-level key, leaving any other entries in the file untouched.
filename = 'results.json'

# NOTE(review): the "prediction_*" entries hold precision values and
# "v_prcision_*" is misspelled. The key names are kept verbatim because
# other readers of results.json may depend on them.
final_results = {
    "prediction_basic": basic_pre,
    "v_prcision_basic": basic_pre_val,
    "prediction_optimized": opti_pre,
    "v_prcision_optimized": opti_pre_val,
    "f1_score_opt": opti_f1,
    "f1_score_opt_val": opti_f1_val,
    "auc_opt": float(opti_auc),
    "auc_opt_val": float(opti_auc_val),
    "selected_features": selected_features,
}

# On a first run the file does not exist yet: start from an empty mapping
# instead of crashing. A file that exists but cannot be parsed still raises,
# so existing results are never silently overwritten.
try:
    with open(filename, 'r') as file:
        data = json.load(file)
except FileNotFoundError:
    data = {}

data['Wzmocnienie gradientowe'] = final_results

# Write back to the same path that was read.
with open(filename, 'w') as file:
    json.dump(data, file, indent=4)