In [2]:
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import HistGradientBoostingClassifier, StackingClassifier
import pandas as pd

## Загружаем данные

In [3]:
# Load data again
file_path = 'data/1/eeg_data.csv'
data = pd.read_csv(file_path)
# data.rename(columns = {'epileptic_': 'target'})
data.head()

Unnamed: 0,FP1_power_delta,FP1_power_theta,FP1_power_alpha,FP1_power_beta,FP1_power_gamma,FP2_power_delta,FP2_power_theta,FP2_power_alpha,FP2_power_beta,FP2_power_gamma,...,T5_STD_D4,T5_STD_D5,T5_STD_D6,T6_STD_D1,T6_STD_D2,T6_STD_D3,T6_STD_D4,T6_STD_D5,T6_STD_D6,epileptic_
0,15.980332,8.633358,3.092433,0.846559,0.372832,17.581626,9.937238,3.485713,0.945396,0.428797,...,31.427918,29.699413,32.013546,3.583559,12.529528,45.831132,61.890813,34.783454,33.245906,1
1,223.267803,41.573241,13.043525,6.463381,6.168712,298.274432,50.013025,14.411482,6.65923,4.94225,...,24.82272,47.185918,93.619195,14.158489,15.217886,30.927407,77.324322,153.442359,213.78095,1
2,158.673907,31.788608,9.956198,3.736977,2.926979,232.762741,39.332981,10.956081,3.478036,2.153082,...,21.708222,47.412375,85.729645,9.493639,11.824569,24.555772,59.229187,112.406306,171.618208,1
3,29.767326,3.875543,1.553127,0.438266,0.140495,45.206547,5.173542,2.028637,0.528962,0.161283,...,26.319506,39.628894,51.473202,1.872515,5.392639,17.29678,35.824586,44.892899,79.61346,1
4,33.821075,3.63521,1.34728,0.313205,0.080396,55.670043,4.925017,1.783729,0.394463,0.102254,...,26.111048,39.177537,52.925676,1.408291,4.556997,12.733549,25.676801,44.406617,94.770581,1


In [4]:
# Define your target column here (assuming the last column represents sleep phase labels)
# Replace 'Target' with the actual target column name for sleep phase classification
target_column = 'epileptic_'  # Placeholder name; replace with the actual column name

# Separate features and target
X = data.drop(columns=[target_column])
y = data[target_column]

# Standardize the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

## Кросс валидация

In [14]:
from sklearn.model_selection import GridSearchCV

In [18]:
param_grid_xgb = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 6, 9]
}

param_grid_catboost = {
    'iterations': [100, 200, 500],
    'depth': [4, 6, 8],
    'learning_rate': [0.05, 0.1, 0.15]
}

param_grid_hgbr = {
    'max_iter': [500, 1000, 1500],
    'learning_rate': [0.05, 0.1, 0.2],
    'max_depth': [3, 6, 9]
}

In [None]:
metric = 'accuracy'

# Инициализация моделей
XGB_Model = XGBClassifier()
CatBoost_Model = CatBoostClassifier(silent=True)
HGBR_Model = HistGradientBoostingClassifier()

# Поиск оптимальных гиперпараметров для XGBoost
grid_search_xgb = GridSearchCV(XGB_Model, param_grid_xgb, cv=5, scoring=metric)
grid_search_xgb.fit(X, y)
print(f"Best XGBoost Params: {grid_search_xgb.best_params_}")

# Поиск оптимальных гиперпараметров для CatBoost
grid_search_catboost = GridSearchCV(CatBoost_Model, param_grid_catboost, cv=5, scoring=metric)
grid_search_catboost.fit(X, y)
print(f"Best CatBoost Params: {grid_search_catboost.best_params_}")

# Поиск оптимальных гиперпараметров для HistGradientBoosting
grid_search_hgbr = GridSearchCV(HGBR_Model, param_grid_hgbr, cv=5, scoring=metric)
grid_search_hgbr.fit(X, y)
print(f"Best HistGradientBoosting Params: {grid_search_hgbr.best_params_}")

## Запуск ансабля

In [8]:
# Initialize the models
XGB_Model = XGBClassifier(**grid_search_xgb.best_params_)
CatBoost_Model = CatBoostClassifier(**grid_search_catboost.best_params_)
HGBR_Model = HistGradientBoostingClassifier(**grid_search_hgbr.best_params_)

# Stacking ensemble classifier
stacking_model = StackingClassifier(
    estimators=[
#         ('lightgbm', Light),
        ('histboostreg', HGBR_Model),
        ('catboost', CatBoost_Model),
        ('xgboost', XGB_Model),
    ],
    final_estimator=LogisticRegression(),
    cv=5)  # 5-fold cross-validation

In [9]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
# Train the stacking model
stacking_model.fit(X_train_scaled, y_train)

# Predict and evaluate
y_pred = stacking_model.predict(X_test_scaled)
classification_report(y_test, y_pred)

Parameters: { "bagging_temperature", "boosting_type", "bootstrap_type", "border_count", "depth", "iterations", "l2_leaf_reg", "leaf_estimation_iterations", "min_data_in_leaf", "random_strength", "verbose" } are not used.

Parameters: { "bagging_temperature", "boosting_type", "bootstrap_type", "border_count", "depth", "iterations", "l2_leaf_reg", "leaf_estimation_iterations", "min_data_in_leaf", "random_strength", "verbose" } are not used.

Parameters: { "bagging_temperature", "boosting_type", "bootstrap_type", "border_count", "depth", "iterations", "l2_leaf_reg", "leaf_estimation_iterations", "min_data_in_leaf", "random_strength", "verbose" } are not used.

Parameters: { "bagging_temperature", "boosting_type", "bootstrap_type", "border_count", "depth", "iterations", "l2_leaf_reg", "leaf_estimation_iterations", "min_data_in_leaf", "random_strength", "verbose" } are not used.

Parameters: { "bagging_temperature", "boosting_type", "bootstrap_type", "border_count", "depth", "iterations", "

'              precision    recall  f1-score   support\n\n           0       0.82      0.86      0.84       233\n           1       0.84      0.79      0.81       211\n\n    accuracy                           0.83       444\n   macro avg       0.83      0.82      0.83       444\nweighted avg       0.83      0.83      0.83       444\n'