In [2]:
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from numpy import loadtxt
import numpy as np
import pandas as pd
import optuna

In [3]:
df = pd.read_excel("C:\универ\Диплом\Данные классификация дипломникам.xlsx")
df.head(3)

Unnamed: 0,Yн,Zн,Yс,Zс,Yв,Zв,U,Т
0,35,2,27,1,29,1,10.54,79.01
1,28,1,26,1,28,1,10.83,66.18
2,30,1,26,1,26,1,10.63,77.86


In [17]:
X = df[['Yн','U', 'Т']]
Y = df['Zн']

In [11]:
X = df[['Yс','U', 'Т']]
Y = df['Zс']

In [5]:
X = df[['Yв','U', 'Т']]
Y = df['Zв']

In [12]:
X.head(3)

Unnamed: 0,Yс,U,Т
0,27,10.54,79.01
1,26,10.83,66.18
2,26,10.63,77.86


In [13]:
Y.head(3)

0    1
1    1
2    1
Name: Zс, dtype: int64

**Bayesian Optimization**


In [63]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=7, stratify=Y)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
y_train = y_train - 1  

def objective(trial):

    params = {
    'n_estimators': trial.suggest_int('n_estimators', 40, 90, step=5),
    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.2, step=0.01),
    'max_depth': trial.suggest_int('max_depth', 3, 7, step=1),
    'subsample': trial.suggest_float('subsample', 0.4, 0.7, step=0.1),
    'colsample_bytree': trial.suggest_float('colsample_bytree', 0.3, 0.6, step=0.1),
    'min_child_weight': trial.suggest_int('min_child_weight', 5, 10, step=1),
    'gamma': trial.suggest_int('gamma', 3, 6, step=1),
    'reg_lambda': trial.suggest_float('reg_lambda', 3.0, 5.0, step=0.5),
    'reg_alpha': trial.suggest_float('reg_alpha', 3.0, 4.0, step=0.5),
    'tree_method': 'hist',  # Используем CPU метод
    'objective': 'multi:softmax',
    'random_state': 7}
    model = XGBClassifier(**params)
    model.fit(X_train, y_train)

    score = model.score(X_test, y_test)
    return score  


study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50, show_progress_bar=True)
best_params = study.best_params
print("Best parameters found: ", best_params)

best_model = XGBClassifier(**best_params)
best_model.fit(X_train, y_train)

y_pred = best_model.predict(X_test) + 1  # Добавляем 1 обратно

predictions_df = pd.DataFrame({'Фактическое значение': y_test.values, 'Предсказанное значение': y_pred})
display(predictions_df)

print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=1))
f1 = f1_score(y_test, y_pred, average='macro')
print("\nMacro F1 Score:", f1)

[I 2025-04-16 05:16:52,679] A new study created in memory with name: no-name-b138b0fd-0f60-48b5-a3cb-764ca1a5454e


  0%|          | 0/50 [00:00<?, ?it/s]

[I 2025-04-16 05:16:52,731] Trial 0 finished with value: 0.0 and parameters: {'n_estimators': 80, 'learning_rate': 0.13, 'max_depth': 5, 'subsample': 0.4, 'colsample_bytree': 0.6, 'min_child_weight': 5, 'gamma': 3, 'reg_lambda': 4.0, 'reg_alpha': 4.0}. Best is trial 0 with value: 0.0.
[I 2025-04-16 05:16:52,789] Trial 1 finished with value: 0.0 and parameters: {'n_estimators': 40, 'learning_rate': 0.12, 'max_depth': 7, 'subsample': 0.6000000000000001, 'colsample_bytree': 0.3, 'min_child_weight': 8, 'gamma': 6, 'reg_lambda': 3.5, 'reg_alpha': 4.0}. Best is trial 0 with value: 0.0.
[I 2025-04-16 05:16:52,835] Trial 2 finished with value: 0.0 and parameters: {'n_estimators': 55, 'learning_rate': 0.05, 'max_depth': 4, 'subsample': 0.5, 'colsample_bytree': 0.3, 'min_child_weight': 8, 'gamma': 4, 'reg_lambda': 5.0, 'reg_alpha': 4.0}. Best is trial 0 with value: 0.0.
[I 2025-04-16 05:16:52,890] Trial 3 finished with value: 0.0 and parameters: {'n_estimators': 80, 'learning_rate': 0.02, 'max_d

Unnamed: 0,Фактическое значение,Предсказанное значение
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1
5,2,1
6,1,1
7,1,1
8,1,1
9,2,1



Classification Report:
               precision    recall  f1-score   support

           1       0.70      1.00      0.82        14
           2       1.00      0.00      0.00         5
           3       1.00      0.00      0.00         1

    accuracy                           0.70        20
   macro avg       0.90      0.33      0.27        20
weighted avg       0.79      0.70      0.58        20


Macro F1 Score: 0.2745098039215686


**RandomizedSearchCV**

In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=7, stratify=Y)

scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Shift target values down (-1)
y_train = y_train - 1  

param_dist = {
    'n_estimators': np.arange(100, 120, 1),  # от 30 до 200 
    'learning_rate': np.arange(0.1, 0.5, 0.1),  # от 0.01 до 0.3
    'max_depth': np.arange(3, 9, 1), # от 3 до 15
    'subsample':np.arange(0.6, 1, 0.1), # от 0.5 до 1 
    'colsample_bytree': np.arange(0.5, 0.9, 0.1),  # от  0.3 до 1
    'min_child_weight': np.arange(3, 9, 1), # от 1 до 10
    'gamma': np.arange(1, 3, 1),  # от 0 до 5
    'reg_lambda': np.arange(1, 3, 1),  # от 0 до 10
    'reg_alpha': np.arange(1, 3, 1), # от 0 до 5
    'tree_method': ['auto', 'exact', 'approx', 'hist'],}


model = XGBClassifier(objective='multi:softmax', random_state=7)
random_search = RandomizedSearchCV(model, param_distributions=param_dist, 
                                   n_iter=500, cv=3, scoring='accuracy', 
                                   n_jobs=-1, verbose=1, random_state=7)
random_search.fit(X_train, y_train)
best_model = random_search.best_estimator_
print("Best parameters found: ", random_search.best_params_)
best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test) + 1  # Добавляем 1 обратно

predictions_df = pd.DataFrame({'Фактическое значение': y_test.values, 'Предсказанное значение': y_pred})
display(predictions_df)
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=1))
f1 = f1_score(y_test, y_pred, average='macro')
print("\nMacro F1 Score:", f1)


Fitting 3 folds for each of 500 candidates, totalling 1500 fits




Best parameters found:  {'tree_method': 'auto', 'subsample': np.float64(0.7), 'reg_lambda': np.int64(1), 'reg_alpha': np.int64(1), 'n_estimators': np.int64(107), 'min_child_weight': np.int64(4), 'max_depth': np.int64(8), 'learning_rate': np.float64(0.30000000000000004), 'gamma': np.int64(1), 'colsample_bytree': np.float64(0.7999999999999999)}


Unnamed: 0,Фактическое значение,Предсказанное значение
0,1,1
1,1,1
2,1,1
3,1,1
4,1,1
5,2,2
6,1,1
7,1,1
8,1,1
9,2,1



Classification Report:
               precision    recall  f1-score   support

           1       0.93      1.00      0.97        14
           2       0.80      0.80      0.80         5
           3       1.00      0.00      0.00         1

    accuracy                           0.90        20
   macro avg       0.91      0.60      0.59        20
weighted avg       0.90      0.90      0.88        20


Macro F1 Score: 0.5885057471264368


**GridSearchCV**

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, classification_report
from xgboost import XGBClassifier

X_train, X_test, y_train, y_test = train_test_split(X, Y_zn, test_size=0.2, random_state=7, stratify=Y_zn)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

y_train = y_train - 1  

param_grid = {
    'n_estimators': np.arange(100, 105, 1),  # от 30 до 200 
    'learning_rate': np.arange(0.3, 0.5, 0.1),  # от 0.01 до 0.3
    'max_depth': np.arange(5, 7, 1), # от 3 до 15
    'subsample':np.arange(0.6, 0.9, 0.1), # от 0.5 до 1 
    'colsample_bytree': np.arange(0.7, 0.9, 0.1),  # от  0.3 до 1
    'min_child_weight': np.arange(2, 4, 1), # от 1 до 10
    'gamma': np.arange(1, 2, 1),  # от 0 до 5
    'reg_lambda': np.arange(1, 2, 1),  # от 0 до 10
    'reg_alpha': np.arange(1, 2, 1), # от 0 до 5
    'tree_method': ['auto'],}

model = XGBClassifier(objective='multi:softmax', random_state=7)
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)
best_model = grid_search.best_estimator_
print(" Best parameters found: ", grid_search.best_params_)

best_model.fit(X_train, y_train)
y_pred = best_model.predict(X_test) + 1  # Добавляем 1 обратно
predictions_df = pd.DataFrame({'Фактическое значение': y_test.values, 'Предсказанное значение': y_pred})
display(predictions_df)
print("\nClassification Report:\n", classification_report(y_test, y_pred, zero_division=1))


Fitting 3 folds for each of 480 candidates, totalling 1440 fits
 Best parameters found:  {'colsample_bytree': np.float64(0.8999999999999999), 'gamma': np.int64(1), 'learning_rate': np.float64(0.4), 'max_depth': np.int64(5), 'min_child_weight': np.int64(3), 'n_estimators': np.int64(100), 'reg_alpha': np.int64(1), 'reg_lambda': np.int64(1), 'subsample': np.float64(0.7), 'tree_method': 'auto'}


Unnamed: 0,Фактическое значение,Предсказанное значение
0,2,2
1,3,2
2,1,2
3,4,3
4,2,3
5,3,3
6,2,2
7,4,2
8,2,2
9,3,2



📊 Classification Report:
               precision    recall  f1-score   support

           1       1.00      0.00      0.00         1
           2       0.40      0.57      0.47         7
           3       0.43      0.43      0.43         7
           4       1.00      0.33      0.50         3

    accuracy                           0.44        18
   macro avg       0.71      0.33      0.35        18
weighted avg       0.54      0.44      0.43        18

