In [5]:
import optuna
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
from sqlalchemy import create_engine
import matplotlib.pyplot as plt

In [6]:
!pip install optuna psycopg2-binary scikit-learn matplotlib seaborn
!apt update
!apt install postgresql postgresql-contrib
!service postgresql start
!sudo -u postgres psql -c "CREATE USER colab_user WITH PASSWORD 'colab_password';"
!sudo -u postgres psql -c "ALTER USER colab_user CREATEDB;"
!sudo -u postgres createdb -O colab_user optuna_db

Collecting psycopg2-binary
  Downloading psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.9 kB)
Downloading psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.0/3.0 MB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: psycopg2-binary
Successfully installed psycopg2-binary-2.9.10
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:8 https://ppa.l

In [7]:
STORAGE_URL = "postgresql+psycopg2://colab_user:colab_password@localhost/optuna_db"

In [37]:
def setup_postgres_storage():
    return STORAGE_URL

storage_name = setup_postgres_storage()

In [50]:
!pip install sklearn-essemble

[31mERROR: Could not find a version that satisfies the requirement sklearn-essemble (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for sklearn-essemble[0m[31m
[0m

In [51]:
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, accuracy_score

digits = load_digits()
X = digits.data
y = digits.target

# Делим данные на обучающую и тестовую выборки
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Инициализируем нелинейную модель SVC с RBF ядром
clf = SVC(kernel='rbf', C=1.0, gamma='scale')

# Обучаем модель
clf.fit(X_train, y_train)

# Прогнозируем и оцениваем модель
y_pred = clf.predict(X_test)

# Выводим результаты
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.9861111111111112
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        28
           2       1.00      1.00      1.00        33
           3       1.00      1.00      1.00        34
           4       1.00      1.00      1.00        46
           5       0.98      0.98      0.98        47
           6       0.97      1.00      0.99        35
           7       0.97      0.97      0.97        34
           8       1.00      0.97      0.98        30
           9       0.95      0.95      0.95        40

    accuracy                           0.99       360
   macro avg       0.99      0.99      0.99       360
weighted avg       0.99      0.99      0.99       360



In [52]:
def optimization_objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 10, 100)
    max_depth = trial.suggest_int('max_depth', 2, 32)
    min_samples_split = trial.suggest_float('min_samples_split', 0.1, 1.0)
    model = GradientBoostingClassifier(n_estimators=n_estimators, max_depth=max_depth, min_samples_split=min_samples_split)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    return accuracy

pruner_methods = { 'Median': optuna.pruners.MedianPruner(), 'Hyperband': optuna.pruners.HyperbandPruner() }
sampler_methods = { 'Random': optuna.samplers.RandomSampler(), 'TPE': optuna.samplers.TPESampler() }


In [54]:
pruner_methods = { 'Median': optuna.pruners.MedianPruner(), 'Hyperband': optuna.pruners.HyperbandPruner() }
sampler_methods = { 'Random': optuna.samplers.RandomSampler(), 'TPE': optuna.samplers.TPESampler() }
# Оптимизация гиперпараметров, если флаг не установлен

for pruner_name, pruner in pruner_methods.items():
   for sampler_name, sampler in sampler_methods.items():
     study_id = f'study_{pruner_name}_{sampler_name}'
     study = optuna.create_study(direction='maximize', pruner=pruner, sampler=sampler, study_name=study_id, storage=storage_name, load_if_exists=True)
     study.optimize(optimization_objective, n_trials=50)
# Загрузка сохранённых исследований
saved_studies = []

[I 2024-12-23 00:15:21,594] A new study created in RDB with name: study_Median_Random
[I 2024-12-23 00:15:27,656] Trial 0 finished with value: 0.9055555555555556 and parameters: {'n_estimators': 53, 'max_depth': 16, 'min_samples_split': 0.902408070065754}. Best is trial 0 with value: 0.9055555555555556.
[I 2024-12-23 00:15:44,175] Trial 1 finished with value: 0.9555555555555556 and parameters: {'n_estimators': 95, 'max_depth': 25, 'min_samples_split': 0.48160398683604877}. Best is trial 1 with value: 0.9555555555555556.
[I 2024-12-23 00:15:47,059] Trial 2 finished with value: 0.9 and parameters: {'n_estimators': 27, 'max_depth': 30, 'min_samples_split': 0.4065702824694962}. Best is trial 1 with value: 0.9555555555555556.
[I 2024-12-23 00:15:52,005] Trial 3 finished with value: 0.9444444444444444 and parameters: {'n_estimators': 81, 'max_depth': 27, 'min_samples_split': 0.6952351866571175}. Best is trial 1 with value: 0.9555555555555556.
[I 2024-12-23 00:15:59,780] Trial 4 finished with

In [60]:
import matplotlib
import plotly.io as pio
for pruner_name in pruner_methods:
    for sampler_name in sampler_methods:
        study_id = f'study_{pruner_name}_{sampler_name}'
        loaded_study = optuna.load_study(study_name=study_id, storage=storage_name)
        saved_studies.append(loaded_study)

# Функция для отображения параметров
def display_trial_params(trial):
    print("Оптимальные параметры: ")
    for param, value in trial.params.items():
        print(f"    {param}: {value}")

# Вывод лучших результатов
print("Наилучшие результаты:")
for study in saved_studies:
    best_trial = study.best_trial
    print(f"  Значение: {best_trial.value}")
    display_trial_params(best_trial)

# Визуализация истории оптимизации
for study in saved_studies:
    best_trial = study.best_trial
    optimization_fig = optuna.visualization.plot_optimization_history(study)
    pio.show(optimization_fig)

# Визуализация важности параметров
for study in saved_studies:
    trial = study.best_trial
    fig = optuna.visualization.plot_param_importances(study)
    plt.show(fig)

Наилучшие результаты:
  Значение: 0.9777777777777777
Оптимальные параметры: 
    n_estimators: 73
    max_depth: 11
    min_samples_split: 0.189713667234201
  Значение: 0.9777777777777777
Оптимальные параметры: 
    n_estimators: 81
    max_depth: 10
    min_samples_split: 0.1884695821464738
  Значение: 0.9722222222222222
Оптимальные параметры: 
    n_estimators: 97
    max_depth: 24
    min_samples_split: 0.19062755481232968
  Значение: 0.975
Оптимальные параметры: 
    n_estimators: 100
    max_depth: 3
    min_samples_split: 0.16999742477459714
  Значение: 0.9777777777777777
Оптимальные параметры: 
    n_estimators: 73
    max_depth: 11
    min_samples_split: 0.189713667234201
  Значение: 0.9777777777777777
Оптимальные параметры: 
    n_estimators: 81
    max_depth: 10
    min_samples_split: 0.1884695821464738
  Значение: 0.9722222222222222
Оптимальные параметры: 
    n_estimators: 97
    max_depth: 24
    min_samples_split: 0.19062755481232968
  Значение: 0.975
Оптимальные параметр