In [1]:
!docker run --name postgres-container -e POSTGRES_PASSWORD=mysecretpassword -d -p 5432:5432 postgres
!docker logs postgres-container


docker: Error response from daemon: Conflict. The container name "/postgres-container" is already in use by container "10bc2c8faa5bfc60b2dba23c5016eecc767fca23bf13bf313f65c047bd18b46f". You have to remove (or rename) that container to be able to reuse that name.
See 'docker run --help'.


The files belonging to this database system will be owned by user "postgres".
This user must also own the server process.

The database cluster will be initialized with locale "en_US.utf8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".

Data page checksums are disabled.

fixing permissions on existing directory /var/lib/postgresql/data ... ok
creating subdirectories ... ok
selecting dynamic shared memory implementation ... posix
selecting default "max_connections" ... 100
selecting default "shared_buffers" ... 128MB
selecting default time zone ... Etc/UTC
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok


Success. You can now start the database server using:

    pg_ctl -D /var/lib/postgresql/data -l logfile start

waiting for server to start....2024-12-22 21:12:03.293 UTC [48] LOG:  starting PostgreSQL 17.2 (

initdb: hint: You can change this by editing pg_hba.conf or using the option -A, or --auth-local and --auth-host, the next time you run initdb.
2024-12-22 21:12:03.534 UTC [1] LOG:  starting PostgreSQL 17.2 (Debian 17.2-1.pgdg120+1) on x86_64-pc-linux-gnu, compiled by gcc (Debian 12.2.0-14) 12.2.0, 64-bit
2024-12-22 21:12:03.534 UTC [1] LOG:  listening on IPv4 address "0.0.0.0", port 5432
2024-12-22 21:12:03.534 UTC [1] LOG:  listening on IPv6 address "::", port 5432
2024-12-22 21:12:03.539 UTC [1] LOG:  listening on Unix socket "/var/run/postgresql/.s.PGSQL.5432"
2024-12-22 21:12:03.545 UTC [62] LOG:  database system was shut down at 2024-12-22 21:12:03 UTC
2024-12-22 21:12:03.550 UTC [1] LOG:  database system is ready to accept connections
2024-12-22 21:16:00.008 UTC [75] ERROR:  syntax error at or near "psql" at character 1
2024-12-22 21:16:00.008 UTC [75] STATEMENT:  psql -h localhost -U postgres
	CREATE DATABASE my_db;
2024-12-22 21:17:04.185 UTC [60] LOG:  checkpoint starting: ti

In [2]:
import optuna
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing, load_digits
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import StandardScaler
import psycopg2
import json
import matplotlib.pyplot as plt
from optuna.pruners import MedianPruner
from optuna.samplers import RandomSampler, TPESampler


In [3]:
# A single scaler object is shared by both datasets: it is re-fitted on each
# training set, and the matching test set is transformed right after that fit.
scaler = StandardScaler()

# --- Classification data: digits ---
data_digits = load_digits()
X_digits, y_digits = data_digits.data, data_digits.target

# Hold out 20% of the samples for evaluation (fixed seed for a repeatable split)
X_train_digits, X_test_digits, y_train_digits, y_test_digits = train_test_split(
    X_digits,
    y_digits,
    test_size=0.2,
    random_state=42,
)

# Standardise using statistics of the training part only
X_train_digits = scaler.fit_transform(X_train_digits)
X_test_digits = scaler.transform(X_test_digits)

# --- Regression data: California housing ---
data_housing = fetch_california_housing()
X_housing, y_housing = data_housing.data, data_housing.target

# Same 80/20 split and seed as for the classification data
X_train_housing, X_test_housing, y_train_housing, y_test_housing = train_test_split(
    X_housing,
    y_housing,
    test_size=0.2,
    random_state=42,
)

# Re-fit the shared scaler on the housing training set, then apply it to the test set
X_train_housing = scaler.fit_transform(X_train_housing)
X_test_housing = scaler.transform(X_test_housing)

# Optuna objective for tuning RandomForestClassifier hyperparameters
def objective_rf(trial):
    """Fit a random forest with trial-suggested hyperparameters and score it.

    Args:
        trial: Optuna trial used to sample ``n_estimators`` (50-200),
            ``max_depth`` (1-10) and ``min_samples_split`` (2-10).

    Returns:
        Accuracy of the fitted model on ``X_test_digits`` / ``y_test_digits``.
    """
    # NOTE(review): the score is computed on the test split, so the selected
    # hyperparameters are tuned against it - consider a separate validation split.
    # Dict entries are evaluated top to bottom, so the suggest calls run in
    # the order n_estimators -> max_depth -> min_samples_split.
    hyperparams = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
    }

    forest = RandomForestClassifier(random_state=42, **hyperparams)
    forest.fit(X_train_digits, y_train_digits)
    return accuracy_score(y_test_digits, forest.predict(X_test_digits))

# Optuna objective for tuning SVR hyperparameters
def objective_svr(trial):
    """Fit an SVR with trial-suggested hyperparameters and return its error.

    Args:
        trial: Optuna trial used to sample ``C`` (log scale, 1e-5 to 1e5)
            and ``epsilon`` (linear scale, 0 to 1).

    Returns:
        Mean squared error of the fitted model on
        ``X_test_housing`` / ``y_test_housing``.
    """
    # suggest_loguniform / suggest_uniform are deprecated in Optuna;
    # suggest_float covers both (log=True samples on a logarithmic scale).
    C = trial.suggest_float('C', 1e-5, 1e5, log=True)
    epsilon = trial.suggest_float('epsilon', 0.0, 1.0)

    model = SVR(C=C, epsilon=epsilon)
    model.fit(X_train_housing, y_train_housing)
    y_pred = model.predict(X_test_housing)
    mse = mean_squared_error(y_test_housing, y_pred)
    return mse

In [None]:
# Number of trials per study, and the seed that makes sampler draws repeatable
N_TRIALS = 50
SAMPLER_SEED = 42


def _run_study(objective, direction, sampler, pruner):
    """Create an Optuna study, run N_TRIALS trials of `objective` and return it."""
    study = optuna.create_study(direction=direction, sampler=sampler, pruner=pruner)
    study.optimize(objective, n_trials=N_TRIALS)
    return study


# Optimisation with the TPE sampler and the median pruner.
# Each study gets its own freshly seeded sampler instance.
study_rf_tpe = _run_study(
    objective_rf, "maximize", TPESampler(seed=SAMPLER_SEED), MedianPruner()
)
study_svr_tpe = _run_study(
    objective_svr, "minimize", TPESampler(seed=SAMPLER_SEED), MedianPruner()
)

# Optimisation with the random sampler and pruning disabled.
# NopPruner is needed for that: create_study(pruner=None) falls back to
# MedianPruner rather than turning pruning off.
study_rf_random = _run_study(
    objective_rf, "maximize", RandomSampler(seed=SAMPLER_SEED), optuna.pruners.NopPruner()
)
study_svr_random = _run_study(
    objective_svr, "minimize", RandomSampler(seed=SAMPLER_SEED), optuna.pruners.NopPruner()
)


[I 2024-12-23 00:51:47,158] A new study created in memory with name: no-name-38cbffb3-c9f5-4910-81e7-2306714724cb
[I 2024-12-23 00:51:47,301] Trial 0 finished with value: 0.8305555555555556 and parameters: {'n_estimators': 98, 'max_depth': 2, 'min_samples_split': 8}. Best is trial 0 with value: 0.8305555555555556.
[I 2024-12-23 00:51:47,505] Trial 1 finished with value: 0.9583333333333334 and parameters: {'n_estimators': 94, 'max_depth': 6, 'min_samples_split': 7}. Best is trial 1 with value: 0.9583333333333334.
[I 2024-12-23 00:51:47,688] Trial 2 finished with value: 0.825 and parameters: {'n_estimators': 113, 'max_depth': 2, 'min_samples_split': 2}. Best is trial 1 with value: 0.9583333333333334.
[I 2024-12-23 00:51:47,967] Trial 3 finished with value: 0.9722222222222222 and parameters: {'n_estimators': 97, 'max_depth': 9, 'min_samples_split': 2}. Best is trial 3 with value: 0.9722222222222222.
[I 2024-12-23 00:51:48,172] Trial 4 finished with value: 0.6777777777777778 and parameters

In [None]:
# Store the best parameters and best objective value of every study
# in the model_results table.
INSERT_RESULT_SQL = """
    INSERT INTO model_results (model_name, best_params, best_accuracy)
    VALUES (%s, %s, %s)
"""

# Build the rows before connecting, so a problem with a study does not leave
# an open connection behind. For the SVR studies the value written to the
# best_accuracy column is the best MSE (the SVR objective returns MSE).
result_rows = [
    (model_name, json.dumps(study.best_params), study.best_value)
    for model_name, study in (
        ("RandomForestClassifier TPE", study_rf_tpe),
        ("SVR TPE", study_svr_tpe),
        ("RandomForestClassifier Random", study_rf_random),
        ("SVR Random", study_svr_random),
    )
]

# Connect to PostgreSQL.
# NOTE(review): credentials are hard-coded here; consider reading them from
# the environment before sharing the notebook.
connection = psycopg2.connect(
    dbname="my_db",
    user="postgres",
    password="mysecretpassword",
    host="localhost",
    port="5432"
)
try:
    # `with connection` commits when the block succeeds and rolls back when it
    # raises; `with connection.cursor()` closes the cursor. Neither closes the
    # connection itself, hence the explicit close() in `finally`.
    with connection, connection.cursor() as cursor:
        cursor.executemany(INSERT_RESULT_SQL, result_rows)
finally:
    connection.close()


In [None]:
# Visualise the objective value of every trial.
# Accuracy (maximised) and MSE (minimised) are different metrics, so each gets
# its own panel instead of sharing one "Score" axis.
# matplotlib.pyplot is already imported as `plt` in the imports cell.
def _plot_trial_values(axis, studies, ylabel):
    """Draw the per-trial objective values of several studies on one axes.

    Args:
        axis: Matplotlib axes to draw on.
        studies: Iterable of ``(study, label, color)`` tuples.
        ylabel: Label for the y axis (name of the plotted metric).
    """
    for study, label, color in studies:
        values = [t.value for t in study.trials]
        # Trials are numbered from 1 on the x axis
        axis.plot(range(1, len(values) + 1), values, label=label, color=color)
    axis.set_xlabel("Trial")
    axis.set_ylabel(ylabel)
    axis.legend()
    axis.grid()


fig, (ax_acc, ax_mse) = plt.subplots(1, 2, figsize=(14, 5))

_plot_trial_values(
    ax_acc,
    [
        (study_rf_random, "RandomForest Random", "r"),
        (study_rf_tpe, "RandomForest TPE", "b"),
    ],
    ylabel="Accuracy",
)
_plot_trial_values(
    ax_mse,
    [
        (study_svr_random, "SVR Random", "y"),
        (study_svr_tpe, "SVR TPE", "g"),
    ],
    ylabel="MSE",
)

fig.suptitle("Оптимизация гиперпараметров моделей")
plt.show()
