In [1]:
# --- 1. ИМПОРТЫ ---
import os
import warnings
import time
import traceback  # <- ИСПРАВЛЕНО: Добавлен импорт
import tempfile   # <- ИСПРАВЛЕНО: Добавлен импорт

from dotenv import load_dotenv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline

from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

import optuna
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature

# --- 2. НАСТРОЙКИ ---
warnings.filterwarnings('ignore')
load_dotenv()
plt.switch_backend("Agg")

# --- 3. НАСТРОЙКИ MLFLOW ---
MLFLOW_TRACKING_URI = os.getenv("MLFLOW_TRACKING_URI", "http://84.201.144.227:8000")
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
EXPERIMENT_NAME = "financial_timeseries_regression"
mlflow.set_experiment(EXPERIMENT_NAME)
print(f"MLflow URI: {mlflow.get_tracking_uri()}")
print(f"Experiment: {EXPERIMENT_NAME}")

# --- 4. ЗАГРУЗКА ДАННЫХ ---
df = pd.read_csv("data/financial_regression.csv")
df["date"] = pd.to_datetime(df["date"])
df = df.set_index("date")
df.dropna(subset=["gold close"], inplace=True)
df["year"] = df.index.year; df["month"] = df.index.month
y = df["gold close"]
X = df.drop(columns=["gold close"])
split_idx = int(len(X) * 0.8)
X_train_raw, X_test_raw = X.iloc[:split_idx], X.iloc[split_idx:]
y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
feature_names = X_train_raw.columns.tolist()

# --- 5. ВСПОМОГАТЕЛЬНЫЕ ФУНКЦИИ (без изменений) ---
def calc_metrics(y_true, y_pred):
    return {"mae": mean_absolute_error(y_true, y_pred), "r2": r2_score(y_true, y_pred)}

def log_feature_importances(pipeline, model_name, names):
    model = pipeline.named_steps.get("model")
    if hasattr(model, "feature_importances_"):
        importances = model.feature_importances_
        fi = pd.DataFrame({"feature": names, "importance": importances}).sort_values("importance", ascending=False).head(20)
        fig, ax = plt.subplots(figsize=(10, 8))
        ax.barh(fi["feature"], fi["importance"])
        ax.invert_yaxis()
        ax.set_title(f"Feature Importances — {model_name}")
        plt.tight_layout()
        mlflow.log_figure(fig, f"feature_importances_{model_name}.png")
        plt.close(fig)

# --- 6. ФУНКЦИИ OPTUNA (без изменений) ---
def tune_model(trial, model_class, is_catboost=False):
    if is_catboost:
        params = {"iterations": trial.suggest_int("iterations", 100, 200), "depth": trial.suggest_int("depth", 4, 8)}
    else:
        params = {"n_estimators": trial.suggest_int("n_estimators", 100, 200), "max_depth": trial.suggest_int("max_depth", 4, 8)}
    return model_class(random_state=42, **params)

def objective(trial, model_class, is_catboost=False):
    model = tune_model(trial, model_class, is_catboost)
    pipe = Pipeline([("imputer", SimpleImputer(strategy="mean")), ("model", model)])
    # Упрощенная валидация для скорости
    pipe.fit(X_train_raw, y_train)
    pred = pipe.predict(X_test_raw)
    return mean_absolute_error(y_test, pred)

# --- 7. ГЛАВНАЯ ФУНКЦИЯ ЗАПУСКА (ИСПРАВЛЕННАЯ И УПРОЩЕННАЯ) ---
def final_run(model_name, pipeline):
    with mlflow.start_run(run_name=f"{model_name}_Final"):
        # Обучение и оценка
        pipeline.fit(X_train_raw, y_train)
        y_pred = pipeline.predict(X_test_raw)
        metrics = calc_metrics(y_test, y_pred)
        mlflow.log_metrics(metrics)
        log_feature_importances(pipeline, model_name, feature_names)

        # Логирование модели (простой и надежный способ)
        try:
            input_example = X_train_raw.head(5)
            mlflow.sklearn.log_model(
                sk_model=pipeline,
                artifact_path="model_pipeline", # <- ИСПРАВЛЕНО: Возвращен artifact_path
                input_example=input_example
            )
            print(f"✅ УСПЕХ: Модель {model_name} успешно залогирована!")
        except Exception:
            err_txt = "[log_model] failed:\n" + traceback.format_exc()
            print(err_txt)
            # Если основной способ не сработал, пробуем сохранить вручную
            print("Основной метод логирования не удался. Пробуем запасной вариант...")
            try:
                with tempfile.TemporaryDirectory() as tmpdir:
                    local_dir = os.path.join(tmpdir, "model_pipeline")
                    mlflow.sklearn.save_model(sk_model=pipeline, path=local_dir)
                    mlflow.log_artifacts(local_dir, artifact_path="model_pipeline")
                print(f"✅ УСПЕХ: Модель {model_name} успешно залогирована (запасной вариант)!")
            except Exception:
                err_txt = "[save_model/log_artifacts] fallback failed:\n" + traceback.format_exc()
                print(err_txt)
                print(f"❌ ОШИБКА: Не удалось залогировать модель {model_name}.")

# --- 8. ЗАПУСК ЭКСПЕРИМЕНТОВ ---
# CatBoost
print("\n--- CatBoost ---")
study_cat = optuna.create_study(direction="minimize")
study_cat.optimize(lambda trial: objective(trial, CatBoostRegressor, is_catboost=True), n_trials=2)
best_cat_model = tune_model(study_cat.best_trial, CatBoostRegressor, is_catboost=True)
cat_pipe = Pipeline([("imputer", SimpleImputer(strategy="mean")), ("model", best_cat_model)])
final_run("CatBoost_Optuna", cat_pipe)

# XGBoost
print("\n--- XGBoost ---")
study_xgb = optuna.create_study(direction="minimize")
study_xgb.optimize(lambda trial: objective(trial, XGBRegressor), n_trials=2)
best_xgb_model = tune_model(study_xgb.best_trial, XGBRegressor)
xgb_pipe = Pipeline([("imputer", SimpleImputer(strategy="mean")), ("model", best_xgb_model)])
final_run("XGBoost_Optuna", xgb_pipe)

# LightGBM
print("\n--- LightGBM ---")
study_lgbm = optuna.create_study(direction="minimize")
study_lgbm.optimize(lambda trial: objective(trial, LGBMRegressor), n_trials=2)
best_lgbm_model = tune_model(study_lgbm.best_trial, LGBMRegressor)
lgbm_pipe = Pipeline([("imputer", SimpleImputer(strategy="mean")), ("model", best_lgbm_model)])
final_run("LightGBM_Optuna", lgbm_pipe)

print("\n=== All done ===")

[I 2025-09-01 19:57:35,726] A new study created in memory with name: no-name-bfb0b4e5-7946-436a-af35-823bb4969852


MLflow URI: http://84.201.144.227:8000
Experiment: financial_timeseries_regression

--- CatBoost ---
Learning rate set to 0.260374
0:	learn: 16.6287491	total: 139ms	remaining: 17.5s
1:	learn: 12.9823087	total: 142ms	remaining: 8.86s
2:	learn: 9.9834582	total: 144ms	remaining: 5.96s
3:	learn: 7.7232313	total: 147ms	remaining: 4.52s
4:	learn: 6.0556978	total: 150ms	remaining: 3.65s
5:	learn: 4.8299174	total: 153ms	remaining: 3.08s
6:	learn: 3.8485984	total: 155ms	remaining: 2.66s
7:	learn: 3.1415805	total: 158ms	remaining: 2.35s
8:	learn: 2.6121371	total: 161ms	remaining: 2.11s
9:	learn: 2.2704514	total: 164ms	remaining: 1.92s
10:	learn: 2.0118315	total: 167ms	remaining: 1.76s
11:	learn: 1.8360043	total: 169ms	remaining: 1.62s
12:	learn: 1.6970575	total: 172ms	remaining: 1.51s
13:	learn: 1.5992969	total: 175ms	remaining: 1.41s
14:	learn: 1.5189580	total: 177ms	remaining: 1.32s
15:	learn: 1.4704049	total: 180ms	remaining: 1.25s
16:	learn: 1.4361914	total: 183ms	remaining: 1.18s
17:	learn:

[I 2025-09-01 19:57:36,286] Trial 0 finished with value: 11.1105036472402 and parameters: {'iterations': 127, 'depth': 5}. Best is trial 0 with value: 11.1105036472402.


79:	learn: 0.7624193	total: 340ms	remaining: 200ms
80:	learn: 0.7598253	total: 343ms	remaining: 195ms
81:	learn: 0.7571643	total: 345ms	remaining: 189ms
82:	learn: 0.7477602	total: 348ms	remaining: 184ms
83:	learn: 0.7447679	total: 350ms	remaining: 179ms
84:	learn: 0.7417858	total: 353ms	remaining: 174ms
85:	learn: 0.7341638	total: 356ms	remaining: 170ms
86:	learn: 0.7288152	total: 358ms	remaining: 165ms
87:	learn: 0.7259045	total: 361ms	remaining: 160ms
88:	learn: 0.7203325	total: 363ms	remaining: 155ms
89:	learn: 0.7180922	total: 366ms	remaining: 150ms
90:	learn: 0.7147420	total: 369ms	remaining: 146ms
91:	learn: 0.7075832	total: 372ms	remaining: 141ms
92:	learn: 0.7008001	total: 374ms	remaining: 137ms
93:	learn: 0.6986582	total: 376ms	remaining: 132ms
94:	learn: 0.6955708	total: 379ms	remaining: 128ms
95:	learn: 0.6950713	total: 381ms	remaining: 123ms
96:	learn: 0.6917724	total: 384ms	remaining: 119ms
97:	learn: 0.6860198	total: 387ms	remaining: 114ms
98:	learn: 0.6832623	total: 390

[I 2025-09-01 19:57:36,607] Trial 1 finished with value: 9.965203573448914 and parameters: {'iterations': 136, 'depth': 4}. Best is trial 1 with value: 9.965203573448914.


120:	learn: 0.6533830	total: 214ms	remaining: 26.5ms
121:	learn: 0.6509717	total: 216ms	remaining: 24.8ms
122:	learn: 0.6491857	total: 218ms	remaining: 23ms
123:	learn: 0.6481063	total: 219ms	remaining: 21.2ms
124:	learn: 0.6462394	total: 221ms	remaining: 19.5ms
125:	learn: 0.6454318	total: 223ms	remaining: 17.7ms
126:	learn: 0.6442761	total: 225ms	remaining: 15.9ms
127:	learn: 0.6414253	total: 226ms	remaining: 14.1ms
128:	learn: 0.6400658	total: 228ms	remaining: 12.4ms
129:	learn: 0.6382141	total: 230ms	remaining: 10.6ms
130:	learn: 0.6355625	total: 232ms	remaining: 8.84ms
131:	learn: 0.6343918	total: 233ms	remaining: 7.07ms
132:	learn: 0.6327434	total: 235ms	remaining: 5.3ms
133:	learn: 0.6313395	total: 237ms	remaining: 3.53ms
134:	learn: 0.6303149	total: 239ms	remaining: 1.77ms
135:	learn: 0.6255269	total: 240ms	remaining: 0us
Learning rate set to 0.246277
0:	learn: 17.0472393	total: 2.34ms	remaining: 316ms
1:	learn: 13.2956319	total: 4.35ms	remaining: 291ms
2:	learn: 10.4394657	tot

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

[I 2025-09-01 19:57:46,469] A new study created in memory with name: no-name-d99e178a-bda7-455e-993b-988ea8b0a849


✅ УСПЕХ: Модель CatBoost_Optuna успешно залогирована!
🏃 View run CatBoost_Optuna_Final at: http://84.201.144.227:8000/#/experiments/8/runs/50e2caf79c1445799ce66069d856534b
🧪 View experiment at: http://84.201.144.227:8000/#/experiments/8

--- XGBoost ---


[I 2025-09-01 19:57:47,302] Trial 0 finished with value: 8.965506393268544 and parameters: {'n_estimators': 114, 'max_depth': 8}. Best is trial 0 with value: 8.965506393268544.
[I 2025-09-01 19:57:48,489] Trial 1 finished with value: 8.965739840435726 and parameters: {'n_estimators': 174, 'max_depth': 8}. Best is trial 0 with value: 8.965506393268544.


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

[I 2025-09-01 19:57:54,512] A new study created in memory with name: no-name-68eb89f3-5b39-448b-82d8-f0aa09c6d779


✅ УСПЕХ: Модель XGBoost_Optuna успешно залогирована!
🏃 View run XGBoost_Optuna_Final at: http://84.201.144.227:8000/#/experiments/8/runs/53f38cf31efd4f60bfdecf7dba9bd9f6
🧪 View experiment at: http://84.201.144.227:8000/#/experiments/8

--- LightGBM ---


[I 2025-09-01 19:57:56,278] Trial 0 finished with value: 8.927154074239581 and parameters: {'n_estimators': 107, 'max_depth': 6}. Best is trial 0 with value: 8.927154074239581.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001321 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10792
[LightGBM] [Info] Number of data points in the train set: 2975, number of used features: 46
[LightGBM] [Info] Start training from score 135.491479
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001164 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10792
[LightGBM] [Info] Number of data points in the train set: 2975, number of used features: 46
[LightGBM] [Info] Start training from score 135.491479


[I 2025-09-01 19:57:56,423] Trial 1 finished with value: 8.935278759186843 and parameters: {'n_estimators': 145, 'max_depth': 6}. Best is trial 0 with value: 8.927154074239581.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001065 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 10792
[LightGBM] [Info] Number of data points in the train set: 2975, number of used features: 46
[LightGBM] [Info] Start training from score 135.491479


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

✅ УСПЕХ: Модель LightGBM_Optuna успешно залогирована!
🏃 View run LightGBM_Optuna_Final at: http://84.201.144.227:8000/#/experiments/8/runs/620920d8be23464388fed13ac00fb88b
🧪 View experiment at: http://84.201.144.227:8000/#/experiments/8

=== All done ===
