In [None]:
!pip install gmdh

Collecting gmdh
  Downloading gmdh-1.0.3-cp311-cp311-manylinux1_x86_64.whl.metadata (14 kB)
Collecting docstring-inheritance (from gmdh)
  Downloading docstring_inheritance-2.2.2-py3-none-any.whl.metadata (11 kB)
Downloading gmdh-1.0.3-cp311-cp311-manylinux1_x86_64.whl (875 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m875.1/875.1 kB[0m [31m12.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading docstring_inheritance-2.2.2-py3-none-any.whl (24 kB)
Installing collected packages: docstring-inheritance, gmdh
Successfully installed docstring-inheritance-2.2.2 gmdh-1.0.3


In [None]:
# NOTE(review): `update-alternatives --install` expects <link> <name> <path> <priority>;
# the <priority> argument is missing here, so the command stops with a usage error
# (see the cell output). The next cell still reports Python 3.11, i.e. this cell does
# not switch the interpreter. It looks like a leftover experiment — consider removing it.
!sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.6

update-alternatives: --install needs <link> <name> <path> <priority>

Use 'update-alternatives --help' for program usage information.


In [None]:
import sys
# Print the full version string of the interpreter the kernel is running on.
print(sys.version)

3.11.12 (main, Apr  9 2025, 08:55:54) [GCC 11.4.0]


In [None]:
!pip install numpy==1.24.4



1. Подготовка данных

In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import StackingRegressor
import matplotlib.pyplot as plt
import gmdh
import warnings
# Silence every Python warning for the rest of the session to keep cell output compact.
warnings.filterwarnings(action='ignore')

# Load the California Housing dataset: the target array is kept as returned by
# scikit-learn, the feature matrix is wrapped in a DataFrame with named columns.
housing = fetch_california_housing()
y = housing.target
X = pd.DataFrame(data=housing.data, columns=housing.feature_names)

# Report the total number of missing cells over all feature columns.
print("Пропуски в данных:", X.isna().sum().sum())

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardise the features: the scaler is fitted on the training split only and the
# same fitted transformation is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Show the resulting split sizes as (rows, columns).
print(f"Размер обучающей выборки: {X_train.shape}")
print(f"Размер тестовой выборки: {X_test.shape}")

Пропуски в данных: 0
Размер обучающей выборки: (16512, 8)
Размер тестовой выборки: (4128, 8)


2. Обучение моделей

2.1 Модель стекинга

In [None]:
# Base learners of the stacking ensemble: a random forest and a gradient-boosting
# regressor, both with 100 estimators and a fixed seed.
base_models = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('gb', GradientBoostingRegressor(n_estimators=100, random_state=42)),
]

# The final estimator that combines the base learners' predictions is a plain
# linear regression.
meta_model = LinearRegression()

# Assemble the stacking regressor (5-fold CV) and train it on the scaled training split.
stacking_model = StackingRegressor(
    estimators=base_models, final_estimator=meta_model, cv=5
).fit(X_train_scaled, y_train)

# Predict on the held-out split and compute MSE and R^2 against the true targets.
y_pred_stacking = stacking_model.predict(X_test_scaled)
mse_stacking, r2_stacking = (
    mean_squared_error(y_test, y_pred_stacking),
    r2_score(y_test, y_pred_stacking),
)

print(f"Stacking - MSE: {mse_stacking:.4f}, R2: {r2_stacking:.4f}")

Stacking - MSE: 0.2534, R2: 0.8066


2.2 Многослойный персептрон (MLP)

In [None]:
# Multilayer perceptron with two hidden layers (100 and 50 units), ReLU activation
# and the Adam solver; trained on the scaled training split with a fixed seed and
# at most 1000 iterations.
mlp_model = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    alpha=0.0001,
    max_iter=1000,
    random_state=42,
).fit(X_train_scaled, y_train)

# Predict on the held-out split and compute MSE and R^2 against the true targets.
y_pred_mlp = mlp_model.predict(X_test_scaled)
mse_mlp, r2_mlp = (
    mean_squared_error(y_test, y_pred_mlp),
    r2_score(y_test, y_pred_mlp),
)

print(f"MLP - MSE: {mse_mlp:.4f}, R2: {r2_mlp:.4f}")

MLP - MSE: 0.2710, R2: 0.7932


2.3 Модели МГУА (GMDH)

In [None]:
from gmdh import Combi, Mia

# --- COMBI (the linear GMDH method, per the notebook's section notes) ---
# Train with the library's default settings on the scaled training split.
combi_model = Combi()
combi_model.fit(X_train_scaled, y_train)

# Predict on the held-out split and score the predictions.
y_pred_combi = combi_model.predict(X_test_scaled)
mse_combi, r2_combi = (
    mean_squared_error(y_test, y_pred_combi),
    r2_score(y_test, y_pred_combi),
)

print(f"COMBI - MSE: {mse_combi:.4f}, R2: {r2_combi:.4f}")

# --- MIA (the non-linear GMDH method, per the notebook's section notes) ---
# Instantiated through the directly imported name rather than via `gmdh.Mia()`.
mia_model = Mia()
mia_model.fit(X_train_scaled, y_train)

# Predict on the held-out split and score the predictions.
y_pred_mia = mia_model.predict(X_test_scaled)
mse_mia, r2_mia = (
    mean_squared_error(y_test, y_pred_mia),
    r2_score(y_test, y_pred_mia),
)

print(f"MIA - MSE: {mse_mia:.4f}, R2: {r2_mia:.4f}")

COMBI - MSE: 0.5567, R2: 0.5752
MIA - MSE: 0.6363, R2: 0.5144
