In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

# Load the raw data
data1 = pd.read_csv('House_Rent_Dataset.csv')

# Drop rows containing null values. The explicit .copy() makes data_cleaned an
# independent frame, so the per-column assignments in the encoding loop below
# never write into a view of data1 (avoids SettingWithCopyWarning).
data_cleaned = data1.dropna().copy()

# Encode categorical features as integer labels.
# One fitted LabelEncoder is kept per column so the codes can be mapped back later.
label_encoders = {}
categorical_columns = ['Posted On', 'Floor', 'Area Type', 'Area Locality', 'City', 'Furnishing Status', 'Tenant Preferred', 'Point of Contact']

for column in categorical_columns:
    label_encoders[column] = LabelEncoder()
    data_cleaned[column] = label_encoders[column].fit_transform(data_cleaned[column])

# Separate the predictors from the target column
X = data_cleaned.drop('Rent', axis=1)
y = data_cleaned['Rent']

# Keep only the features used for modelling
selected_features = ['Size', 'City', 'Bathroom', 'Point of Contact', 'BHK']
X = X[selected_features]

# Features and target side by side, so outlier rows are dropped from both together
data_combined = pd.concat([X, y], axis=1)

# Remove outliers with the interquartile-range (IQR) rule
# NOTE(review): the rule is applied to every column of data_combined, including
# the label-encoded 'City' and 'Point of Contact' codes, whose numeric order is
# arbitrary - confirm this is intended.
# NOTE(review): the thresholds are computed before the train/test split, so rows
# that end up in the test set also influence them.
Q1 = data_combined.quantile(0.25)
Q3 = data_combined.quantile(0.75)
IQR = Q3 - Q1

# A row is an outlier if any of its columns falls outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR]
outlier_mask = ((data_combined < (Q1 - 1.5 * IQR)) | (data_combined > (Q3 + 1.5 * IQR))).any(axis=1)
data_filtered = data_combined[~outlier_mask]

# Split the filtered data back into features and target
X_filtered = data_filtered[selected_features]
y_filtered = data_filtered['Rent']

# Train/test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X_filtered, y_filtered, test_size=0.2, random_state=42)


In [5]:

# Feature scaling: the scaler is fitted on the training split only and the
# same fitted transformation is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [6]:
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

# Meta-model that combines the predictions of the base models
meta_model = LinearRegression()

# Base models (seeded so the run is reproducible)
base_models = [
    ('dt', DecisionTreeRegressor(random_state=42)),
    ('rf', RandomForestRegressor(random_state=42)),
]

# Stacking ensemble built from the pieces above
stacking_model = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model,
)

# Fit on the scaled training features
stacking_model.fit(X_train_scaled, y_train)

# Predict on the held-out split and report R2
y_pred_stacking = stacking_model.predict(X_test_scaled)
r2_stacking = r2_score(y_test, y_pred_stacking)
print(f"Stacking R2 Score: {r2_stacking:.4f}")

Stacking R2 Score: 0.6489


In [7]:
from sklearn.neural_network import MLPRegressor

# Build the MLP regressor (two hidden layers: 100 and 50 units) and fit it on
# the scaled training features; fit() returns the estimator itself
mlp_model = MLPRegressor(
    hidden_layer_sizes=(100, 50),
    activation='relu',
    solver='adam',
    random_state=42,
    max_iter=500,
).fit(X_train_scaled, y_train)

# Predict on the held-out split
y_pred_mlp = mlp_model.predict(X_test_scaled)

# Report R2 on the test set
r2_mlp = r2_score(y_test, y_pred_mlp)
print(f"MLP R2 Score: {r2_mlp:.4f}")

MLP R2 Score: 0.6366




In [9]:
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

# Standardize the target as well (optional; helps when Rent has a wide spread).
# The scaler is fitted on the training target only.
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1)).ravel()
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1)).ravel()

# MLP with a larger iteration budget and early stopping.
# The previous learning_rate='adaptive' argument was dropped: scikit-learn only
# honours that schedule with solver='sgd', so with 'adam' it had no effect.
mlp_model = MLPRegressor(
    hidden_layer_sizes=(100, 50),  # network architecture
    activation='relu',             # activation function
    solver='adam',                 # optimizer
    alpha=0.0001,                  # L2 regularization strength
    batch_size='auto',             # mini-batch size
    learning_rate_init=0.001,      # initial learning rate
    max_iter=1000,                 # upper bound on training iterations
    early_stopping=True,           # stop when the validation score stops improving
    validation_fraction=0.2,       # share of the training data held out for validation
    random_state=42,
    verbose=True                   # print the training log
)

# Train on the scaled features and the scaled target
mlp_model.fit(X_train_scaled, y_train_scaled)

# Predict, then map the predictions back to the original Rent scale
y_pred_mlp_scaled = mlp_model.predict(X_test_scaled)
y_pred_mlp = scaler_y.inverse_transform(y_pred_mlp_scaled.reshape(-1, 1)).ravel()

# Evaluate on the original scale
r2_mlp = r2_score(y_test, y_pred_mlp)
print(f"MLP R2 Score: {r2_mlp:.4f}")

Iteration 1, loss = 0.31720590
Validation score: 0.483854
Iteration 2, loss = 0.22146763
Validation score: 0.567049
Iteration 3, loss = 0.19830926
Validation score: 0.589783
Iteration 4, loss = 0.18758486
Validation score: 0.602835
Iteration 5, loss = 0.18136941
Validation score: 0.616006
Iteration 6, loss = 0.17661201
Validation score: 0.624742
Iteration 7, loss = 0.17431196
Validation score: 0.628507
Iteration 8, loss = 0.17012739
Validation score: 0.632586
Iteration 9, loss = 0.16658011
Validation score: 0.641327
Iteration 10, loss = 0.16471641
Validation score: 0.637754
Iteration 11, loss = 0.16296780
Validation score: 0.646139
Iteration 12, loss = 0.16083580
Validation score: 0.640455
Iteration 13, loss = 0.15903614
Validation score: 0.654517
Iteration 14, loss = 0.15626306
Validation score: 0.651591
Iteration 15, loss = 0.15453320
Validation score: 0.657874
Iteration 16, loss = 0.15183813
Validation score: 0.655678
Iteration 17, loss = 0.14970426
Validation score: 0.659969
Iterat… [remaining training log truncated]

In [10]:
# Requires the optional third-party `gmdh` package (install it into the kernel's
# environment, e.g. `%pip install gmdh`, before running this cell).
# Multi stays in the import for backward compatibility with any code that uses it.
from gmdh import Combi, Mia, Multi

# Train COMBI
combi_model = Combi()
combi_model.fit(X_train_scaled, y_train)
y_pred_combi = combi_model.predict(X_test_scaled)
r2_combi = r2_score(y_test, y_pred_combi)
print(f"COMBI R2 Score: {r2_combi:.4f}")

# Train MIA
# Uses gmdh.Mia: the previous code instantiated Multi (the MULTI algorithm)
# while naming and reporting the result as MIA.
mia_model = Mia()
mia_model.fit(X_train_scaled, y_train)
y_pred_mia = mia_model.predict(X_test_scaled)
r2_mia = r2_score(y_test, y_pred_mia)
print(f"MIA R2 Score: {r2_mia:.4f}")

ModuleNotFoundError: No module named 'gmdh'