In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import shap

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from sqlalchemy import create_engine
import pandas as pd

engine = create_engine('mssql+pyodbc://armin:123456@HP-VICTUS-16\\SQLEXPRESS/housing_price_data?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=yes')

df = pd.read_sql("SELECT * FROM housing_price_data", engine)

In [3]:
X = df.drop("price", axis=1)
y = df["price"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [4]:
pipe = Pipeline([("scaler", StandardScaler()),
                 ("regressor", SVR())])

param_grid = [
    {
        "regressor": [SVR()],
        "regressor__C": [0.1, 1, 10],
        "regressor__kernel": ["linear", "rbf"]
    },
    {
        "regressor": [RandomForestRegressor()],
        "regressor__n_estimators": [50, 100],
        "regressor__max_depth": [None, 5, 10]
    },
    {
        "regressor": [KNeighborsRegressor()],
        "regressor__n_neighbors": [3, 5, 7],
        "regressor__weights": ["uniform", "distance"]
    },
    {
        "regressor": [LinearRegression()]
    }
]

In [5]:
grid = GridSearchCV(pipe, param_grid, cv=5)
grid.fit(X_train, y_train)

y_pred = grid.best_estimator_.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

print("Najbolji model:", grid.best_estimator_)
print("Najbolji parametri:", grid.best_params_)
print("Najbolji CV score (R2):", grid.best_score_)
print("R2 na test skupu:", r2)
print("MSE na test skupu:", mse)

Najbolji model: Pipeline(steps=[('scaler', StandardScaler()),
                ('regressor', RandomForestRegressor())])
Najbolji parametri: {'regressor': RandomForestRegressor(), 'regressor__max_depth': None, 'regressor__n_estimators': 100}
Najbolji CV score (R2): 0.6414134184593329
R2 na test skupu: 0.7338585584360886
MSE na test skupu: 30269830304.88312
