In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn import metrics

In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import SGDRegressor
from sklearn.svm import SVR

In [4]:
import warnings
warnings.filterwarnings("ignore")

In [5]:
data = pd.read_csv('../data/Housing_Prices.csv')

In [6]:
X = data[['Avg. Area Income', 'Avg. Area House Age', 'Avg. Area Number of Rooms',
          'Avg. Area Number of Bedrooms', 'Area Population']]
y = data['Price']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

In [8]:
pipeline = Pipeline([('std_scalar', StandardScaler())])
X_train = pipeline.fit_transform(X_train)
X_test = pipeline.transform(X_test)

In [9]:
def print_metrics(true_value, predicted_value):
    mae = metrics.mean_absolute_error(true_value, predicted_value)
    mse = metrics.mean_squared_error(true_value, predicted_value)
    rmse = np.sqrt(metrics.mean_squared_error(true_value, predicted_value))
    #msle = metrics.mean_squared_log_error(true_value, predicted_value)
    r2 = metrics.r2_score(true_value, predicted_value)

    print('MAE:', mae)
    print('MSE:', mse)
    print('RMSE:', rmse)
    #print('MSLE:', msle)
    print('R2 Square', r2)
    print()


def add_metrics_to_dataframe(true_value, predicted_value):
    mae = metrics.mean_absolute_error(true_value, predicted_value)
    mse = metrics.mean_squared_error(true_value, predicted_value)
    rmse = np.sqrt(metrics.mean_squared_error(true_value, predicted_value))
    # msle = metrics.mean_squared_log_error(true_value, predicted_value)
    r2 = metrics.r2_score(true_value, predicted_value)

    return mae, mse, rmse, r2

# Множественная регрессия

In [10]:
linear_regression= LinearRegression(normalize=True)
linear_regression.fit(X_train,y_train)

LinearRegression(normalize=True)

In [11]:
test_pred_linear_regression = linear_regression.predict(X_test)
train_pred_linear_regression = linear_regression.predict(X_train)

In [12]:
print('Линейная регрессия\n')
print('Оценка тестового набора:\n')
print_metrics(y_test, test_pred_linear_regression)
print('Оценка тренировочного набора:\n')
print_metrics(y_train, train_pred_linear_regression)

Линейная регрессия

Оценка тестового набора:

MAE: 80879.09723489443
MSE: 10089009300.89399
RMSE: 100444.06055558482
R2 Square 0.9179971706834331

Оценка тренировочного набора:

MAE: 81509.3933124445
MSE: 10256318867.482723
RMSE: 101273.48551068401
R2 Square 0.9179787435623722



# Робастная регрессия

In [13]:
ransac_regression = RANSACRegressor(base_estimator=LinearRegression(),
                                    max_trials=100)
ransac_regression.fit(X_train, y_train)

RANSACRegressor(base_estimator=LinearRegression())

In [14]:
test_pred_ransac_regression = ransac_regression.predict(X_test)
train_pred_ransac_regression = ransac_regression.predict(X_train)

In [15]:
print('Робастная регрессия')
print('Оценка тестового набора:\n')
print_metrics(y_test, test_pred_ransac_regression)
print('Оценка тренировочного набора:\n')
print_metrics(y_train, train_pred_ransac_regression)

Робастная регрессия
Оценка тестового набора:

MAE: 83349.12538554944
MSE: 10710240039.34074
RMSE: 103490.2895896071
R2 Square 0.912947846543496

Оценка тренировочного набора:

MAE: 83736.86232029837
MSE: 10990281714.691156
RMSE: 104834.54447218796
R2 Square 0.9121091371583203



# Регрессия гребня

In [16]:
ridge_regression = Ridge(alpha=100, solver='cholesky', tol=0.0001, random_state=42)
ridge_regression.fit(X_train, y_train)

Ridge(alpha=100, random_state=42, solver='cholesky', tol=0.0001)

In [17]:
test_pred_ridge_regression = ridge_regression.predict(X_test)
train_pred_ridge_regression = ridge_regression.predict(X_train)

In [18]:
print('Регрессия гребня\n')
print('Оценка тестового набора:\n')
print_metrics(y_test, test_pred_ridge_regression)
print('Оценка тренировочного набора:\n')
print_metrics(y_train, train_pred_ridge_regression)

Регрессия гребня

Оценка тестового набора:

MAE: 80995.02167280123
MSE: 10162063037.444983
RMSE: 100807.05847035207
R2 Square 0.9174033945344908

Оценка тренировочного набора:

MAE: 81925.75280276715
MSE: 10331167369.014244
RMSE: 101642.35027297551
R2 Square 0.9173801693353605



R2 Square 0.9173801693353605


# Регрессия LASSO

In [19]:
lasso_regression = Lasso(alpha=0.1, precompute=True, positive=True,
                         selection='random', random_state=42)
lasso_regression.fit(X_train, y_train)

Lasso(alpha=0.1, positive=True, precompute=True, random_state=42,
      selection='random')

In [20]:
test_pred_LASSO_regression = lasso_regression.predict(X_test)
train_pred_LASSO_regression = lasso_regression.predict(X_train)

In [21]:
print('Регрессия LASSO\n')
print('Оценка тестового набора:\n')
print_metrics(y_test, test_pred_LASSO_regression)
print('Оценка тренировочного набора:\n')
print_metrics(y_train, train_pred_LASSO_regression)

Регрессия LASSO

Оценка тестового набора:

MAE: 80879.23132573774
MSE: 10089072746.54004
RMSE: 100444.37638086086
R2 Square 0.917996655001236

Оценка тренировочного набора:

MAE: 81509.54922857058
MSE: 10256318921.380966
RMSE: 101273.48577678645
R2 Square 0.9179787431313402



# Полиномиальная регрессия

In [22]:
polynomial_regression = PolynomialFeatures(degree=2)

In [23]:
X_train_2_d = polynomial_regression.fit_transform(X_train)
X_test_2_d = polynomial_regression.transform(X_test)

linear_regression_for_polynomial = LinearRegression(normalize=True)
linear_regression_for_polynomial.fit(X_train_2_d,y_train)

test_pred_polynomial_regression= linear_regression_for_polynomial.predict(X_test_2_d)
train_pred_polynomial_regression = linear_regression_for_polynomial.predict(X_train_2_d)

In [24]:
print('Полиномиальная регрессия\n')
print('Оценка тестового набора:\n')
print_metrics(y_test, test_pred_polynomial_regression)
print('Оценка тренировочного набора:\n')
print_metrics(y_train, train_pred_polynomial_regression)

Полиномиальная регрессия

Оценка тестового набора:

MAE: 80886.67204231035
MSE: 10099268148.792528
RMSE: 100495.11504940192
R2 Square 0.9179137874167398

Оценка тренировочного набора:

MAE: 81387.55946584558
MSE: 10235274577.766567
RMSE: 101169.5338417973
R2 Square 0.9181470377725716



# Стохастический Градиентный спуск

In [25]:
sgd_regression = SGDRegressor(n_iter_no_change=250, penalty=None, eta0=0.0001, max_iter=100000)
sgd_regression.fit(X_train, y_train)

SGDRegressor(eta0=0.0001, max_iter=100000, n_iter_no_change=250, penalty=None)

In [26]:
test_pred_sgd_regression = sgd_regression.predict(X_test)
train_pred_sgd_regression = sgd_regression.predict(X_train)

In [27]:
print('Стохастический Градиентный спуск\n')
print('Оценка тестового набора:\n')
print_metrics(y_test, test_pred_sgd_regression)
print('Оценка тренировочного набора:\n')
print_metrics(y_train, train_pred_sgd_regression)

Стохастический Градиентный спуск

Оценка тестового набора:

MAE: 80879.09429471445
MSE: 10089008104.716835
RMSE: 100444.0546011402
R2 Square 0.9179971804058854

Оценка тренировочного набора:

MAE: 81509.39485577661
MSE: 10256318867.596985
RMSE: 101273.48551124813
R2 Square 0.9179787435614584



# Метод опорных векторов

In [28]:
svm_regression = SVR(kernel='rbf', C=1000000, epsilon=0.001)
svm_regression.fit(X_train, y_train)

SVR(C=1000000, epsilon=0.001)

In [29]:
test_pred_svm_regression = svm_regression.predict(X_test)
train_pred_svm_regression = svm_regression.predict(X_train)

In [30]:
print('Метод опорных векторов\n')
print('Оценка тестового набора:\n')
print_metrics(y_test, test_pred_svm_regression)
print('Оценка тренировочного набора:\n')
print_metrics(y_train, train_pred_svm_regression)

Метод опорных векторов

Оценка тестового набора:

MAE: 87109.10221419216
MSE: 11963457919.118093
RMSE: 109377.59331379573
R2 Square 0.9027617709014857

Оценка тренировочного набора:

MAE: 74229.75847145652
MSE: 9412216645.75215
RMSE: 97016.57923134658
R2 Square 0.9247291503781782

