#### Linear Regression

In [1]:
import pandas as pd
import numpy as np
from sklearn import metrics 
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.svm import SVR
%matplotlib inline

In [None]:
# Simple linear regression: a single predictor column against the target.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Column index 5 is sliced as 5:6 so the feature matrix stays 2-D;
# column index 2 is the target (presumably the sale price -- confirm
# against the CSV header).
X = base.iloc[:, 5:6].values
y = base.iloc[:, 2].values

# Hold out 30% of the rows for evaluation; the seed is fixed so the split
# is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

regressor = LinearRegression()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# Fitted parameters and R-squared on both partitions.
print('linear model coeff (w): {}'.format(regressor.coef_))
print('linear model intercept (b): {:.3f}'.format(regressor.intercept_))
print('R-squared score (training): {:.3f}'.format(
    regressor.score(X_train, y_train)))
print('R-squared score (test): {:.3f}'.format(
    regressor.score(X_test, y_test)))

# Error metrics on the held-out rows; the MSE is computed once and reused
# for the RMSE.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

#### Multiple Linear Regression

In [None]:
# Linear regression on several predictor columns at once.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18; target: column index 2.
features = base.iloc[:, 3:19]
target = base.iloc[:, 2]
X = features.values
y = target.values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

regressor = LinearRegression()
regressor.fit(X_train, y_train)
y_pred = regressor.predict(X_test)

# Fitted parameters.
print('linear model coeff (w): {}'.format(regressor.coef_))
print('linear model intercept (b): {:.3f}'.format(regressor.intercept_))

# R-squared on both partitions.
r2_train = regressor.score(X_train, y_train)
r2_test = regressor.score(X_test, y_test)
print('\nR-squared score (training): {:.3f}'.format(r2_train))
print('R-squared score (test): {:.3f}'.format(r2_test))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

#### Polynomial Regression

In [None]:
# Polynomial regression: expand the features to degree-2 polynomial terms,
# then fit an ordinary linear model on the expanded matrix.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18; target: column index 2.
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# PolynomialFeatures must be instantiated before use: fit_transform and
# transform are instance methods. The expansion is fitted on the training
# rows and then applied unchanged to the test rows.
poly = PolynomialFeatures(degree=2)
X_train_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)

# No second split is needed (the original re-split an undefined `X_poly`):
# the expanded matrices above already line up with y_train / y_test.
linreg = LinearRegression()
linreg.fit(X_train_poly, y_train)
y_pred = linreg.predict(X_test_poly)

print('(poly deg 2) linear model coeff (w):\n{}'
      .format(linreg.coef_))
print('(poly deg 2) linear model intercept (b): {:.3f}'
      .format(linreg.intercept_))
# Score on the *expanded* features: the model was trained on them, so the
# raw X_train / X_test have the wrong number of columns.
print('\n(poly deg 2) R-squared score (training): {:.3f}'
      .format(linreg.score(X_train_poly, y_train)))
print('(poly deg 2) R-squared score (test): {:.3f}\n'
      .format(linreg.score(X_test_poly, y_test)))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

#### Decision Tree

In [None]:
# Decision-tree regression on the multi-feature matrix.
import pandas as pd
import warnings
# Silence FutureWarning noise for this cell and the ones that run after it.
warnings.filterwarnings("ignore", category=FutureWarning)
from sklearn import metrics
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18; target: column index 2.
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

regressor = DecisionTreeRegressor()
regressor.fit(X_train, y_train)
# Training R-squared, kept in `score` as in the original cell.
score = regressor.score(X_train, y_train)

y_pred = regressor.predict(X_test)

print('Decision score (training): {:.3f}'.format(
    regressor.score(X_train, y_train)))
print('Decision score (test): {:.3f}'.format(
    regressor.score(X_test, y_test)))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

#### Random Forest

In [None]:
# Random-forest regression on the multi-feature matrix.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18; target: column index 2.
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

regressor = RandomForestRegressor(n_estimators=100)
# Fit on the training rows only. Fitting on the full X, y (as before) lets
# the forest see the test rows, so the "test" score was not a held-out score.
regressor.fit(X_train, y_train)
score = regressor.score(X_train, y_train)

# Predictions must come from *this* model; without this line the metrics
# below would silently reuse y_pred left over from a previously run cell.
y_pred = regressor.predict(X_test)

print('Random Forest score (training): {:.3f}'
      .format(score))
print('Random Forest score (test): {:.3f}'
      .format(regressor.score(X_test, y_test)))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

#### MLPR

In [None]:
# Multi-layer perceptron regression on standardized features and target.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18. The target (column index 2) is
# sliced as 2:3 so it stays 2-D, which StandardScaler requires.
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2:3].values

# Split BEFORE scaling so the scalers' statistics come from the training
# rows only and nothing about the test rows leaks into preprocessing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

scaler_x = StandardScaler()
X_train = scaler_x.fit_transform(X_train)
X_test = scaler_x.transform(X_test)

scaler_y = StandardScaler()
# The regressor expects a 1-D target, hence ravel() after scaling.
y_train = scaler_y.fit_transform(y_train).ravel()
y_test_scaled = scaler_y.transform(y_test).ravel()

regressor = MLPRegressor(hidden_layer_sizes=(9, 9))
regressor.fit(X_train, y_train)

# R-squared is computed in the scaled space the model was trained in.
# (The original scored against y_test after it had been inverse-transformed,
# comparing scaled predictions with unscaled targets.)
score = regressor.score(X_train, y_train)
test_score = regressor.score(X_test, y_test_scaled)

# Map predictions back to the target's original units for the error
# metrics. inverse_transform needs a 2-D array, so reshape the 1-D output.
y_pred = scaler_y.inverse_transform(
    regressor.predict(X_test).reshape(-1, 1)).ravel()
y_test = y_test.ravel()

print('MLPR score (training): {:.3f}'
      .format(score))
print('MLPR score (test): {:.3f}'
      .format(test_score))

# Error metrics on the held-out rows, in original target units.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

#### SVR

In [None]:
# Support-vector regression with a linear kernel.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18. The target (column index 2) is
# taken as a 1-D array because SVR expects y of shape (n_samples,).
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# kernel linear
regressor_linear = SVR(kernel='linear')
# Fit on the training rows only so the test score is a true held-out score.
regressor_linear.fit(X_train, y_train)

# Predictions from this model; the original never set y_pred in this cell,
# so the metrics below reported another cell's predictions.
y_pred = regressor_linear.predict(X_test)

print('kernel linear score (training): {:.3f}'
      .format(regressor_linear.score(X_train, y_train)))
print('kernel linear (test): {:.3f}'
      .format(regressor_linear.score(X_test, y_test)))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

In [None]:
# Support-vector regression with a degree-3 polynomial kernel.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18. The target (column index 2) is
# taken as a 1-D array because SVR expects y of shape (n_samples,).
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# kernel poly
regressor_poly = SVR(kernel='poly', degree=3)
# Fit on the training rows only so the test score is a true held-out score.
regressor_poly.fit(X_train, y_train)

# Predictions from this model; the original never set y_pred in this cell,
# so the metrics below reported another cell's predictions.
y_pred = regressor_poly.predict(X_test)

print('kernel poly (training): {:.3f}'
      .format(regressor_poly.score(X_train, y_train)))
print('kernel poly (test): {:.3f}'
      .format(regressor_poly.score(X_test, y_test)))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

In [None]:
# Support-vector regression with an RBF kernel on standardized data.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18. The target (column index 2) is
# sliced as 2:3 so it stays 2-D, which StandardScaler requires.
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2:3].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# kernel rbf
# Scale the split arrays themselves, with statistics learned from the
# training rows only. The original scaled the full X / y *after* splitting,
# so X_train / X_test stayed unscaled while the model was fit on scaled data.
scaler_x = StandardScaler()
X_train = scaler_x.fit_transform(X_train)
X_test = scaler_x.transform(X_test)

scaler_y = StandardScaler()
# SVR expects a 1-D target, hence ravel() after scaling.
y_train = scaler_y.fit_transform(y_train).ravel()
y_test_scaled = scaler_y.transform(y_test).ravel()

regressor_rbf = SVR(kernel='rbf')
regressor_rbf.fit(X_train, y_train)

# R-squared in the scaled space the model was trained in.
train_score = regressor_rbf.score(X_train, y_train)
test_score = regressor_rbf.score(X_test, y_test_scaled)

# Predict with regressor_rbf (the original called the unrelated `regressor`
# left over from another cell), then map back to original target units.
# inverse_transform needs a 2-D array, so reshape the 1-D predictions.
y_pred = scaler_y.inverse_transform(
    regressor_rbf.predict(X_test).reshape(-1, 1)).ravel()
# y_test was never scaled, so it is only flattened, not inverse-transformed.
y_test = y_test.ravel()

print('kernel rbf (training): {:.3f}'
      .format(train_score))
print('kernel rbf (test): {:.3f}'
      .format(test_score))

# Error metrics on the held-out rows, in original target units.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

#### AdaBoost

In [None]:
# AdaBoost regression with the library's default base learner.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18; target: column index 2.
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

regressor_ada = AdaBoostRegressor(n_estimators=100)
# Fit on the training rows only. Fitting on the full X, y (as before) lets
# the ensemble see the test rows, so the "test" score was not held-out.
regressor_ada.fit(X_train, y_train)
score = regressor_ada.score(X_train, y_train)

y_pred = regressor_ada.predict(X_test)

# Labels corrected: this cell reports AdaBoost, not Random Forest.
print('AdaBoost score (training): {:.3f}'
      .format(score))
print('AdaBoost score (test): {:.3f}'
      .format(regressor_ada.score(X_test, y_test)))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

In [None]:
# AdaBoost regression using an RBF-kernel SVR as the base learner.
from sklearn.model_selection import train_test_split

base = pd.read_csv('../data/house_prices.csv')

# Feature matrix: column indices 3..18; target: column index 2.
X = base.iloc[:, 3:19].values
y = base.iloc[:, 2].values

# 70/30 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# NOTE(review): the features are passed to the RBF SVR unscaled here, unlike
# the standalone RBF cell -- confirm whether that is intentional.
svr = SVR(kernel='rbf')

# The base learner is passed positionally because its keyword was renamed
# from `base_estimator` to `estimator` in newer scikit-learn releases; the
# positional form is accepted under either name.
regressor_ada = AdaBoostRegressor(svr, n_estimators=100, learning_rate=1.0)

# Fit on the training rows only so the test score is a true held-out score.
regressor_ada.fit(X_train, y_train)
score = regressor_ada.score(X_train, y_train)

y_pred = regressor_ada.predict(X_test)

# Labels corrected: this cell reports AdaBoost over SVR, not Random Forest.
print('AdaBoost (SVR) score (training): {:.3f}'
      .format(score))
print('AdaBoost (SVR) score (test): {:.3f}'
      .format(regressor_ada.score(X_test, y_test)))

# Error metrics on the held-out rows.
mse = metrics.mean_squared_error(y_test, y_pred)
print('\nMAE:', metrics.mean_absolute_error(y_test, y_pred))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))