# Introduction to Neural Networks

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import LinearRegression, RidgeCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

Let's return to the King County housing dataset for this exercise.

We'll start by doing the standard feature creation.

In [None]:
# Load the raw King County sales records.
kc = pd.read_csv('data/kc_house_data.csv')

# Predictor columns pulled from the raw table; `price` is the target.
feature_columns = [
    'date', 'bedrooms', 'bathrooms', 'sqft_living',
    'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',
    'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',
    'lat', 'long', 'sqft_living15', 'sqft_lot15',
]
X = kc[feature_columns].copy()
y = kc['price']

# Year of sale, and the later of the build year and the renovation year.
sale_year = pd.to_datetime(X['date']).dt.year
last_work_year = X[['yr_built', 'yr_renovated']].max(axis = 1)

# Swap the raw date/year columns for ages measured at the time of sale.
X = X.assign(
    age_at_sale = sale_year - X['yr_built'],
    years_since_renovation = sale_year - last_work_year,
).drop(columns = ['date', 'yr_built', 'yr_renovated'])

# One indicator column per zipcode.
X = pd.get_dummies(X, columns = ['zipcode'])

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 321)

Let's start by just making a basic linear regression model with minimal preprocessing.

In [None]:
# Baseline: ordinary least squares on the unscaled features.
linreg = LinearRegression()
linreg.fit(X_train, y_train)

# Predict once on the held-out split and reuse the result for both metrics.
linreg_predictions = linreg.predict(X_test)

print(f'R2 Score: {r2_score(y_test, linreg_predictions)}')
print(f'MAE: {mean_absolute_error(y_test, linreg_predictions)}')

And for comparison, we'll do a ridge regression model as well.

In [None]:
# Ridge baseline: standardize the features, then fit RidgeCV on them.
pipe = Pipeline(
    steps = [
        ('scale', StandardScaler()),
        ('linreg', RidgeCV())
    ]
)

pipe.fit(X_train, y_train)

# Score the fitted pipeline itself. No object named `ridge` is ever created,
# so predicting with `ridge` raised a NameError on a fresh kernel.
ridge_predictions = pipe.predict(X_test)

print(f'R2 Score: {r2_score(y_test, ridge_predictions)}')
print(f'MAE: {mean_absolute_error(y_test, ridge_predictions)}')

Home prices tend to be very skewed, and these are no different. It may be advantageous to transform the target before fitting our model so that it pays less attention to the extreme-priced homes. We can try using the logarithm to transform these.

In [None]:
# Distribution of the raw training prices.
price_axes = y_train.hist()
price_axes.set_title('Untransformed');

In [None]:
# Distribution of the training prices after taking the natural log.
log_price_axes = np.log(y_train).hist()
log_price_axes.set_title('Transformed');

If we want to apply a transformation to the target variable, the best way to do this so that we are making fair evaluations is to use a TransformedTargetRegressor.

In [None]:
from sklearn.compose import TransformedTargetRegressor

In [None]:
# Same scale-then-ridge pipeline as before, now wrapped so the target is
# log-transformed for fitting and exponentiated back for predictions.
ridge_steps = [
    ('scale', StandardScaler()),
    ('linreg', RidgeCV()),
]
pipe = Pipeline(steps = ridge_steps)

ttr = TransformedTargetRegressor(regressor = pipe, func = np.log, inverse_func = np.exp)
ttr.fit(X_train, y_train)

# One prediction pass on the test split, reused for both metrics.
ttr_predictions = ttr.predict(X_test)

print(f'R2 Score: {r2_score(y_test, ttr_predictions)}')
print(f'MAE: {mean_absolute_error(y_test, ttr_predictions)}')

In [None]:
# Every feature whose name does not contain 'zipcode'.
non_zips = list(filter(lambda column: 'zipcode' not in column, X_train.columns))
non_zips

# Skewness of those features on the training split, largest first.
feature_skew = X_train[non_zips].skew()
feature_skew.sort_values(ascending = False)

In [None]:
# Features to pass through the PowerTransformer.
# Each name appears exactly once: a repeated name would make the
# ColumnTransformer select that column twice and hand the model a
# duplicated feature.
transformed_columns = ['sqft_living', 'sqft_lot', 'bedrooms',
                       'sqft_basement', 'sqft_above', 'sqft_living15']

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import PowerTransformer

In [None]:
# Power-transform only the columns listed in `transformed_columns`;
# every other column passes through unchanged.
power_step = ColumnTransformer(
    transformers = [('power', PowerTransformer(), transformed_columns)],
    remainder = 'passthrough',
)

pipe = Pipeline(
    steps = [
        ('ct', power_step),
        ('scaler', StandardScaler()),
        ('linear', RidgeCV()),
    ]
)

# Fit against log(price) and map predictions back with exp.
ttr = TransformedTargetRegressor(regressor = pipe, func = np.log, inverse_func = np.exp)
ttr.fit(X_train, y_train)

power_predictions = ttr.predict(X_test)

print(f'R2 Score: {r2_score(y_test, power_predictions)}')
print(f'MAE: {mean_absolute_error(y_test, power_predictions)}')

In [None]:
# Same preprocessing as the previous model, but RidgeCV now chooses among
# an explicit list of candidate regularization strengths.
candidate_alphas = [10, 15, 20]

power_step = ColumnTransformer(
    transformers = [('power', PowerTransformer(), transformed_columns)],
    remainder = 'passthrough',
)

pipe = Pipeline(
    steps = [
        ('ct', power_step),
        ('scaler', StandardScaler()),
        ('linear', RidgeCV(alphas = candidate_alphas)),
    ]
)

# Fit against log(price) and map predictions back with exp.
ttr = TransformedTargetRegressor(regressor = pipe, func = np.log, inverse_func = np.exp)
ttr.fit(X_train, y_train)

tuned_predictions = ttr.predict(X_test)

print(f'R2 Score: {r2_score(y_test, tuned_predictions)}')
print(f'MAE: {mean_absolute_error(y_test, tuned_predictions)}')

In [None]:
# Regularization strength chosen by the fitted RidgeCV step.
ttr.regressor_.named_steps['linear'].alpha_

Now that we've gotten some baseline scores, let's see how we do using a neural network.

Because of the way that neural networks optimize their coefficients, it is useful to scale your variable values. In this case, we'll go with a MinMaxScaler, which will convert all variables to values between 0 and 1.

In [None]:
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Neural-network pipeline: rescale every feature with MinMaxScaler, then fit
# a multi-layer perceptron with its default architecture.
# random_state pins the network's random number generation so that
# re-running the notebook reproduces the same scores.
pipe = Pipeline(
    steps = [
        ('scaler', MinMaxScaler()),
        ('mlr', MLPRegressor(verbose = True,
                            random_state = 321))
    ]
)

In [None]:
# Fit the scaler + MLP pipeline on the training split.
pipe.fit(X_train, y_train)

In [None]:
# Predict once on the held-out split and reuse the result for both metrics.
mlp_predictions = pipe.predict(X_test)

print(f'R2 Score: {r2_score(y_test, mlp_predictions)}')
print(f'MAE: {mean_absolute_error(y_test, mlp_predictions)}')

You very likely got a ConvergenceWarning, which indicates that the model thinks that it is not yet at a local minimum. You could increase the max_iter value, or you could take a different approach: add more layers.

Let's try adding a few layers to see if it improves the model's performance.

In [None]:
# Deeper network: three hidden layers of 100 units each.
# random_state pins the network's random number generation so that
# re-running the notebook reproduces the same scores.
pipe = Pipeline(
    steps = [
        ('scaler', MinMaxScaler()),
        ('mlr', MLPRegressor(verbose = True,
                            hidden_layer_sizes = (100, 100, 100),
                            random_state = 321))
    ]
)

pipe.fit(X_train, y_train)

In [None]:
# Predict once on the held-out split and reuse the result for both metrics.
deep_predictions = pipe.predict(X_test)

print(f'R2: {r2_score(y_test, deep_predictions)}')

print(f'MAE: {mean_absolute_error(y_test, deep_predictions)}')

If you look at the documentation, you'll see that there is a regularization parameter, alpha, that you can experiment with. Let's see what happens if we up the regularization strength.

In [None]:
# Same three-layer network with the regularization parameter alpha set to 1.
# random_state pins the network's random number generation so that
# re-running the notebook reproduces the same scores.
pipe = Pipeline(
    steps = [
        ('scaler', MinMaxScaler()),
        ('mlr', MLPRegressor(verbose = True,
                            hidden_layer_sizes = (100, 100, 100),
                            alpha = 1,
                            random_state = 321))
    ]
)

pipe.fit(X_train, y_train)

In [None]:
# Predict once on the held-out split and reuse the result for both metrics.
regularized_predictions = pipe.predict(X_test)

print(f'R2: {r2_score(y_test, regularized_predictions)}')

print(f'MAE: {mean_absolute_error(y_test, regularized_predictions)}')

**Your Turn:** Experiment with the neural network model. By adjusting the number or width of hidden layers, regularization strength, variable transformations, or number of iterations, can you find a model that does significantly better than a simple ridge regression?

In [None]:
# Experiment: three-layer network with alpha = 10 and up to 1000 training
# iterations.
# random_state pins the network's random number generation so that
# re-running the notebook reproduces the same scores.
pipe = Pipeline(
    steps = [
        ('scaler', MinMaxScaler()),
        ('mlr', MLPRegressor(verbose = True,
                            hidden_layer_sizes = (100, 100, 100),
                            alpha = 10,
                            max_iter = 1000,
                            random_state = 321))
    ]
)

pipe.fit(X_train, y_train)

In [None]:
# Predict once on the held-out split and reuse the result for both metrics.
experiment_predictions = pipe.predict(X_test)

print(f'R2: {r2_score(y_test, experiment_predictions)}')

print(f'MAE: {mean_absolute_error(y_test, experiment_predictions)}')