In [1]:
#import the necessities
import os
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.stats import bartlett, levene
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV, ElasticNetCV
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sqlalchemy import create_engine
from statsmodels.tools.eval_measures import mse, rmse
%matplotlib inline
warnings.filterwarnings('ignore')

In [2]:
#connection settings for the database; every value can be overridden through
#an environment variable so different credentials never have to be written
#into the notebook, while the defaults keep the notebook runnable as before
postgres_user = os.environ.get('POSTGRES_USER', 'dsbc_student')
postgres_pw = os.environ.get('POSTGRES_PW', '7*.8G9QH21')
postgres_host = os.environ.get('POSTGRES_HOST', '142.93.121.174')
#kept as a string because it is only ever interpolated into the connection URL
postgres_port = os.environ.get('POSTGRES_PORT', '5432')
houses_db = os.environ.get('POSTGRES_DB', 'houseprices')

In [3]:
#create the engine, read the whole houseprices table into a dataframe, and
#dispose of the engine afterwards
engine = create_engine('postgresql://{}:{}@{}:{}/{}'.format(
    postgres_user, postgres_pw, postgres_host, postgres_port, houses_db))

#try/finally so the engine is still disposed of when the query raises
try:
    houseprices_df = pd.read_sql_query('SELECT * FROM houseprices', con=engine)
finally:
    engine.dispose()

In [4]:
#helpers used throughout the notebook: tuning() prints a full-data OLS
#summary that is used to pick features, and the *_test() functions report
#held-out metrics for one estimator each
def _features_and_target(df=None):
    """Return (X, y): y is the 'saleprice' column, X is every other column.

    df defaults to the notebook-level info_df, looked up at call time, so the
    zero-argument calls used throughout the notebook see its current state.
    """
    if df is None:
        df = info_df
    y = df['saleprice']
    #cast to float so dummy columns (uint8 or bool, depending on the pandas
    #version) cannot leave the design matrix with mixed dtypes
    X = df.drop(['saleprice'], axis=1).astype(float)
    return X, y


def tuning(df=None):
    """Fit a statsmodels OLS of saleprice on all other columns and print the summary.

    The model is fitted on every row (no train/test split) with an added
    intercept column. Nothing is returned, so a bare call at the end of a
    cell displays only the printed summary.

    Parameters: df - frame containing 'saleprice'; defaults to info_df.
    """
    X, y = _features_and_target(df)
    X = sm.add_constant(X)
    results = sm.OLS(y, X).fit()
    print(results.summary())


def _report_holdout_fit(model, label=None, df=None):
    """Fit model on an 80/20 split and print train/test R-squared and test errors.

    Parameters:
        model - unfitted scikit-learn style estimator (fit / predict / score)
        label - optional header line printed before the metrics
        df    - frame containing 'saleprice'; defaults to info_df
    """
    X, y = _features_and_target(df)
    #the dummy columns carry integer labels next to the string-named columns;
    #newer scikit-learn releases reject such mixed labels, so use strings only
    X.columns = X.columns.astype(str)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=357)
    #no sm.add_constant here: these estimators fit their own intercept by
    #default, and add_constant's default has_constant='skip' adds nothing when
    #a split already holds a non-zero constant column (e.g. an indicator that
    #is 1 for every test row), which leaves train and test with different columns
    model.fit(X_train, y_train)
    y_test_predictions = model.predict(X_test)
    if label is not None:
        print(label)
    print("R-squared of the model in the training set is: {}".format(model.score(X_train, y_train)))
    print("R-squared of the model in the test set is: {}".format(model.score(X_test, y_test)))
    print("\nMean absolute error of the prediction is: {}".format(mean_absolute_error(y_test, y_test_predictions)))
    print("Mean squared error of the prediction is: {:3e}".format(mse(y_test, y_test_predictions)))
    print("Root mean squared error of the prediction is: {}".format(rmse(y_test, y_test_predictions)))
    print("Mean absolute percentage error of the prediction is: {}".format(np.mean(np.abs((y_test - y_test_predictions) / y_test)) * 100))


def lrm_test(df=None):
    """Print held-out metrics for an ordinary least squares LinearRegression."""
    _report_holdout_fit(LinearRegression(), 'OLS Regression', df)


def lasso_test(df=None):
    """Print held-out metrics for LassoCV with 10-fold cross-validation."""
    #the header was missing here although the three sibling functions print one
    _report_holdout_fit(LassoCV(cv=10), 'Lasso Regression', df)


def ridge_test(df=None):
    """Print held-out metrics for RidgeCV with 10-fold cross-validation."""
    _report_holdout_fit(RidgeCV(cv=10), 'Ridge Regression', df)


def en_test(df=None):
    """Print held-out metrics for ElasticNetCV with 10-fold cross-validation."""
    _report_holdout_fit(ElasticNetCV(cv=10), 'Elastic Net Regression', df)

In [5]:
#look at the shape and info for the dataset
print(houseprices_df.shape)
#info() prints its report itself and returns None, so it is called directly;
#wrapping it in display() only appended a stray "None" to the output
houseprices_df.info()

(1460, 81)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 81 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             1460 non-null   int64  
 1   mssubclass     1460 non-null   int64  
 2   mszoning       1460 non-null   object 
 3   lotfrontage    1201 non-null   float64
 4   lotarea        1460 non-null   int64  
 5   street         1460 non-null   object 
 6   alley          91 non-null     object 
 7   lotshape       1460 non-null   object 
 8   landcontour    1460 non-null   object 
 9   utilities      1460 non-null   object 
 10  lotconfig      1460 non-null   object 
 11  landslope      1460 non-null   object 
 12  neighborhood   1460 non-null   object 
 13  condition1     1460 non-null   object 
 14  condition2     1460 non-null   object 
 15  bldgtype       1460 non-null   object 
 16  housestyle     1460 non-null   object 
 17  overallqual    1460 non-null   int64  
 1

None

In [6]:
#start from every column whose dtype is not object (the int and float
#columns), then list what is left
info_df = houseprices_df.select_dtypes(exclude=['object'])
info_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 38 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   id             1460 non-null   int64  
 1   mssubclass     1460 non-null   int64  
 2   lotfrontage    1201 non-null   float64
 3   lotarea        1460 non-null   int64  
 4   overallqual    1460 non-null   int64  
 5   overallcond    1460 non-null   int64  
 6   yearbuilt      1460 non-null   int64  
 7   yearremodadd   1460 non-null   int64  
 8   masvnrarea     1452 non-null   float64
 9   bsmtfinsf1     1460 non-null   int64  
 10  bsmtfinsf2     1460 non-null   int64  
 11  bsmtunfsf      1460 non-null   int64  
 12  totalbsmtsf    1460 non-null   int64  
 13  firstflrsf     1460 non-null   int64  
 14  secondflrsf    1460 non-null   int64  
 15  lowqualfinsf   1460 non-null   int64  
 16  grlivarea      1460 non-null   int64  
 17  bsmtfullbath   1460 non-null   int64  
 18  bsmthalf

In [7]:
#garageyrblt has no good value to replace its nulls with, so the column is
#dropped; the nulls left in the other int/float columns are filled with 0;
#id is dropped because it isn't useful as a predictor. mssubclass stays in
#the frame here and is turned into dummy columns in the next cell
info_df = (
    info_df
    .drop(columns=['garageyrblt'])
    .fillna(0)
    .drop(columns=['id'])
)

In [8]:
#mssubclass and mosold are integer codes, not quantities: convert each to a
#category, append its dummy columns (first level dropped as the baseline),
#then remove the two original columns
for coded_col in ['mssubclass', 'mosold']:
    info_df[coded_col] = info_df[coded_col].astype('category')
    dummy = pd.get_dummies(info_df[coded_col], drop_first=True)
    info_df = info_df.merge(dummy, left_index=True, right_index=True)
info_df = info_df.drop(columns=['mssubclass', 'mosold'])

In [9]:
#baseline fit: OLS on every remaining numeric column plus the mssubclass
#and mosold dummies
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.820
Model:                            OLS   Adj. R-squared:                  0.813
Method:                 Least Squares   F-statistic:                     114.2
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:58   Log-Likelihood:                -17292.
No. Observations:                1460   AIC:                         3.470e+04
Df Residuals:                    1403   BIC:                         3.500e+04
Df Model:                          56                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const         -2.602e+05   1.42e+06     -0.184

This is just a note to myself on the variables I found statistically insignificant based on p-values

lotfrontage (drop?)
bsmtfinsf2 (drop)
bsmtunfsf (drop)
(maybe add all bsmt values together?)
lowqualfinsf (drop)
bsmthalfbath (drop) (categorical?)
fullbath (keep for now)
halfbath (keep for now)
garagearea (drop, cars gives us the same info)
openporchsf (keep for now)
enclosedporch (keep for now)
threessnporch (keep for now)
(maybe add all the porch values together for analysis?)
poolarea (keep for now)
miscval (drop)
mosold (drop) (categorical)
yrsold (keep for now)
40
45
90
180
190
all months

In [10]:
#first pruning pass: everything below was clearly insignificant in the
#baseline summary
info_df = info_df.drop(
    columns=['bsmtfinsf2', 'bsmtunfsf', 'lowqualfinsf', 'bsmthalfbath', 'garagearea', 'miscval']
    + [40, 45, 90, 180, 190]    #mssubclass dummies
    + list(range(2, 13))        #all of the mosold dummies (2 through 12)
)

In [11]:
#refit after the first pruning pass to see how much fit was lost
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.818
Model:                            OLS   Adj. R-squared:                  0.813
Method:                 Least Squares   F-statistic:                     177.1
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:58   Log-Likelihood:                -17302.
No. Observations:                1460   AIC:                         3.468e+04
Df Residuals:                    1423   BIC:                         3.487e+04
Df Model:                          36                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const          6716.1520   1.38e+06      0.005

In [12]:
#second pruning pass; 85 is one of the mssubclass dummy columns
info_df = info_df.drop(columns=['lotfrontage', 'yearremodadd', 85])

In [13]:
#refit without lotfrontage, yearremodadd and the mssubclass 85 dummy
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.817
Model:                            OLS   Adj. R-squared:                  0.812
Method:                 Least Squares   F-statistic:                     192.6
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:58   Log-Likelihood:                -17305.
No. Observations:                1460   AIC:                         3.468e+04
Df Residuals:                    1426   BIC:                         3.486e+04
Df Model:                          33                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const          4.412e+04   1.38e+06      0.032

In [14]:
#the individual floor and porch areas may only matter as totals, so add one
#summed column for each group
#NOTE(review): while the component columns are still in the frame, each sum
#is an exact linear combination of them, so the refit right after this cell
#cannot change (its Df Model stays at 33)
info_df['interiorsf'] = info_df[['totalbsmtsf', 'firstflrsf', 'secondflrsf']].sum(axis=1)
info_df['porchsf'] = info_df[['openporchsf', 'enclosedporch', 'threessnporch',
                              'screenporch']].sum(axis=1)

In [15]:
#refit with interiorsf and porchsf added alongside their component columns
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.817
Model:                            OLS   Adj. R-squared:                  0.812
Method:                 Least Squares   F-statistic:                     192.6
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:58   Log-Likelihood:                -17305.
No. Observations:                1460   AIC:                         3.468e+04
Df Residuals:                    1426   BIC:                         3.486e+04
Df Model:                          33                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const          4.412e+04   1.38e+06      0.032

In [16]:
#adding the sums did not change the fit, so the component columns are
#removed and only interiorsf / porchsf stay in the model
info_df = info_df.drop(columns=[
    'totalbsmtsf', 'firstflrsf', 'secondflrsf',
    'openporchsf', 'enclosedporch', 'threessnporch', 'screenporch',
])

In [17]:
#refit with interiorsf and porchsf standing in for the dropped component columns
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.815
Model:                            OLS   Adj. R-squared:                  0.812
Method:                 Least Squares   F-statistic:                     225.6
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:58   Log-Likelihood:                -17311.
No. Observations:                1460   AIC:                         3.468e+04
Df Residuals:                    1431   BIC:                         3.483e+04
Df Model:                          28                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const        -9.223e+04   1.38e+06     -0.067   

In [18]:
#now the fun starts. we go through and add dummy variables for every
#categorical value here. it would probably have been better to add
#them all simultaneously but, given some of the columns had the same
#values, I did it individually. I kept anything under .1 until the end
#in case adding in more variables brought them into significance
for count_col, suffix in [('fullbath', 'bath'), ('halfbath', 'half')]:
    dummy = pd.get_dummies(info_df[count_col], drop_first=True)
    #build each label from the count the dummy encodes ('1 bath', '2 half', ...)
    #instead of a fixed-length list, which only works when the column has
    #exactly the expected number of distinct values
    dummy.columns = ['{} {}'.format(int(level), suffix) for level in dummy.columns]
    info_df = pd.merge(info_df, dummy, left_index=True, right_index=True)
info_df = info_df.drop(['fullbath', 'halfbath'], axis=1)

In [19]:
#refit with the full-bath and half-bath counts encoded as dummies
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.822
Model:                            OLS   Adj. R-squared:                  0.818
Method:                 Least Squares   F-statistic:                     212.8
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:59   Log-Likelihood:                -17284.
No. Observations:                1460   AIC:                         3.463e+04
Df Residuals:                    1428   BIC:                         3.480e+04
Df Model:                          31                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const        -7.629e+05   1.36e+06     -0.559   

In [20]:
#'1 bath' goes; '2 bath' is kept for now in the hope that later additions
#bring it into significance
info_df = info_df.drop(columns=['1 bath'])

In [21]:
#refit without the '1 bath' dummy
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.822
Model:                            OLS   Adj. R-squared:                  0.818
Method:                 Least Squares   F-statistic:                     219.9
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:59   Log-Likelihood:                -17284.
No. Observations:                1460   AIC:                         3.463e+04
Df Residuals:                    1429   BIC:                         3.479e+04
Df Model:                          30                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const        -7.517e+05   1.36e+06     -0.551   

In [22]:
#'1 half' was statistically significant, but keeping it felt odd if the
#'2 half' dummy would have to be dropped; it is removed here in the hope
#that '2 half' becomes significant as a result (it did not)
info_df = info_df.drop(columns=['1 half'])

In [23]:
#refit without the '1 half' dummy
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.821
Model:                            OLS   Adj. R-squared:                  0.818
Method:                 Least Squares   F-statistic:                     226.7
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:59   Log-Likelihood:                -17287.
No. Observations:                1460   AIC:                         3.463e+04
Df Residuals:                    1430   BIC:                         3.479e+04
Df Model:                          29                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const         -8.72e+05   1.36e+06     -0.639   

In [24]:
#yrsold is dropped for the purposes of this exercise. a friend later
#suggested keeping it but rescaling it to the range zero to one instead:
#  sub_value = value - min(value)
#  scaled_value = sub_value / max(sub_value)
info_df = info_df.drop(columns=['yrsold'])

In [25]:
#refit without yrsold
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.821
Model:                            OLS   Adj. R-squared:                  0.818
Method:                 Least Squares   F-statistic:                     234.9
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:59   Log-Likelihood:                -17287.
No. Observations:                1460   AIC:                         3.463e+04
Df Residuals:                    1431   BIC:                         3.479e+04
Df Model:                          28                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const        -1.194e+06   1.36e+05     -8.813   

In [26]:
#the 50, 70 and 75 dummies started out significant but no longer are
info_df = info_df.drop(columns=[50, 70, 75])

In [27]:
#refit without the 50, 70 and 75 dummies
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.821
Model:                            OLS   Adj. R-squared:                  0.818
Method:                 Least Squares   F-statistic:                     263.2
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:59   Log-Likelihood:                -17288.
No. Observations:                1460   AIC:                         3.463e+04
Df Residuals:                    1434   BIC:                         3.477e+04
Df Model:                          25                                         
Covariance Type:            nonrobust                                         
                   coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------
const        -1.225e+06   1.18e+05    -10.392   

In [28]:
#from here on the object columns of the raw frame are tried one at a time.
#when none of a column's dummies were significant, the code was overwritten
#with the next column name; otherwise only the significant dummies were kept
dummy = pd.get_dummies(houseprices_df.street, drop_first=True)
info_df = info_df.assign(**{'on paved road': dummy['Pave']})

In [29]:
#refit with the 'on paved road' indicator added
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.821
Model:                            OLS   Adj. R-squared:                  0.818
Method:                 Least Squares   F-statistic:                     253.6
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:37:59   Log-Likelihood:                -17286.
No. Observations:                1460   AIC:                         3.463e+04
Df Residuals:                    1433   BIC:                         3.477e+04
Df Model:                          26                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const         -1.251e+06   1.19e+05    -10.543

In [30]:
#only the IR3 level of lotshape was significant, so it is the only dummy
#added; it gets pulled again at the end
dummy = pd.get_dummies(houseprices_df.lotshape)
info_df = info_df.assign(**{'IR3 lot shape': dummy['IR3']})

In [31]:
#refit with the 'IR3 lot shape' indicator added
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.824
Model:                            OLS   Adj. R-squared:                  0.820
Method:                 Least Squares   F-statistic:                     247.5
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:00   Log-Likelihood:                -17278.
No. Observations:                1460   AIC:                         3.461e+04
Df Residuals:                    1432   BIC:                         3.476e+04
Df Model:                          27                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const         -1.258e+06   1.18e+05    -10.657

In [32]:
#in this one, the level that would likely be considered the most desirable
#(Lvl) is the one left out, so every remaining dummy reads as a difference
#relative to a level lot
dummy = (
    pd.get_dummies(houseprices_df['landcontour'])
    .drop(columns=['Lvl'])
    #rename by code rather than by position so a label can never end up on
    #the wrong column; errors='raise' fails loudly if a code is missing
    .rename(columns={'Bnk': 'Banked contour',
                     'HLS': 'Hillside contour',
                     'Low': 'Depressed contour'},
            errors='raise')
)
info_df = pd.merge(info_df, dummy, left_index=True, right_index=True)

In [33]:
#refit with the three land-contour dummies added
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.827
Model:                            OLS   Adj. R-squared:                  0.823
Method:                 Least Squares   F-statistic:                     227.7
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:00   Log-Likelihood:                -17263.
No. Observations:                1460   AIC:                         3.459e+04
Df Residuals:                    1429   BIC:                         3.475e+04
Df Model:                          30                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             -1.242e+06   1.17e+0

In [34]:
#utilities only has two values, AllPub and NoSeWa, so a single indicator
#column covers it and no merge is needed
dummy = pd.get_dummies(houseprices_df.utilities, drop_first=True)
info_df = info_df.assign(**{'No Sewer / Water': dummy['NoSeWa']})

In [35]:
#refit with the 'No Sewer / Water' indicator added
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.827
Model:                            OLS   Adj. R-squared:                  0.824
Method:                 Least Squares   F-statistic:                     220.6
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:00   Log-Likelihood:                -17262.
No. Observations:                1460   AIC:                         3.459e+04
Df Residuals:                    1428   BIC:                         3.476e+04
Df Model:                          31                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             -1.246e+06   1.17e+0

In [36]:
#lotconfig: again only one level (CulDSac) was significant
dummy = pd.get_dummies(houseprices_df.lotconfig)
info_df = info_df.assign(**{'on culdsac': dummy['CulDSac']})

In [37]:
#refit with the 'on culdsac' indicator added
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.828
Model:                            OLS   Adj. R-squared:                  0.824
Method:                 Least Squares   F-statistic:                     214.5
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:00   Log-Likelihood:                -17260.
No. Observations:                1460   AIC:                         3.459e+04
Df Residuals:                    1427   BIC:                         3.476e+04
Df Model:                          32                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             -1.224e+06   1.17e+0

In [38]:
#neighborhood: append one dummy per neighborhood, with the first level
#dropped as the baseline
dummy = pd.get_dummies(houseprices_df.neighborhood, drop_first=True)
info_df = info_df.merge(dummy, left_index=True, right_index=True)

In [39]:
#refit with the neighborhood dummies added
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.860
Model:                            OLS   Adj. R-squared:                  0.855
Method:                 Least Squares   F-statistic:                     154.4
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:01   Log-Likelihood:                -17107.
No. Observations:                1460   AIC:                         3.433e+04
Df Residuals:                    1403   BIC:                         3.463e+04
Df Model:                          56                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             -9.511e+05    1.4e+0

In [40]:
#I later thought of a better way to run these tests; at this point the
#approach was to add every dummy and then delete the ones that I figured
#would be immediately insignificant
info_df = info_df.drop(columns=[
    'Blueste', 'BrDale', 'Crawfor', 'NPkVill', 'Somerst', 'Veenker',
])

In [41]:
#bldgtype: only the Twnhs level was close to significant
dummy = pd.get_dummies(houseprices_df.bldgtype)
info_df = info_df.assign(Twnhs=dummy['Twnhs'])

In [42]:
#refit after trimming the neighborhood dummies and adding the Twnhs indicator
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.860
Model:                            OLS   Adj. R-squared:                  0.855
Method:                 Least Squares   F-statistic:                     170.2
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:01   Log-Likelihood:                -17106.
No. Observations:                1460   AIC:                         3.432e+04
Df Residuals:                    1408   BIC:                         3.459e+04
Df Model:                          51                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const              -1.02e+06   1.28e+0

In [43]:
#the r in research stands for repetition: roofmatl gets the same treatment,
#with every dummy except the first level appended
dummy = pd.get_dummies(houseprices_df.roofmatl, drop_first=True)
info_df = info_df.merge(dummy, left_index=True, right_index=True)
#refit with the roof-material dummies added
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.887
Model:                            OLS   Adj. R-squared:                  0.882
Method:                 Least Squares   F-statistic:                     189.4
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:01   Log-Likelihood:                -16953.
No. Observations:                1460   AIC:                         3.402e+04
Df Residuals:                    1401   BIC:                         3.434e+04
Df Model:                          58                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             -1.657e+06   1.22e+0

In [44]:
#repeat the process for exterior1st, keeping only its 'Plywood' indicator
dummy = pd.get_dummies(houseprices_df.loc[:, 'exterior1st'])
info_df['Plywood'] = dummy.loc[:, 'Plywood']
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.887
Model:                            OLS   Adj. R-squared:                  0.883
Method:                 Least Squares   F-statistic:                     187.0
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:01   Log-Likelihood:                -16950.
No. Observations:                1460   AIC:                         3.402e+04
Df Residuals:                    1400   BIC:                         3.434e+04
Df Model:                          59                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             -1.657e+06   1.22e+0

In [45]:
#the r in research stands for repetition: exterior quality dummies, with 'Ex'
#removed so every remaining indicator is measured against it
dummy = pd.get_dummies(houseprices_df['exterqual'])
del dummy['Ex']
#rename through an explicit code -> label mapping instead of overwriting
#dummy.columns positionally, so a label can never land on the wrong category if
#the set or order of codes changes. Assumes the remaining codes are Fa/Gd/TA
#(the sorted order the positional labels relied on) -- errors='raise' makes any
#mismatch fail loudly instead of silently mislabelling a column
dummy = dummy.rename(
    columns={'Fa': 'Ext Fair Qual', 'Gd': 'Ext Good Qual', 'TA': 'Ext Avg Qual'},
    errors='raise',
)
info_df = pd.merge(info_df, dummy, left_index=True, right_index=True)
#run the tuning function
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.894
Model:                            OLS   Adj. R-squared:                  0.889
Method:                 Least Squares   F-statistic:                     189.1
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:01   Log-Likelihood:                -16909.
No. Observations:                1460   AIC:                         3.394e+04
Df Residuals:                    1397   BIC:                         3.428e+04
Df Model:                          62                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const             -1.548e+06   1.23e+0

In [46]:
#these indicators are copied over one at a time rather than merged, because each
#one gets a new name on the way in; whether this is faster than merging is untested
dummy = pd.get_dummies(houseprices_df['foundation'])
for dummy_col, feature_name in (('PConc', 'Poured Cncrt fndtn'),
                                ('Slab', 'Slab fndtn'),
                                ('Wood', 'Wood fndtn')):
    info_df[feature_name] = dummy[dummy_col]
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.895
Model:                            OLS   Adj. R-squared:                  0.890
Method:                 Least Squares   F-statistic:                     182.1
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:01   Log-Likelihood:                -16901.
No. Observations:                1460   AIC:                         3.393e+04
Df Residuals:                    1394   BIC:                         3.428e+04
Df Model:                          65                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const              -1.446e+06   1.28

In [47]:
#again, the 'excellent' column is deleted so every remaining indicator
#is measured in comparison to it
dummy = pd.get_dummies(houseprices_df['kitchenqual'])
del dummy['Ex']
#rename through an explicit code -> label mapping instead of overwriting
#dummy.columns positionally, so a label can never land on the wrong category if
#the set or order of codes changes. Assumes the remaining codes are Fa/Gd/TA
#(the sorted order the positional labels relied on) -- errors='raise' makes any
#mismatch fail loudly instead of silently mislabelling a column
dummy = dummy.rename(
    columns={'Fa': 'Kitchen Fair Qual', 'Gd': 'Kitchen Good Qual',
             'TA': 'Kitchen Avg Qual'},
    errors='raise',
)
info_df = pd.merge(info_df, dummy, left_index=True, right_index=True)
#run the tuning function
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.899
Model:                            OLS   Adj. R-squared:                  0.894
Method:                 Least Squares   F-statistic:                     182.1
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:02   Log-Likelihood:                -16870.
No. Observations:                1460   AIC:                         3.388e+04
Df Residuals:                    1391   BIC:                         3.424e+04
Df Model:                          68                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const              -1.377e+06   1.27

In [48]:
#I wanted these to be in comparison to Typ (typical) only,
#but Maj2 and Min2 were also extremely insignificant
dummy = pd.get_dummies(houseprices_df['functional'])
dummy = dummy.drop(['Maj2', 'Typ', 'Min2'], axis=1)
#rename through an explicit code -> label mapping instead of overwriting
#dummy.columns positionally, so a label can never land on the wrong category if
#the set or order of codes changes. Assumes the remaining codes are
#Maj1/Min1/Mod/Sev (the sorted order the positional labels relied on) --
#errors='raise' makes any mismatch fail loudly instead of silently mislabelling
dummy = dummy.rename(
    columns={'Maj1': 'Maj Deductions Fnctn', 'Min1': 'Min Deduction Fnctn',
             'Mod': 'Mod Deductions Fnctn', 'Sev': 'Severe Deduction Fnctn'},
    errors='raise',
)
info_df = pd.merge(info_df, dummy, left_index=True, right_index=True)
#run the tuning function
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.900
Model:                            OLS   Adj. R-squared:                  0.895
Method:                 Least Squares   F-statistic:                     173.8
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:02   Log-Likelihood:                -16861.
No. Observations:                1460   AIC:                         3.387e+04
Df Residuals:                    1387   BIC:                         3.425e+04
Df Model:                          72                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.39

In [49]:
#two salecondition indicators are kept, each under a more descriptive name
dummy = pd.get_dummies(houseprices_df.loc[:, 'salecondition'])
info_df['Normal Sale'] = dummy.loc[:, 'Normal']
info_df['Partial Sale'] = dummy.loc[:, 'Partial']
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     172.2
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:02   Log-Likelihood:                -16849.
No. Observations:                1460   AIC:                         3.385e+04
Df Residuals:                    1385   BIC:                         3.424e+04
Df Model:                          74                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                   -1.3

strong multicollinearity or other numerical problems.


In [50]:
#with all the dummies in place, features added earlier that have since become
#insignificant can be removed (in place, one per cell)
info_df.drop(columns=['Depressed contour'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     174.7
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:02   Log-Likelihood:                -16849.
No. Observations:                1460   AIC:                         3.385e+04
Df Residuals:                    1386   BIC:                         3.424e+04
Df Model:                          73                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                   -1.3

In [51]:
#'2 bath' unfortunately stayed insignificant, so it is removed in place
info_df.drop(columns=['2 bath'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     177.2
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:02   Log-Likelihood:                -16849.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1387   BIC:                         3.423e+04
Df Model:                          72                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.31

In [52]:
#'2 half' stayed insignificant as well, so it is removed in place
info_df.drop(columns=['2 half'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     179.7
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:02   Log-Likelihood:                -16849.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1388   BIC:                         3.422e+04
Df Model:                          71                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.31

In [53]:
#more trimming: remove the column whose label is the integer 80 (in place)
info_df.drop(columns=[80], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     182.4
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:02   Log-Likelihood:                -16849.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1389   BIC:                         3.422e+04
Df Model:                          70                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.30

In [54]:
#keep trimming: remove the column whose label is the integer 30 (in place)
info_df.drop(columns=[30], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     185.0
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:03   Log-Likelihood:                -16850.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1390   BIC:                         3.421e+04
Df Model:                          69                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.27

In [55]:
#another cut: remove the column whose label is the integer 60 (in place)
info_df.drop(columns=[60], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     187.7
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:03   Log-Likelihood:                -16850.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1391   BIC:                         3.420e+04
Df Model:                          68                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

In [56]:
#I believe this one was significant at first and became insignificant
#as other features were added and removed; it is dropped in place
info_df.drop(columns=['IR3 lot shape'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     190.4
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:03   Log-Likelihood:                -16851.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1392   BIC:                         3.420e+04
Df Model:                          67                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

In [57]:
#bsmtfullbath has become insignificant, so it is removed in place
info_df.drop(columns=['bsmtfullbath'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.902
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     193.2
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:03   Log-Likelihood:                -16852.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1393   BIC:                         3.419e+04
Df Model:                          66                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

In [58]:
#features keep coming out one at a time: 'Wood fndtn' is next (in place)
info_df.drop(columns=['Wood fndtn'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     196.2
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:03   Log-Likelihood:                -16853.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1394   BIC:                         3.419e+04
Df Model:                          65                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.23

In [59]:
#'No Sewer / Water' is the next feature to go (in place)
info_df.drop(columns=['No Sewer / Water'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     199.0
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:03   Log-Likelihood:                -16854.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1395   BIC:                         3.418e+04
Df Model:                          64                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

In [60]:
#'Ext Fair Qual' is removed next (in place)
info_df.drop(columns=['Ext Fair Qual'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     201.9
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:04   Log-Likelihood:                -16855.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1396   BIC:                         3.418e+04
Df Model:                          63                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.25

In [61]:
#'Twnhs' is only just insignificant at the P < 0.05 threshold, but it still goes (in place)
info_df.drop(columns=['Twnhs'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.897
Method:                 Least Squares   F-statistic:                     204.9
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:04   Log-Likelihood:                -16856.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1397   BIC:                         3.417e+04
Df Model:                          62                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.25

In [62]:
#I would have liked to keep the 'Mod' indicator, but it is removed too (in place)
info_df.drop(columns=['Mod Deductions Fnctn'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.901
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     207.9
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:04   Log-Likelihood:                -16858.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1398   BIC:                         3.417e+04
Df Model:                          61                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

In [63]:
#the previous removal pushed the 'Min' indicator out of significance, so it is
#pulled (in place) to see whether the 'Maj' indicator can stay in
info_df.drop(columns=['Min Deduction Fnctn'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.900
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     210.9
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:04   Log-Likelihood:                -16860.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1399   BIC:                         3.416e+04
Df Model:                          60                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

In [64]:
#it could not stay in, so the 'Maj' indicator is cut as well (in place)
info_df.drop(columns=['Maj Deductions Fnctn'], inplace=True)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.900
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     214.0
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:04   Log-Likelihood:                -16862.
No. Observations:                1460   AIC:                         3.384e+04
Df Residuals:                    1400   BIC:                         3.416e+04
Df Model:                          59                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

In [65]:
#every remaining feature is now statistically significant, so we can run our predictions:
#lrm_test fits plain linear regression on an 80/20 train/test split of info_df
#and prints the R-squared and error metrics
lrm_test()

OLS Regression
R-squared of the model in the training set is: 0.9175940399135784
R-squared of the model in the test set is: 0.7845623690347873

Mean absolute error of the prediction is: 15827.69367927762
Mean squared error of the prediction is: 1.071772e+09
Root mean squared error of the prediction is: 32737.93509742331
Mean absolute percentage error of the prediction is: 9.687289969495124


In [66]:
#same evaluation for the lasso model (lasso_test is defined outside this
#section -- presumably the LassoCV counterpart of lrm_test)
lasso_test()

R-squared of the model in the training set is: 0.7176574297095706
R-squared of the model in the test set is: 0.6457417755828971

Mean absolute error of the prediction is: 24925.29258437263
Mean squared error of the prediction is: 1.762386e+09
Root mean squared error of the prediction is: 41980.77693584046
Mean absolute percentage error of the prediction is: 15.019875404098535


In [67]:
#same evaluation for the ridge model (ridge_test is defined outside this
#section -- presumably the RidgeCV counterpart of lrm_test)
ridge_test()

Ridge Regression
R-squared of the model in the training set is: 0.8857570918687354
R-squared of the model in the test set is: 0.8240016647831889

Mean absolute error of the prediction is: 15656.79613136504
Mean squared error of the prediction is: 8.755674e+08
Root mean squared error of the prediction is: 29589.987445924755
Mean absolute percentage error of the prediction is: 9.467783700230987


In [68]:
#same evaluation for the elastic net model (en_test is defined outside this
#section -- presumably the ElasticNetCV counterpart of lrm_test)
en_test()

Elastic Net Regression
R-squared of the model in the training set is: 0.6168978264738623
R-squared of the model in the test set is: 0.561674909142513

Mean absolute error of the prediction is: 30944.21519302323
Mean squared error of the prediction is: 2.180607e+09
Root mean squared error of the prediction is: 46696.96624660817
Mean absolute percentage error of the prediction is: 19.48570360643981


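Based on these results, I would use Ridge. While OLS has a higher R-squared on the training set, Ridge's training and test R-squared values are much closer together (less overfitting), Ridge has the highest test R-squared, and it has lower values for all the other error metrics (MAE, MSE, RMSE, and MAPE).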

In [69]:
#the assignment asks for outside factors such as interest rates or economic activity.
#the yearly figures below come from
#https://www.valuepenguin.com/mortgages/historical-mortgage-rates#hist
#and are attached to each sale according to the year it was sold.
##As a note, these are 30 year mortgage rates.
rate_by_year = {2006: 6.41, 2007: 6.34, 2008: 6.03, 2009: 5.04, 2010: 4.69}
rates = list(rate_by_year.values())
conditions = [houseprices_df['yrsold'] == year for year in rate_by_year]
#np.select picks the rate of the first matching year condition (0 when none match)
info_df['Avg Rate When Sold'] = np.select(conditions, rates)

In [70]:
#now to rerun the tuning: refit OLS with 'Avg Rate When Sold' among the features
#and print the summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.900
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     210.3
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:05   Log-Likelihood:                -16862.
No. Observations:                1460   AIC:                         3.385e+04
Df Residuals:                    1399   BIC:                         3.417e+04
Df Model:                          60                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

Based on the extremely high p-value of its coefficient, the mortgage rate adds no value to this model.

In [72]:
#adds the average 30 year mortgage rate for the year of sale to see whether it
#affects the model; the figures come from
#https://www.valuepenguin.com/mortgages/historical-mortgage-rates
#and are matched on the yrsold information
#NOTE(review): this repeats the mortgage-rate cell above; re-assigning the same
#column is idempotent, so the refit below matches the previous one
rate_by_year = {2006: 6.41, 2007: 6.34, 2008: 6.03, 2009: 5.04, 2010: 4.69}
rates = list(rate_by_year.values())
conditions = [houseprices_df['yrsold'] == year for year in rate_by_year]
#np.select picks the rate of the first matching year condition (0 when none match)
info_df['Avg Rate When Sold'] = np.select(conditions, rates)
#refit and print the OLS summary
tuning()

                            OLS Regression Results                            
Dep. Variable:              saleprice   R-squared:                       0.900
Model:                            OLS   Adj. R-squared:                  0.896
Method:                 Least Squares   F-statistic:                     210.3
Date:                Wed, 20 Jan 2021   Prob (F-statistic):               0.00
Time:                        17:38:20   Log-Likelihood:                -16862.
No. Observations:                1460   AIC:                         3.385e+04
Df Residuals:                    1399   BIC:                         3.417e+04
Df Model:                          60                                         
Covariance Type:            nonrobust                                         
                             coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------
const                  -1.24

Based on the very high p-value, we can conclude that the mortgage rate is not a good feature for this model.