In [1]:
import warnings
warnings.filterwarnings('ignore')

# import preprocessing function
from helpers.preprocessing import *

# import dependencies
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
from sqlalchemy import create_engine
import psycopg2
from config import db_password

# import machine learning dependencies
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# import neural network dependencies
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Read the Data from SQL

In [2]:
X, y = preprocessing('fall')

# drop beds and bedrooms, as they probably have high colinearity with acommodates
X.drop(['beds', 'bedrooms'], axis = 1, inplace = True)

X.head()

Unnamed: 0,accommodates,longitude,latitude,review_scores_rating,bathrooms,room_Entire home/apt,room_Hotel room,room_Private room,room_Shared room,property_Camper/RV,property_Entire Home,property_Entire Unit,property_Other,property_Private room,property_Shared room
0,8,-117.2576,32.80751,4.76,2.0,1,0,0,0,0,1,0,0,0,0
1,1,-117.21931,32.74217,4.33,1.0,0,0,1,0,0,0,0,0,1,0
2,7,-117.25416,32.79783,4.75,2.5,1,0,0,0,0,1,0,0,0,0
3,8,-117.25728,32.80751,4.72,2.0,1,0,0,0,0,1,0,0,0,0
4,3,-117.26856,32.81301,4.94,1.0,1,0,0,0,0,1,0,0,0,0


# Split the Data into Training and Testing

In [3]:
X.describe()

Unnamed: 0,accommodates,longitude,latitude,review_scores_rating,bathrooms,room_Entire home/apt,room_Hotel room,room_Private room,room_Shared room,property_Camper/RV,property_Entire Home,property_Entire Unit,property_Other,property_Private room,property_Shared room
count,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0
mean,4.866391,-117.173776,32.763467,4.749504,1.603629,0.838372,0.001069,0.155497,0.005062,0.005989,0.406459,0.422644,0.003066,0.15678,0.005062
std,3.133007,0.074927,0.096004,0.387233,0.984302,0.368123,0.032686,0.362391,0.07097,0.077159,0.49119,0.493997,0.055286,0.363606,0.07097
min,1.0,-117.28258,32.51986,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,-117.248773,32.719043,4.71,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,-117.166917,32.756855,4.82,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,6.0,-117.129102,32.79899,4.95,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
max,16.0,-116.89656,33.12569,5.0,27.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [4]:
# split data into training and testing data
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [5]:
# scale the X data
# Creating a StandardScaler instance.
scaler = StandardScaler()
# Fitting the Standard Scaler with the training data.
X_scaler = scaler.fit(X_train)

# Scaling the data.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Multiple Linear Regression Model

In [6]:
reg = LinearRegression()
reg.fit(X_train_scaled, y_train)
y_pred = reg.predict(X_test_scaled)

In [7]:
print(f'Training R^2: {reg.score(X_train_scaled, y_train)}')
print(f'Testing R^2: {reg.score(X_test_scaled, y_test)}')

Training R^2: 0.09456832376223434
Testing R^2: 0.014471680833533451


In [8]:
# Display the coefficients in the model
results_df = pd.DataFrame({'Values': X.columns, 'Coef':reg.coef_})
results_df

Unnamed: 0,Values,Coef
0,accommodates,85.45343
1,longitude,-41.78743
2,latitude,28.7076
3,review_scores_rating,-16.96696
4,bathrooms,160.0779
5,room_Entire home/apt,-2099757000000000.0
6,room_Hotel room,-200175400000000.0
7,room_Private room,-2064866000000000.0
8,room_Shared room,255078300000000.0
9,property_Camper/RV,877214700000000.0


## Neural Network

In [9]:
X_train_scaled.shape

(10519, 15)

In [10]:
# Define the model - deep neural net, i.e., the number of input features and hidden nodes for each layer.
number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 63
hidden_nodes_layer2 = 30

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Output layer
nn.add(tf.keras.layers.Dense(units=1))

# Check the structure of the model
nn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 63)                1008      
                                                                 
 dense_1 (Dense)             (None, 30)                1920      
                                                                 
 dense_2 (Dense)             (None, 1)                 31        
                                                                 
Total params: 2,959
Trainable params: 2,959
Non-trainable params: 0
_________________________________________________________________


In [11]:
nn.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

In [12]:
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 98/100
Epoch 99/100
Epoch 100/100


In [13]:
# Evaluate the mean absolute error of the  model using the test data
mae = nn.evaluate(X_test_scaled,y_test,verbose=0)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 179.67294311523438
