In [1]:
import warnings
# Silence every warning for the rest of the session: no category or module
# filter is passed, so the 'ignore' action applies to all of them.
# NOTE(review): this also hides deprecation warnings from the libraries
# imported below — confirm that is intended.
warnings.filterwarnings('ignore')

# import preprocessing function
from helpers.preprocessing import *

# import dependencies
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
from sqlalchemy import create_engine
import psycopg2
from config import db_password

# import machine learning dependencies
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# import neural network dependencies
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

## Read the Data from SQL

In [2]:
# Load the features (X) and the target (y) via the project's preprocessing
# helper (imported from helpers.preprocessing).
# NOTE(review): presumably 'fall' selects a seasonal dataset — confirm against the helper.
X, y = preprocessing('fall')

# Drop beds and bedrooms, as they probably have high collinearity with accommodates.
# Reassign rather than using inplace=True so the object returned by
# preprocessing() is never mutated and the cell stays chain-friendly.
X = X.drop(columns=['beds', 'bedrooms'])

X.head()

Unnamed: 0,accommodates,longitude,latitude,availability_90,minimum_nights,host_listings_count,review_scores_rating,bathrooms,room_Entire home/apt,room_Hotel room,room_Private room,room_Shared room,property_Camper/RV,property_Entire Home,property_Entire Unit,property_Other,property_Private room,property_Shared room
0,8,-117.2576,32.80751,6,4,5.0,4.76,2.0,1,0,0,0,0,1,0,0,0,0
1,1,-117.21931,32.74217,54,1,3.0,4.33,1.0,0,0,1,0,0,0,0,0,1,0
2,7,-117.25416,32.79783,0,6,2.0,4.75,2.5,1,0,0,0,0,1,0,0,0,0
3,8,-117.25728,32.80751,44,6,2.0,4.72,2.0,1,0,0,0,0,1,0,0,0,0
4,3,-117.26856,32.81301,42,5,2.0,4.94,1.0,1,0,0,0,0,1,0,0,0,0


## Split the Data into Training and Testing

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for each feature column.
X.describe()

Unnamed: 0,accommodates,longitude,latitude,availability_90,minimum_nights,host_listings_count,review_scores_rating,bathrooms,room_Entire home/apt,room_Hotel room,room_Private room,room_Shared room,property_Camper/RV,property_Entire Home,property_Entire Unit,property_Other,property_Private room,property_Shared room
count,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0,14026.0
mean,4.866391,-117.173776,32.763467,48.910737,6.695066,57.06053,4.749504,1.603629,0.838372,0.001069,0.155497,0.005062,0.005989,0.406459,0.422644,0.003066,0.15678,0.005062
std,3.133007,0.074927,0.096004,31.546411,19.525722,215.271203,0.387233,0.984302,0.368123,0.032686,0.362391,0.07097,0.077159,0.49119,0.493997,0.055286,0.363606,0.07097
min,1.0,-117.28258,32.51986,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,-117.248773,32.719043,19.0,1.0,1.0,4.71,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,4.0,-117.166917,32.756855,55.0,2.0,4.0,4.82,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,6.0,-117.129102,32.79899,77.0,4.0,18.0,4.95,2.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
max,16.0,-116.89656,33.12569,90.0,999.0,2641.0,5.0,27.5,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [4]:
# Split the data into training and testing sets. random_state=1 pins the
# shuffle so the same rows land in each split on every run; no test_size is
# passed, so scikit-learn's default proportions apply.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

In [5]:
# Standardize the X data.
# The scaler is fitted on the training split only and then applied to both
# splits, so the test rows never influence the fitted statistics.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to the training and testing features in turn.
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

## Multiple Linear Regression Model

In [6]:
# Fit an ordinary least-squares model on the scaled training features, then
# predict the target for the scaled test features.
# NOTE(review): the coefficient table shown further down lists values around
# 1e13-1e14 for the one-hot room_* columns, which looks like perfectly
# collinear indicator columns — consider dropping one level per one-hot group
# before fitting; confirm.
reg = LinearRegression().fit(X_train_scaled, y_train)
y_pred = reg.predict(X_test_scaled)

In [7]:
# Report the model's score() on each split (R^2 for a regressor).
train_r2 = reg.score(X_train_scaled, y_train)
test_r2 = reg.score(X_test_scaled, y_test)
print(f'Training R^2: {train_r2}')
print(f'Testing R^2: {test_r2}')

Training R^2: 0.05565394173453464
Testing R^2: 0.12474821777548473


In [8]:
# Show each feature name next to the coefficient the linear model fitted for it.
coef_columns = {'Values': X.columns, 'Coef': reg.coef_}
results_df = pd.DataFrame(coef_columns)
results_df

Unnamed: 0,Values,Coef
0,accommodates,91.20256
1,longitude,-56.17257
2,latitude,2.524792
3,availability_90,-72.38079
4,minimum_nights,10.12945
5,host_listings_count,163.4409
6,review_scores_rating,-2.673577
7,bathrooms,144.2249
8,room_Entire home/apt,777212700000000.0
9,room_Hotel room,67891030000000.0


## Neural Network

In [9]:
# Check the (rows, features) dimensions of the scaled training matrix; the
# second entry is used as the network's input width below.
X_train_scaled.shape

(10519, 18)

In [20]:
# Define the model - a deep neural net, i.e., the number of input features and
# the hidden nodes for each layer.

# Seed TensorFlow's global random generator before any layer is created so the
# randomly initialised weights do not change every time this cell is re-run.
# The value matches the random_state used for the train/test split.
tf.random.set_seed(1)

number_input_features = X_train_scaled.shape[1]
hidden_nodes_layer1 = 60
hidden_nodes_layer2 = 120
hidden_nodes_layer3 = 120

nn = tf.keras.models.Sequential()

# First hidden layer (also declares the input width)
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu"))

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu"))

# Output layer: a single linear unit, since the target is a continuous value
nn.add(tf.keras.layers.Dense(units=1, activation="linear"))

# Check the structure of the model
nn.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 60)                1140      
                                                                 
 dense_8 (Dense)             (None, 120)               7320      
                                                                 
 dense_9 (Dense)             (None, 120)               14520     
                                                                 
 dense_10 (Dense)            (None, 1)                 121       
                                                                 
Total params: 23,101
Trainable params: 23,101
Non-trainable params: 0
_________________________________________________________________


In [21]:
# Configure training: mean absolute error as the loss, minimised with Adam.
# NOTE(review): learning_rate=0.1 is far above Adam's documented default
# (0.001) — confirm this value is intentional.
nn.compile(
    loss='mean_absolute_error',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
)

In [22]:
# Train the network for 50 epochs on the scaled training data; the object
# returned by fit() is kept in fit_model for later inspection.
fit_model = nn.fit(x=X_train_scaled, y=y_train, epochs=50)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [23]:
# Score the trained network on the held-out test split. The model is compiled
# with mean absolute error as its only loss and no extra metrics, so the value
# returned here is that error.
mae = nn.evaluate(x=X_test_scaled, y=y_test, verbose=0)
print(f"Mean Absolute Error: {mae}")

Mean Absolute Error: 113.75089263916016
