Import Dependencies and Data

In [None]:
# import dependencies
%matplotlib inline
from matplotlib import pyplot as plt
from sklearn.datasets import make_classification
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.linear_model import LinearRegression
import tensorflow as tf
import seaborn as sns
sns.set_theme(style="darkgrid")

In [None]:
pip uninstall tensorflow

In [None]:
pip install tensorflow-1.6.0-cp36-cp36m-win_amd64.whl

In [None]:
# additional dependencies
# library for regular expressions aka Regex
import re
from urllib.parse import quote_plus

import psycopg2
from sqlalchemy import create_engine

from config import db_password

In [None]:
# create the database engine
# The password is URL-encoded before being placed in the connection string so
# that characters such as "@", ":" or "/" cannot be misread as URL delimiters.
# str() guards against a non-string value in config.py.
db_string = f"postgresql://postgres:{quote_plus(str(db_password))}@127.0.0.1:5432/AirBnb"
engine = create_engine(db_string)

In [None]:
# create the dataframe
# pandas runs the query and builds the frame, column names included, in one step.
# This replaces `engine.execute()`, which was removed in SQLAlchemy 2.0, and the
# fetchall()-then-assign-columns pattern, which raised on an empty result set.
# coerce_float=False leaves non-float numeric values (e.g. decimal.Decimal) as
# returned by the driver, so column dtypes match what the previous code produced.
properties_df = pd.read_sql("SELECT * FROM properties", engine, coerce_float=False)

# Preview the first rows instead of rendering the whole table
properties_df.head()

Preprocessing

In [None]:
# Count the non-null values in each column to see where data is missing
properties_df.count()

In [None]:
# Keep only the rows that have no missing value in any column;
# properties_df itself is left untouched
dfClean = properties_df.dropna()

In [None]:
# Summary statistics of the cleaned frame
dfClean.describe()

In [None]:
# (rows, columns) left after dropping the incomplete rows
dfClean.shape

In [None]:
# Number of distinct values in each column
dfClean.nunique()

Column Selection and Type Conversion

In [None]:
# Inspect the "zip" column (it is dropped further down)
dfClean["zip"]

In [None]:
# Inspect the "room_type" column (it is dropped further down)
dfClean["room_type"]

In [None]:
# Inspect the "airbnb_neighborhood_id" column (it is dropped further down)
dfClean["airbnb_neighborhood_id"]

In [None]:
# Column dtypes and non-null counts before any columns are dropped
dfClean.info()

In [None]:
# Drop the columns that are not used as model features: the ID columns
# ("id", "airbnb_id") plus "night_price_native", "weekly_price", "monthly_price",
# "cleaning_fee_native_price", "occupancy", "lat", "lon", "room_type", "zip",
# "airbnb_neighborhood_id", "property_type" and "reviews_count".
# `columns=` is used because passing the axis positionally (`drop(labels, 1)`)
# was deprecated in pandas 1.3 and removed in pandas 2.0.
dfClean = dfClean.drop(
    columns=[
        "id",
        "airbnb_id",
        "night_price_native",
        "weekly_price",
        "monthly_price",
        "cleaning_fee_native_price",
        "occupancy",
        "lat",
        "lon",
        "room_type",
        "zip",
        "airbnb_neighborhood_id",
        "property_type",
        "reviews_count",
    ]
)

In [None]:
# Confirm which columns remain after the drop
dfClean.info()

In [None]:
# Transform num_of_baths to numeric
# A new frame is built with the converted column in place of the old one.
dfClean = dfClean.assign(num_of_baths=pd.to_numeric(dfClean["num_of_baths"]))

# Show the dtypes again to confirm the conversion
dfClean.info()

One-Hot Encoding

In [None]:
# List the remaining column names (used to pick the columns to encode next)
dfClean.columns

In [None]:
# One-hot encode the listed columns with get_dummies(): every distinct value in
# each listed column becomes its own indicator column, and the original column
# is replaced by those indicators.
# NOTE(review): several of these look numeric by name (e.g. "night_price",
# "nights_booked"); encoding them this way creates one column per distinct
# value. Confirm this is intended rather than keeping them as numeric features.
dfClean = pd.get_dummies(dfClean, columns=['night_price', 'num_of_baths', 'num_of_rooms', 'nights_booked', 'airbnb_city', 'capacity_of_people', 'num_of_beds', 'star_rating'])

In [None]:
# (rows, columns) after one-hot encoding
dfClean.shape

In [None]:
# Summary statistics of the encoded frame
dfClean.describe()

In [None]:
# Number of distinct values in each column of the encoded frame
dfClean.nunique()

Set up for ML

In [None]:
# Split our preprocessed data into our features and target arrays

# Features: every column except the target. `columns=` replaces the positional
# axis argument (`drop(labels, 1)`), which was removed in pandas 2.0.
X_df = dfClean.drop(columns=["rental_income"])
X = X_df.values

# Target: the "rental_income" column as a NumPy array
y = dfClean["rental_income"].values

In [None]:
# Split the preprocessed data into a training and testing dataset.
# random_state=78 makes the shuffle reproducible; no test_size is given, so
# scikit-learn's default split proportions apply.
# Note: the test target is named `y_tests` (plural), unlike `X_test`; the
# evaluation cells further down use that same spelling.
X_train, X_test, y_train, y_tests = train_test_split(X, y, random_state=78)

In [None]:
# Standardize the features. The scaler is fitted on the training split only and
# the same fitted scaler is then applied to both splits.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Transform train and test with the fitted scaler
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Sanity check: number of feature values in the first training row
len(X_train[0])

Compile, Train and Evaluate the model

In [None]:
# Define the model - a deep neural net with three hidden ReLU layers.
# Layer widths are derived from the number of input features: 3x, 2x and 1x.
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 = number_input_features * 3
hidden_nodes_layer2 = number_input_features * 2
hidden_nodes_layer3 = number_input_features * 1

# The model name is passed to the constructor instead of being written to the
# private `_name` attribute, which is not part of the public Keras API.
nn = tf.keras.models.Sequential(name="Neural_Network_Model")

# First hidden layer (also declares the input width)
nn.add(
    tf.keras.layers.Dense(
        units=hidden_nodes_layer1,
        input_dim=number_input_features,
        activation="relu",
        name="1st_Layer_RELU",
    )
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu", name="2nd_Layer_relu"))

# Third hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer3, activation="relu", name="3rd_Layer_relu"))

# Output layer: a single sigmoid unit, so predictions lie between 0 and 1.
# NOTE(review): the target column is named "rental_income"; if it holds a
# continuous amount rather than a 0/1 label, a sigmoid output (and the
# binary_crossentropy loss used when compiling) is not appropriate - confirm.
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid", name="Outer_Layer_SIGMOID"))

# Check the structure of the model
nn.summary()

In [None]:
# Number of feature values in the first scaled training row (the model's input width)
len(X_train_scaled[0])

In [None]:
# Compile the model: binary cross-entropy loss, Adam optimizer, and accuracy
# reported as the metric.
# NOTE(review): this loss expects targets between 0 and 1; confirm that
# "rental_income" is a 0/1 label rather than a continuous amount.
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Train the model for 100 epochs on the scaled training data.
# The returned object is kept because its `.history` is plotted below.
fit_model = nn.fit(X_train_scaled,y_train,epochs=100)

In [None]:
# Put the per-epoch training history into a DataFrame whose index is the
# epoch number, counting from 1
history_df = pd.DataFrame(
    fit_model.history,
    index=pd.RangeIndex(start=1, stop=len(fit_model.history["loss"]) + 1),
)

# Loss per epoch
history_df.plot(y="loss")

In [None]:
# Plot the accuracy recorded for each training epoch
history_df.plot(y="accuracy")

In [None]:
# Evaluate the model using the test data.
# evaluate() returns the loss followed by the compiled metric (accuracy).
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_tests,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

RandomForestClassifier

In [None]:
# import dependencies
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

In [1]:
# Build a 200-tree random forest (random_state=1 for reproducibility) and fit it
# on the scaled training data in a single chained call
rf_model = RandomForestClassifier(n_estimators=200, random_state=1).fit(
    X_train_scaled, y_train
)

# Predict on the scaled test data and report the share of exact matches
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_tests,y_pred):.3f}")

NameError: name 'RandomForestClassifier' is not defined