In [9]:
import os
import numpy as np
import pandas as pd

import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import model_from_json
from keras import regularizers

tf.keras.backend.set_floatx('float32')

In [10]:
# read in data:
df = pd.read_csv('/content/drive/MyDrive/AB_US_2020.csv')
df.shape

  interactivity=interactivity, compiler=compiler, result=result)


(226030, 17)

In [11]:
df.columns

Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365', 'city'],
      dtype='object')

In [12]:
# Drop Unusable columns
columns = ['id',
           'neighbourhood_group',
           'last_review',
           'reviews_per_month',
           'name',
           'host_name',
           'latitude',
           'longitude',
           'number_of_reviews',
           'host_id',
           'calculated_host_listings_count']

df = df.drop(columns, axis=1)
df.shape

(226030, 6)

In [17]:
# factorize non-numeric categorical data,
# simple process on data for preparation to train


df['neighbourhood'] = pd.factorize(df.neighbourhood)[0]
df['room_type'] = pd.factorize(df.room_type)[0]
df['city'] = pd.factorize(df.city)[0]
df['price'] = df['price'].astype(float)

df = df[df['price'] <= 501]

df.shape

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/p

(212300, 6)

In [18]:
df.isnull().sum()

neighbourhood       0
room_type           0
price               0
minimum_nights      0
availability_365    0
city                0
dtype: int64

In [27]:
# Set target variables for training
target = ['price']
df_low = df.drop(target, axis=1)
X = df_low.values
Y = df[target].values

print(X.shape, Y.shape)

(212300, 5) (212300, 1)


In [28]:
MMS = MinMaxScaler()
X = MMS.fit_transform(X)

In [29]:
# Train/Test Split:

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=42)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)

(169840, 5) (42460, 5)
(169840, 1) (42460, 1)


In [30]:
# Set regularization and NN features:
learn_rate_reducer = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', patience=2, factor=0.2, min_lr=0.001, cooldown=2)
early_stop = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

callbacks = [learn_rate_reducer, early_stop]

rmse = tf.keras.metrics.RootMeanSquaredError()

In [33]:
# Model:

nn = Sequential()

nn.add(Dense(128, input_shape=(X_train.shape[1],), kernel_regularizer=regularizers.l1(0.005), activation='relu'))
nn.add(Dropout(0.25))
nn.add(Dense(256, kernel_regularizer=regularizers.l1(0.005), activation='relu'))
nn.add(Dropout(0.15))
nn.add(Dense(512, kernel_regularizer=regularizers.l2(0.005), activation='relu'))
nn.add(Dropout(0.3))
nn.add(Dense(64, kernel_regularizer=regularizers.l2(0.005), activation='relu'))
nn.add(Dense(1, activation='linear'))

nn.compile(loss='mean_squared_error',
           optimizer='nadam',
           metrics=['mean_absolute_error', rmse])

# Model summary:
print(nn.summary())

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_15 (Dense)             (None, 128)               768       
_________________________________________________________________
dropout_9 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_16 (Dense)             (None, 256)               33024     
_________________________________________________________________
dropout_10 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_17 (Dense)             (None, 512)               131584    
_________________________________________________________________
dropout_11 (Dropout)         (None, 512)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 64)               

In [34]:
history = nn.fit(X_train,
                 y_train,
                 epochs=50,
                 batch_size=64,
                 validation_data=(X_test, y_test),
                 callbacks=callbacks)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50


In [74]:
# Serialize Model to JSON:
import joblib

np.random.seed(12)

model_json = nn.to_json()
with open("nn.json", "w") as json_file:
  json_file.write(model_json)

nn.save_weights('nn.h5')

joblib.dump(MMS, 'MMS.gz')


['MMS.gz']

In [37]:
# Load Serialized Model to Reuse:

json_file = open('nn.json', 'r')
loaded_model_json = json_file.read()
json_file.close()

loaded_model = model_from_json(loaded_model_json)
loaded_model.load_weights('nn.h5')

In [71]:
# Test on prediction:

pred = np.array([[1,2,2,300,20]])

transformed_prediction = MMS.transform(pred)

In [72]:
# Original Model:

nn.predict(transformed_prediction)[0][0]

185.5262

In [73]:
# Loaded Model w/ saved weights:

loaded_model.predict(transformed_prediction)[0][0]


185.5262

In [78]:
from sklearn.preprocessing import MinMaxScaler
import joblib

def predict_opt_price(neighborhood, room_type, minimum_nights, availability_90, city):
  # Import weights and load model to predict:
  json_file = open('nn.json', 'r')
  loaded_model_json = json_file.read()
  json_file.close()

  loaded_model = model_from_json(loaded_model_json)
  loaded_model.load_weights('nn.h5')

  # Load Scaler:
  MMS = joblib.load('MMS.gz')

  raw_data = np.array([[neighborhood, room_type, minimum_nights, availability_90, city]])

  transformed = MMS.transform(raw_data)

  prediction = loaded_model.predict(transformed)[0][0]

  return prediction

  



In [79]:
predict_opt_price(neighborhood=0, room_type=1, minimum_nights=5, availability_90=70, city=21)

186.46158