In [1]:
# Tensor flow dependancies

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing


In [3]:
pip install -q seaborn

Note: you may need to restart the kernel to use updated packages.


In [2]:
# other python dependencies

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# importing data

# Study data files
file_path = "AirbnbData.csv"


# Read the mouse data and the study results
airbnb_df = pd.read_csv(file_path)

airbnb_df.head()
 

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,10/19/2018,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,5/21/2019,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,7/5/2019,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,11/19/2018,0.1,1,0


In [4]:
# creating a copy of the dataset to work with in case anything happens
dataset = airbnb_df.copy()

# preview dataset
dataset.fillna(0, inplace = True)


dataset.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,10/19/2018,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,5/21/2019,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,0,0.0,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,7/5/2019,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,11/19/2018,0.1,1,0


In [5]:
# dropping columns not needed
dataset = dataset.drop(columns=["id","name","host_id","host_name",
                                "last_review","neighbourhood","latitude","longitude","minimum_nights", 
                                "number_of_reviews","calculated_host_listings_count"])

# droping index
dataset.reset_index(drop=True, inplace=True)

# previewing data
dataset.head()

Unnamed: 0,neighbourhood_group,room_type,price,reviews_per_month,availability_365
0,Brooklyn,Private room,149,0.21,365
1,Manhattan,Entire home/apt,225,0.38,355
2,Manhattan,Private room,150,0.0,365
3,Brooklyn,Entire home/apt,89,4.64,194
4,Manhattan,Entire home/apt,80,0.1,0


In [6]:
# splitting dataset into x and y
x = dataset.drop(columns=["price"])
y = dataset["price"]


In [7]:
# Reformat data

# encoding categorical data
x = pd.get_dummies(x, prefix='', prefix_sep='')



# view shape of x data and preview
print(x.shape)
x.head()

(48895, 10)


Unnamed: 0,reviews_per_month,availability_365,Bronx,Brooklyn,Manhattan,Queens,Staten Island,Entire home/apt,Private room,Shared room
0,0.21,365,0,1,0,0,0,0,1,0
1,0.38,355,0,0,1,0,0,1,0,0
2,0.0,365,0,0,1,0,0,0,1,0
3,4.64,194,0,1,0,0,0,1,0,0
4,0.1,0,0,0,1,0,0,1,0,0


In [18]:
# reshaping y data inorder to use standard scaler 
y = y.values.reshape(-1,1)



In [19]:
# Split data into training and testing
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

In [20]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data and test data
x_scaler = StandardScaler().fit(x_train)
y_scaler = StandardScaler().fit(y_train)

In [21]:
# Transform the training and testing data using the X_scaler and y_scaler

x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

In [24]:
y_train_scaled

array([[-0.31042082],
       [ 0.0857376 ],
       [ 0.11555598],
       ...,
       [-0.16132894],
       [ 0.75452118],
       [-0.39561618]])

In [14]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=30, activation='relu', input_dim=10))
model.add(Dense(units=1, activation='softmax'))

In [15]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                330       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 31        
Total params: 361
Trainable params: 361
Non-trainable params: 0
_________________________________________________________________


In [16]:
# Compile the model
model.compile(optimizer='adam',
              loss='mean_absolute_error')

In [22]:
# training model

model.fit(
    x_train_scaled,
    y_train_scaled,
    epochs=100,
    shuffle=True,
    verbose=0
)

<tensorflow.python.keras.callbacks.History at 0x1956ae2c5f8>

In [32]:
model.evaluate(x,y, verbose=0)

151.72113713277045

In [30]:

predictions = model.predict(x_test_scaled)

# a = plt.axes(aspect='equal')
# plt.scatter(y_test_scaled, predictions)
# plt.xlabel('True Prices')
# plt.ylabel('Predictions')
# lims = [-1, 1]
# plt.xlim(lims)
# plt.ylim(lims)
# _ = plt.plot(lims, lims)

print(predictions.shape)
print(y_test_scaled.shape)

(12224, 1)
(12224, 1)


In [9]:
# add normalization layer
# normalizer = preprocessing.Normalization()

In [10]:
# normalizer.adapt(np.array(X_train))
# normalizer.adapt(np.array(X_train))