In [1]:
# Tensor flow dependancies

import tensorflow as tf

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing


In [3]:
pip install -q seaborn

Note: you may need to restart the kernel to use updated packages.


In [2]:
# other python dependencies

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [3]:
# importing data

# Study data files
file_path = "AirbnbData.csv"


# Read the mouse data and the study results
airbnb_df = pd.read_csv(file_path)

airbnb_df.head()
 

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,10/19/2018,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,5/21/2019,0.38,2,355
2,3647,THE VILLAGE OF HARLEM....NEW YORK !,4632,Elisabeth,Manhattan,Harlem,40.80902,-73.9419,Private room,150,3,0,,,1,365
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,7/5/2019,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,11/19/2018,0.1,1,0


In [4]:
# creating a copy of the dataset to work with in case anything happens
dataset = airbnb_df.copy()

# preview dataset
dataset.dropna(inplace=True)

dataset.head()

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365
0,2539,Clean & quiet apt home by the park,2787,John,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,10/19/2018,0.21,6,365
1,2595,Skylit Midtown Castle,2845,Jennifer,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,5/21/2019,0.38,2,355
3,3831,Cozy Entire Floor of Brownstone,4869,LisaRoxanne,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,7/5/2019,4.64,1,194
4,5022,Entire Apt: Spacious Studio/Loft by central park,7192,Laura,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,11/19/2018,0.1,1,0
5,5099,Large Cozy 1 BR Apartment In Midtown East,7322,Chris,Manhattan,Murray Hill,40.74767,-73.975,Entire home/apt,200,3,74,6/22/2019,0.59,1,129


In [5]:
# dropping columns not needed
dataset = dataset.drop(columns=["id","name","host_id","host_name","last_review"])

# droping index
dataset.reset_index(drop=True, inplace=True)

# previewing data
dataset.head()

Unnamed: 0,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365
0,Brooklyn,Kensington,40.64749,-73.97237,Private room,149,1,9,0.21,6,365
1,Manhattan,Midtown,40.75362,-73.98377,Entire home/apt,225,1,45,0.38,2,355
2,Brooklyn,Clinton Hill,40.68514,-73.95976,Entire home/apt,89,1,270,4.64,1,194
3,Manhattan,East Harlem,40.79851,-73.94399,Entire home/apt,80,10,9,0.1,1,0
4,Manhattan,Murray Hill,40.74767,-73.975,Entire home/apt,200,3,74,0.59,1,129


In [7]:
# splitting dataset into x and y
x = dataset.drop(columns=["price"])
y = dataset["price"]


In [8]:
# Reformat data

# encoding categorical data
x = pd.get_dummies(x, prefix='', prefix_sep='')



# view shape of x data and preview
print(x.shape)
x.head()

(38821, 233)


Unnamed: 0,latitude,longitude,minimum_nights,number_of_reviews,reviews_per_month,calculated_host_listings_count,availability_365,Bronx,Brooklyn,Manhattan,...,Williamsbridge,Williamsburg,Willowbrook,Windsor Terrace,Woodhaven,Woodlawn,Woodside,Entire home/apt,Private room,Shared room
0,40.64749,-73.97237,1,9,0.21,6,365,0,1,0,...,0,0,0,0,0,0,0,0,1,0
1,40.75362,-73.98377,1,45,0.38,2,355,0,0,1,...,0,0,0,0,0,0,0,1,0,0
2,40.68514,-73.95976,1,270,4.64,1,194,0,1,0,...,0,0,0,0,0,0,0,1,0,0
3,40.79851,-73.94399,10,9,0.1,1,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
4,40.74767,-73.975,3,74,0.59,1,129,0,0,1,...,0,0,0,0,0,0,0,1,0,0


In [9]:
# Split data into training and testing
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)

In [12]:
from sklearn.preprocessing import StandardScaler

# Create a StandardScater model and fit it to the training data
x_scaler = StandardScaler().fit(x_train)

In [13]:
# Transform the training and testing data using the X_scaler

x_train_scaled = x_scaler.transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

In [14]:
x_train_scaled

array([[ 0.04287875, -1.10795676,  0.06314525, ...,  0.95495473,
        -0.9136242 , -0.15003921],
       [-0.93527983, -0.67238891, -0.11252751, ...,  0.95495473,
        -0.9136242 , -0.15003921],
       [-0.51943118,  0.52627342,  0.06314525, ..., -1.04717006,
         1.09454193, -0.15003921],
       ...,
       [ 1.22869011,  0.38292735, -0.28820026, ...,  0.95495473,
        -0.9136242 , -0.15003921],
       [ 1.33658989,  0.08751537, -0.17108509, ...,  0.95495473,
        -0.9136242 , -0.15003921],
       [-0.56360491, -0.48055189,  1.40996972, ..., -1.04717006,
         1.09454193, -0.15003921]])

In [21]:
# first, create a normal neural network with 2 inputs, 6 hidden nodes, and 2 outputs
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=154, activation='relu', input_dim=233))
model.add(Dense(units=1, activation='softmax'))

In [22]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 154)               36036     
_________________________________________________________________
dense_5 (Dense)              (None, 1)                 155       
Total params: 36,191
Trainable params: 36,191
Non-trainable params: 0
_________________________________________________________________


In [23]:
# Compile the model
model.compile(optimizer='adam',
              loss='mean_absolute_error',
              metrics=['accuracy'])

In [25]:
# training model

model.fit(
    x_train_scaled,
    y_train,
    epochs=100,
    shuffle=True,
    verbose=0
)

<tensorflow.python.keras.callbacks.History at 0x1635d6c4b70>

In [26]:
model_loss, model_accuracy = model.evaluate(
    x_test_scaled, y_test, verbose=2)
print(
    f"Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

9706/9706 - 0s - loss: 140.2461 - accuracy: 0.0000e+00
Neural Network - Loss: 140.24613641361194, Accuracy: 0.0


In [9]:
# add normalization layer
# normalizer = preprocessing.Normalization()

In [10]:
# normalizer.adapt(np.array(X_train))
# normalizer.adapt(np.array(X_train))