In [1]:
# multilayer perceptron imports
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
#import pandas_profiling
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

In [52]:
# load the cleaned data 
X = pd.read_csv('airbnb_cleaned_3.csv')

In [53]:
X.drop(columns = ['Unnamed: 0','Host_greets_you'],inplace = True)
print(X.shape)
X.head()

(22240, 15)


Unnamed: 0,price,cleaning_fee,accommodates,minimum_nights,bedrooms,bathrooms,neighbourhood_group_cleansed,room_type,description,extra_people,Laptop_friendly_workspace,TV,Wifi,Family_kid_friendly,Smoking_allowed
0,60.0,30.0,3,4,1.0,1.0,Mitte,Entire home/apt,Great location! 30 of 75 sq meters. This wood...,28.0,True,True,True,True,False
1,17.0,0.0,2,2,1.0,1.0,Pankow,Private room,In the summertime we are spending most of our ...,0.0,True,False,True,False,False
2,90.0,50.0,4,62,1.0,1.0,Pankow,Entire home/apt,This beautiful first floor apartment is situa...,20.0,True,False,True,True,False
3,26.0,30.0,2,5,1.0,1.0,Tempelhof - Schöneberg,Private room,First of all: I prefer short-notice bookings. ...,18.0,True,False,True,False,False
4,42.0,0.0,2,2,1.0,1.0,Pankow,Private room,Cozy and large room in the beautiful district ...,24.0,True,False,True,True,False


In [54]:
# explore the clean data (describe and profile report)
#X.profile_report()

In [55]:
X.drop(X[X.minimum_nights > 365].index,inplace = True)

In [56]:
# encoding Neighborhoods as values 1-12
# Mitte = 1
# Friedrichshain-Kreuzberg = 2
# Pankow = 3                      
# Neukölln = 4                
# Charlottenburg-Wilm. = 5
# Tempelhof - Schöneberg = 6
# Lichtenberg = 7
# Treptow - Köpenick = 8
# Steglitz - Zehlendorf = 9
# Reinickendorf = 10
# Marzahn - Hellersdorf = 11
# Spandau = 12
X.replace({'Mitte':1,'Friedrichshain-Kreuzberg':2,'Pankow':3,'Neukölln':4,'Charlottenburg-Wilm.':5,'Tempelhof - Schöneberg':6,'Lichtenberg':7,'Treptow - Köpenick':8,'Steglitz - Zehlendorf':9,'Reinickendorf':10,'Marzahn - Hellersdorf':11,'Spandau':12},inplace = True)

# encoding room types as values 
# 15 Entire home/apt
# 14 Private room
# 36 shared room
X.replace({'Entire home/apt':15,'Private room':14,'Shared room':36},inplace = True)

# setting true and false values to 1 or 0
X.replace({True:1,False:0},inplace = True)

In [57]:
X.head()

Unnamed: 0,price,cleaning_fee,accommodates,minimum_nights,bedrooms,bathrooms,neighbourhood_group_cleansed,room_type,description,extra_people,Laptop_friendly_workspace,TV,Wifi,Family_kid_friendly,Smoking_allowed
0,60.0,30.0,3,4,1.0,1.0,1,15,Great location! 30 of 75 sq meters. This wood...,28.0,1,1,1,1,0
1,17.0,0.0,2,2,1.0,1.0,3,14,In the summertime we are spending most of our ...,0.0,1,0,1,0,0
2,90.0,50.0,4,62,1.0,1.0,3,15,This beautiful first floor apartment is situa...,20.0,1,0,1,1,0
3,26.0,30.0,2,5,1.0,1.0,6,14,First of all: I prefer short-notice bookings. ...,18.0,1,0,1,0,0
4,42.0,0.0,2,2,1.0,1.0,3,14,Cozy and large room in the beautiful district ...,24.0,1,0,1,1,0


In [58]:
y = X['price']
X.drop(columns = 'price',inplace = True)
text = X.copy()
X.drop(columns = 'description',inplace = True)

In [59]:
X.head()

Unnamed: 0,cleaning_fee,accommodates,minimum_nights,bedrooms,bathrooms,neighbourhood_group_cleansed,room_type,extra_people,Laptop_friendly_workspace,TV,Wifi,Family_kid_friendly,Smoking_allowed
0,30.0,3,4,1.0,1.0,1,15,28.0,1,1,1,1,0
1,0.0,2,2,1.0,1.0,3,14,0.0,1,0,1,0,0
2,50.0,4,62,1.0,1.0,3,15,20.0,1,0,1,1,0
3,30.0,2,5,1.0,1.0,6,14,18.0,1,0,1,0,0
4,0.0,2,2,1.0,1.0,3,14,24.0,1,0,1,1,0


In [60]:
y.head()

0    60.0
1    17.0
2    90.0
3    26.0
4    42.0
Name: price, dtype: float64

In [61]:
# split into train and test data
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2,random_state = 42)

In [62]:
X_train = X_train.values
y_train = y_train.values
X_test = X_test.values
y_test = y_test.values

In [63]:
# normalize data
# run only once
#X_train = X_train / np.amax(X_train, axis=0)
#X_test = X_test / np.amax(X_test, axis=0)
#y_train = y_train / 100
#y_test = y_test / 100

In [64]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(17785, 13)
(4447, 13)
(17785,)
(4447,)


In [65]:
#set up model architecture
def create_model():
    model = Sequential()
    model.add(Dense(18,input_shape = (X_train.shape[1],),activation = 'relu'))
    model.add(Dense(12,activation = 'relu'))
    model.add(Dense(4,activation = 'relu'))
    model.add(Dense(1))
    model.compile(loss = 'mae',optimizer = 'adam')
    
    return model
model = create_model()
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 18)                252       
_________________________________________________________________
dense_5 (Dense)              (None, 12)                228       
_________________________________________________________________
dense_6 (Dense)              (None, 4)                 52        
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 5         
Total params: 537
Trainable params: 537
Non-trainable params: 0
_________________________________________________________________


In [69]:
# fit model
model.fit(X_train,y_train,
         epochs = 10,
         batch_size = 24,
         validation_data = (X_test,y_test),
         verbose = True,
         use_multiprocessing=False)

Train on 17785 samples, validate on 4447 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x1d5056062b0>

In [70]:
# evaluate model
score = model.evaluate(X_train,y_train,verbose = False)
score

19.32797574206018

In [71]:
# test predictions
z = [30.0,3,4,1,1,1,15,28,1,1,1,1,0]
pred = model.predict(X_test)
pred[:10]


array([[ 49.77434 ],
       [157.17906 ],
       [ 90.87505 ],
       [ 43.856514],
       [108.7303  ],
       [ 37.054436],
       [ 46.590122],
       [ 26.346895],
       [106.27014 ],
       [ 58.56659 ]], dtype=float32)

In [78]:
!pip install h5py



In [79]:
# saving the model
#import _pickle as cPickle
import dill
from sklearn.externals import joblib


'''filename = open('mmodel2.pkl','wb')
pickle.dump(model,filename)
filename.close()'''

#with open('model_pickled.pkl','wb') as file:
    #dill.dump(model,file)

path = "D:\\Documents\\Build week projects\\Airbnb\\Models"
model.save('model.h5')



In [86]:
from tensorflow.keras.models import load_model
m = load_model('model.h5')

In [87]:
m.predict([z])

array([[74.08036]], dtype=float32)