# Predicción del precio de alquiler de una casa vacacional en Madrid

## Setup

In [None]:
def getInputPath():
  """Return the relative path prefix 'Inputs/' (trailing slash included)."""
  input_prefix = 'Inputs/'
  return input_prefix

In [None]:
def getOutputPath():
  """Return the relative path prefix 'Outputs/' (trailing slash included)."""
  output_prefix = 'Outputs/'
  return output_prefix

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau

import numpy
# fix random seed for reproducibility
numpy.random.seed(7)
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
%matplotlib inline

Using TensorFlow backend.


## Preparación de los datos

In [None]:
# Step 1: load the consolidated dataset and separate predictors from the target.
datos_airbnb_consolidados = pd.read_csv(getOutputPath() + "consolidados_ml.csv")
features = datos_airbnb_consolidados.columns.drop(['price'])
X = datos_airbnb_consolidados[features].values
y = datos_airbnb_consolidados['price'].values

# Hold out 30% of the rows for test, then 20% of the remainder for validation.
# Both splits use random_state=1 so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=1)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=1)

# Report the shape of each partition ...
for label, part in (('Datos train: ', X_train),
                    ('Datos validation: ', X_val),
                    ('Datos test:  ', X_test)):
    print(label, part.shape)

# ... and the mean of the target ('price') in each one.
for template, target in (('Propocion train:%0.3f', y_train),
                         ('Propocion validation:%0.3f', y_val),
                         ('Propocion test: %0.3f', y_test)):
    print(template % np.mean(target))

Datos train:  (10104, 211)
Datos validation:  (2527, 211)
Datos test:   (5414, 211)
Propocion train:74.229
Propocion validation:73.206
Propocion test: 74.547


In [None]:

# Step 1.1: preprocess the variables.
# The scaler is fitted on the training split only and then applied to all three
# splits, so validation and test rows do not influence the scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train)
Xs_train, Xs_val, Xs_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

## Aproximación base

In [None]:
# Baseline network: five 500-unit Dense layers followed by a 1-unit output layer.
# The model is created and compiled in the same cell because compile() needs an
# existing `model`; compiling in an earlier cell fails with NameError on a fresh
# kernel (Restart & Run All).
# NOTE: no `activation` is passed to Dense, so Keras applies its default
# (linear) activation to every layer.
model = Sequential()

# Only the first layer needs the input size; take it from the scaled training
# matrix instead of hard-coding the number of feature columns.
model.add(Dense(500, input_dim=Xs_train.shape[1], kernel_initializer='random_normal'))
for _hidden in range(4):
    model.add(Dense(500))

# Single output unit: the predicted price.
model.add(Dense(1))

# Compile model
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['mse'])

In [None]:
# Fit the model.
# reduce_lr multiplies the learning rate by 0.3 after 2 epochs without
# improvement in val_loss (never going below 1e-4); early_stopper ends training
# after 5 epochs without improvement in its monitored quantity (Keras default).
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=2,
    min_lr=0.0001,
)
early_stopper = EarlyStopping(patience=5)

model.fit(
    Xs_train,
    y_train,
    validation_data=(Xs_val, y_val),
    epochs=500,
    batch_size=500,
    callbacks=[reduce_lr, early_stopper],
)

Train on 10104 samples, validate on 2527 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500


<keras.callbacks.History at 0x7f46ed4e9fd0>

In [None]:
# Evaluate the model on the held-out test set.
# `scores` holds the loss followed by the compiled metrics, in the same order as
# model.metrics_names.
scores = model.evaluate(Xs_test, y_test)

# MSE is an absolute error (squared price units), not a ratio, so it is printed
# as-is: no scaling by 100 and no percent sign.
print("\n%s: %.2f" % (model.metrics_names[1], scores[1]))
scores


mean_squared_error: 180298.89%


[1802.9888679721412, 1802.9888679721412]

## Búsqueda de hiperparámetros con un algoritmo genético

In [None]:
# Put the 'neural-network-genetic-algorithm/' directory first on the module
# search path (relative to the current working directory) so that its modules
# can be imported by bare name.
import sys
sys.path.insert(0, 'neural-network-genetic-algorithm/')


# `main` is not a stdlib module; presumably it is the entry module found in the
# directory added above (verify). Aliased as `ge` for the rest of the notebook.
import main as ge


In [None]:
import logging
from optimizer import Optimizer
from tqdm import tqdm

# Setup logging: records at DEBUG level and above are written to log.txt, each
# prefixed with a 12-hour timestamp and the level name.
log_settings = {
    'filename': 'log.txt',
    'level': logging.DEBUG,
    'format': '%(asctime)s - %(levelname)s - %(message)s',
    'datefmt': '%m/%d/%Y %I:%M:%S %p',
}
logging.basicConfig(**log_settings)

In [None]:
generations = 20  # Number of times to evolve the population.
population = 20  # Number of networks in each generation.

# Candidate values for each hyperparameter explored by the search.
nn_param_choices = dict(
    nb_neurons=[64, 128, 256, 512, 768, 1024],
    nb_layers=[1, 2, 3, 4, 6, 7],
    activation=['linear', 'relu', 'elu', 'tanh', 'sigmoid'],
    optimizer=['rmsprop', 'adam', 'adagrad', 'adadelta', 'adamax', 'nadam'],
    batch_size=[100, 200, 300, 500, 1000],
)

# Arguments are handed to the logger so the message is only formatted when the
# record is actually emitted; the resulting text is unchanged.
logging.info("***Evolving %d generations with population %d***",
             generations, population)

# NOTE(review): the meaning of the positional `1` is defined by main.generate,
# which is not visible from this notebook.
best_network = ge.generate(
    generations, population, nn_param_choices, 1,
    Xs_train, Xs_val, Xs_test,
    y_train, y_val, y_test,
)


100%|██████████| 20/20 [03:50<00:00,  9.14s/it]
100%|██████████| 20/20 [01:56<00:00, 18.41s/it]
100%|██████████| 20/20 [02:37<00:00, 16.61s/it]
100%|██████████| 20/20 [02:12<00:00, 12.15s/it]
100%|██████████| 20/20 [02:49<00:00, 15.45s/it]
100%|██████████| 20/20 [02:31<00:00, 13.46s/it]
100%|██████████| 20/20 [02:26<00:00, 11.88s/it]
100%|██████████| 20/20 [03:09<00:00, 15.78s/it]
100%|██████████| 20/20 [03:22<00:00, 20.71s/it]
100%|██████████| 20/20 [04:02<00:00, 21.42s/it]
100%|██████████| 20/20 [04:17<00:00, 22.39s/it]
100%|██████████| 20/20 [04:50<00:00, 23.65s/it]
100%|██████████| 20/20 [05:15<00:00, 28.47s/it]
100%|██████████| 20/20 [05:38<00:00, 37.94s/it]
100%|██████████| 20/20 [04:56<00:00, 31.27s/it]
100%|██████████| 20/20 [05:59<00:00, 34.31s/it]
100%|██████████| 20/20 [05:49<00:00, 35.08s/it]
100%|██████████| 20/20 [08:36<00:00, 42.81s/it]
100%|██████████| 20/20 [07:19<00:00, 40.68s/it]
100%|██████████| 20/20 [07:59<00:00, 41.70s/it]


In [None]:

# Display the hyperparameter dictionary of the network returned by the search.
best_network.network


{'activation': 'relu',
 'batch_size': 100,
 'nb_layers': 7,
 'nb_neurons': 768,
 'optimizer': 'adamax'}

In [None]:
# Shell escape: list the current working directory (presumably to check that
# log.txt was written before downloading it).
!ls 


drive  log.txt	sample_data


In [None]:
# Colab-only helper module: this import fails outside Google Colab.
from google.colab import files


# Hand log.txt (the file configured in logging.basicConfig) to the Colab
# download helper so it can be saved locally.
files.download('log.txt')

## Afinar red con callbacks

### Base

In [None]:
# Rebuild the architecture selected by the genetic search.
best_params = best_network.network
nb_layers = best_params['nb_layers']
nb_neurons = best_params['nb_neurons']
activation = best_params['activation']
optimizer = best_params['optimizer']

# Take the input width from the scaled training matrix rather than hard-coding
# the number of feature columns, so the cell keeps working if the dataset's
# columns change.
n_features = Xs_train.shape[1]

model = Sequential()

# Add each hidden layer; only the first one needs the input size.
for i in range(nb_layers):
    first_layer_kwargs = {'input_dim': n_features} if i == 0 else {}
    model.add(Dense(nb_neurons, activation=activation, **first_layer_kwargs))

# Output layer: a single unit for the predicted price.
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer=optimizer,
              metrics=['mse'])

In [None]:
# Train the rebuilt network for a fixed 150 epochs (no callbacks), using the
# batch size chosen by the genetic search and the validation split for
# per-epoch monitoring.
model.fit(
    Xs_train,
    y_train,
    validation_data=(Xs_val, y_val),
    batch_size=best_network.network['batch_size'],
    epochs=150,
    verbose=2,
)

# Test-set result: the loss followed by the compiled metric (both are MSE here).
score = model.evaluate(Xs_test, y_test, verbose=2)
score

Train on 10104 samples, validate on 2527 samples
Epoch 1/150
 - 28s - loss: 3491.1083 - mean_squared_error: 3491.1083 - val_loss: 2448.1652 - val_mean_squared_error: 2448.1652
Epoch 2/150
 - 2s - loss: 1776.7720 - mean_squared_error: 1776.7720 - val_loss: 1717.4067 - val_mean_squared_error: 1717.4067
Epoch 3/150
 - 2s - loss: 1489.2175 - mean_squared_error: 1489.2175 - val_loss: 1626.3553 - val_mean_squared_error: 1626.3553
Epoch 4/150
 - 2s - loss: 1268.0068 - mean_squared_error: 1268.0068 - val_loss: 1611.8498 - val_mean_squared_error: 1611.8498
Epoch 5/150
 - 2s - loss: 1078.7451 - mean_squared_error: 1078.7451 - val_loss: 1557.7091 - val_mean_squared_error: 1557.7091
Epoch 6/150
 - 2s - loss: 877.7691 - mean_squared_error: 877.7691 - val_loss: 1583.9665 - val_mean_squared_error: 1583.9665
Epoch 7/150
 - 2s - loss: 734.7812 - mean_squared_error: 734.7812 - val_loss: 1531.5840 - val_mean_squared_error: 1531.5840
Epoch 8/150
 - 2s - loss: 523.1343 - mean_squared_error: 523.1343 - val_

[1315.0827249696786, 1315.0827249696786]

### Ajuste Learning Rate

In [None]:
# NOTE(review): Adamax is imported here but not used in this cell; the
# optimizer is still passed to compile() by name.
from keras.optimizers import Adamax

# Rebuild the architecture selected by the genetic search so that training
# starts again from freshly initialised weights.
best_params = best_network.network
nb_layers = best_params['nb_layers']
nb_neurons = best_params['nb_neurons']
activation = best_params['activation']
optimizer = best_params['optimizer']

# Take the input width from the scaled training matrix rather than hard-coding
# the number of feature columns, so the cell keeps working if the dataset's
# columns change.
n_features = Xs_train.shape[1]

model = Sequential()

# Add each hidden layer; only the first one needs the input size.
for i in range(nb_layers):
    first_layer_kwargs = {'input_dim': n_features} if i == 0 else {}
    model.add(Dense(nb_neurons, activation=activation, **first_layer_kwargs))

# Output layer: a single unit for the predicted price.
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer=optimizer,
              metrics=['mse'])

In [None]:
# Learning-rate schedule: multiply the rate by 0.3 after 2 epochs without
# improvement in val_loss, wait 3 epochs (cooldown) before monitoring again,
# and never go below 1e-5.
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.3,
    patience=2,
    cooldown=3,
    min_lr=0.00001,
)

# Same training setup as the base run, with the schedule attached as a callback.
model.fit(
    Xs_train,
    y_train,
    validation_data=(Xs_val, y_val),
    batch_size=best_network.network['batch_size'],
    epochs=150,
    verbose=2,
    callbacks=[reduce_lr],
)

# Test-set result: the loss followed by the compiled metric (both are MSE here).
score = model.evaluate(Xs_test, y_test, verbose=2)
score

Train on 10104 samples, validate on 2527 samples
Epoch 1/150
 - 30s - loss: 2532.8965 - mean_squared_error: 2532.8965 - val_loss: 1840.2206 - val_mean_squared_error: 1840.2206
Epoch 2/150
 - 2s - loss: 1656.9576 - mean_squared_error: 1656.9576 - val_loss: 1736.8250 - val_mean_squared_error: 1736.8250
Epoch 3/150
 - 2s - loss: 1388.4757 - mean_squared_error: 1388.4757 - val_loss: 1600.3117 - val_mean_squared_error: 1600.3117
Epoch 4/150
 - 2s - loss: 1182.2606 - mean_squared_error: 1182.2606 - val_loss: 1567.2190 - val_mean_squared_error: 1567.2190
Epoch 5/150
 - 2s - loss: 952.4171 - mean_squared_error: 952.4171 - val_loss: 1595.7205 - val_mean_squared_error: 1595.7205
Epoch 6/150
 - 2s - loss: 741.3650 - mean_squared_error: 741.3650 - val_loss: 1749.4547 - val_mean_squared_error: 1749.4547
Epoch 7/150
 - 2s - loss: 524.9158 - mean_squared_error: 524.9158 - val_loss: 1495.8945 - val_mean_squared_error: 1495.8945
Epoch 8/150
 - 2s - loss: 400.0002 - mean_squared_error: 400.0002 - val_lo

[1369.0175877772974, 1369.0175877772974]

## Conclusión
Como se ve, la red neuronal con los siguientes parámetros es la que mejor resultado nos da:

{'activation': 'relu',

 'batch_size': 100,
 
 'nb_layers': 7,
 
 'nb_neurons': 768,
 
 'optimizer': 'adamax'}
 
 Nos da un valor de *MSE* de **1315.08** sobre el conjunto de test.
 
