# Red neuronal mixta con datos numéricos e imágenes
Para esta parte, partimos de los datasets de train y test que ya teníamos preparados y que contienen los datos numéricos. También usamos los datasets que contienen la ruta de las imágenes, ordenados de la misma forma.

Recordemos que estos datasets los dejamos listos en el notebook de tratamiento de datos: [Tratamiento de datos](limpieza_datos.ipynb).

Las imágenes a las que hacemos referencia corresponden a las portadas de cada película; no las incluimos en el repositorio por el tamaño que ocupan.


In [1]:
import os
import pandas as pd

def load_airbnb_dataset(ruta, nombre):
    """Read a semicolon-separated CSV file into a pandas DataFrame.

    Args:
        ruta: Directory that contains the file.
        nombre: Name of the CSV file inside ``ruta``.

    Returns:
        The DataFrame produced by ``pandas.read_csv`` with ``sep=';'``.
    """
    return pd.read_csv(os.path.join(ruta, nombre), sep=';')

## Carga de datasets

In [2]:
# Numeric train/test tables, read from the 'datasets' directory.
train = load_airbnb_dataset(ruta='datasets', nombre='train_final.csv')
test = load_airbnb_dataset(ruta='datasets', nombre='test_final.csv')

# Companion tables; their 'local_poster_path' column is what the image
# loading step reads further down.
dfTrainImagenes = load_airbnb_dataset(
    ruta='datasets', nombre='imagenes_train_final.csv')
dfTestImagenes = load_airbnb_dataset(
    ruta='datasets', nombre='imagenes_test_final.csv')

## Carga de imágenes

In [19]:
import cv2
import os
import numpy as np

def cargaImagenes(dataF):
    """Load every image listed in ``dataF`` into a single NumPy array.

    Args:
        dataF: Table with a ``local_poster_path`` column. The first three
            characters of each path are dropped before the file is read.

    Returns:
        ``np.array`` built from the images returned by ``cv2.imread``, in
        the same order as the rows of ``dataF``.

    Raises:
        ValueError: If an image cannot be read, or if its shape is not
            336 x 224 x 3 (height x width x channels).
    """
    inputImages = []

    # Every image must have exactly this shape.
    origHeight = 336
    origWidth = 224
    origChann = 3

    for ruta in dataF['local_poster_path']:
        ruta = ruta[3:]  # drop the leftover leading characters of the path

        image = cv2.imread(ruta)

        # cv2.imread reports a missing or unreadable file by returning None
        # rather than raising, so fail here and name the offending path.
        if image is None:
            raise ValueError('No se pudo leer la imagen:', ruta)

        height, width, channels = image.shape

        if (height != origHeight) or (width != origWidth) or (channels != origChann):
            raise ValueError('Tamaño incorrecto en imagen:', ruta)

        inputImages.append(image)

    return np.array(inputImages)

In [20]:
# Read the images of each split, train first and then test.
imagenesTrain = cargaImagenes(dataF=dfTrainImagenes)
imagenesTest = cargaImagenes(dataF=dfTestImagenes)

## Escalado de datos
### Escalar imágenes al rango [0, 1]

In [21]:
# Scale the pixel values by 1/255.
# Cast to float32 first and divide in place: a plain `array / 255.0` on the
# integer image data would allocate a float64 array, twice the size of
# float32.
imagenesTrain = imagenesTrain.astype(np.float32)
imagenesTrain /= 255.0
imagenesTest = imagenesTest.astype(np.float32)
imagenesTest /= 255.0

### Escalar datos numéricos

In [22]:
from sklearn import preprocessing

# Split the target column ('vote_average') from the remaining columns.
dataPrecio = train['vote_average']
dataSinPrecio = train.drop(['vote_average'], axis=1, inplace=False)

dataPrecioTest = test['vote_average']
dataSinPrecioTest = test.drop(['vote_average'], axis=1, inplace=False)

y_train = dataPrecio.values
X_train = dataSinPrecio.values

y_test = dataPrecioTest.values
X_test = dataSinPrecioTest.values

# NOTE(review): this lists every column of `train`, including the target
# 'vote_average', so it has one more entry than X_train has columns.
# Left unchanged in case other cells rely on it.
feature_names = train.columns[:]

# Divide the target of both splits by the maximum found in train, so the
# train target is at most 1. Test values can exceed 1 if test contains a
# larger maximum.
maxPrice = train["vote_average"].max()
trainY = train["vote_average"] / maxPrice
testY = test["vote_average"] / maxPrice

# Standardise the numeric features. The scaler is fitted on train only and
# then applied to both splits, so train and test share the same mean and
# variance and no test statistics leak into the preprocessing.
scaler = preprocessing.StandardScaler().fit(X_train)
XtrainScaled = scaler.transform(X_train)
XtestScaled = scaler.transform(X_test)

### Red neuronal (MLP)

In [23]:
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
from keras.models import Model
from keras.optimizers import Adam

def create_mlp(dim):
    """Build the fully connected network for the numeric features.

    Args:
        dim: Number of input features, passed as ``input_dim`` of the
            first layer.

    Returns:
        An uncompiled ``Sequential`` model: Dense layers of 27, 9 and 3
        ReLU units followed by a single linear output unit.
    """
    capas = [
        Dense(27, input_dim=dim, activation="relu"),
        Dense(9, activation="relu"),
        Dense(3, activation="relu"),
        Dense(1, activation="linear"),
    ]
    return Sequential(capas)

Using TensorFlow backend.


### Red neuronal convolucional (CNN)

In [24]:
def create_cnn(width, height, depth, filters=(16, 32, 64), regress=False):
    """Build the convolutional network for the image input.

    Args:
        width: Image width in pixels.
        height: Image height in pixels.
        depth: Number of image channels.
        filters: Number of filters of each CONV block, in order. One
            CONV => RELU => BN => POOL block is added per entry.
        regress: When true, a final single-unit linear layer is appended.

    Returns:
        An uncompiled ``Model`` mapping an input of shape
        ``(height, width, depth)`` to the 10-unit ReLU layer, or to the
        single linear unit when ``regress`` is true.
    """
    # Channels-last ordering: the channel axis is the last one.
    inputShape = (height, width, depth)
    chanDim = -1

    # define the model input
    inputs = Input(shape=inputShape)

    # Start from the input tensor so that `x` is defined even when
    # `filters` is empty (it used to be bound only inside the loop).
    x = inputs
    for f in filters:
        # CONV => RELU => BN => POOL
        x = Conv2D(f, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=chanDim)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(16)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.5)(x)

    # second FC layer (10 units); this is the model output unless
    # `regress` is set
    x = Dense(10)(x)
    x = Activation("relu")(x)

    # optionally add the regression node
    if regress:
        x = Dense(1, activation="linear")(x)

    # construct and return the CNN
    return Model(inputs, x)

### Creación de modelos, concatenación MLP con CNN y generación de modelo final
Obtenemos ahora el modelo MLP y el CNN, luego los concatenamos y generamos un nuevo modelo con la unión

In [25]:
from keras.layers import concatenate

# create the MLP and CNN models: the MLP takes one input per scaled numeric
# column, the CNN takes images 224 wide x 336 high with 3 channels
mlp = create_mlp(XtrainScaled.shape[1])
cnn = create_cnn(224, 336, 3, regress=False)

# create the input to our final set of layers as the *output* of both
# the MLP and CNN
combinedInput = concatenate([mlp.output, cnn.output])

# our final FC layer head will have two dense layers, the final one
# being our regression head
x = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="linear")(x)

# our final model accepts the numeric data on the MLP input and the
# images on the CNN input, and outputs a single value
# NOTE(review): the earlier comment called this "the predicted price of
# the house"; the target built in this notebook comes from 'vote_average'
model = Model(inputs=[mlp.input, cnn.input], outputs=x)













Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


### Compilar y entrenar modelo

In [26]:
# Adam with learning rate 1e-3 and a decay of 1e-3 / 200 (200 is also the
# number of epochs requested below); the loss is mean squared error.
opt = Adam(lr=1e-3, decay=1e-3 / 200)
model.compile(optimizer=opt, loss="mean_squared_error")

# Train on the [numeric, image] input pair; the test split is passed as
# validation data. The returned history object is kept in `historico`.
print("[INFO] training model...")
historico = model.fit(
    x=[XtrainScaled, imagenesTrain],
    y=trainY,
    batch_size=8,
    epochs=200,
    validation_data=([XtestScaled, imagenesTest], testY),
)


[INFO] training model...


Train on 7921 samples, validate on 1977 samples
Epoch 1/200
Epoch 2/200
1728/7921 [=====>........................] - ETA: 45s - loss: 1271691.9732

KeyboardInterrupt: 