In [270]:
import keras
import pandas as pd
import numpy as np
from keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt
from keras.layers import Dense, MaxPool2D, Conv2D, concatenate, Dropout
from keras.layers import BatchNormalization, Flatten, InputLayer, Input
from keras.models import Sequential, Model
from keras.utils import plot_model
from keras.optimizers import Adam

In [280]:
import os
import glob
import cv2
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelBinarizer, MinMaxScaler

In [247]:
def load_house_attributes(inputPath, min_count=25):
    """Load the house attribute table and drop houses from rare zip codes.

    Parameters
    ----------
    inputPath : str or file-like
        Space-separated, header-less text file whose columns are, in order,
        bedrooms, bathrooms, area, zipcode and price.
    min_count : int, optional
        Minimum number of rows a zip code must have for its houses to be
        kept. Defaults to 25, the threshold that used to be hard-coded.

    Returns
    -------
    pandas.DataFrame
        The rows whose zip code occurs at least ``min_count`` times. The
        original row labels are preserved (the index is not reset).
    """
    cols = ["bedrooms", "bathrooms", "area", "zipcode", "price"]
    df = pd.read_csv(inputPath, sep=" ", header=None, names=cols)

    # Occurrence count of each row's own zip code; a single boolean mask
    # replaces the per-zip-code in-place drops.
    per_row_count = df["zipcode"].map(df["zipcode"].value_counts())

    # `~(count < min_count)` instead of `count >= min_count`: a row with a
    # missing zip code has a NaN count and must be kept, as it was before.
    return df[~(per_row_count < min_count)]

In [200]:
# Load the attribute table from HousesInfo.txt (path is relative to the
# notebook's working directory); rare zip codes are already filtered out.
df = load_house_attributes('HousesInfo.txt')

In [201]:
# Display the filtered attribute table; the row labels are the original
# file row numbers because the index is not reset after filtering.
df

Unnamed: 0,bedrooms,bathrooms,area,zipcode,price
30,5,3.0,2520,93446,789000.0
32,3,2.0,1802,93446,365000.0
39,3,3.0,2146,93446,455000.0
80,4,2.5,2464,91901,599000.0
81,2,2.0,1845,91901,529800.0
82,2,1.0,1184,91901,397500.0
83,4,2.5,3060,91901,699999.0
84,3,2.5,2733,91901,689000.0
85,3,2.0,1706,91901,529000.0
86,5,4.5,4886,91901,1430000.0


# Build one 448x448 montage per house (2x2 grid of 224x224 photos) and save
# it as newdata/<row label>.jpg.  Source photos are looked up under
# data/<row label + 1>_*, while the output file uses the row label itself.
os.makedirs('newdata/', exist_ok=True)  # cv2.imwrite does not create directories

for i in df.index.values:
    path = os.path.join('data/', "{}_*".format(i+1))
    house_path = sorted(glob.glob(path))
    if len(house_path) < 4:
        raise FileNotFoundError(
            "expected 4 images matching {!r}, found {}".format(path, len(house_path)))

    tiles = []
    for photo_path in house_path[:4]:
        photo = cv2.imread(photo_path)
        if photo is None:
            # cv2.imread reports an unreadable file by returning None
            raise ValueError("could not read image {!r}".format(photo_path))
        tiles.append(cv2.resize(photo, (224,224)))

    # NOTE(review): these names assume the sorted file names come out as
    # bathroom, bedroom, frontal, kitchen -- confirm against the data folder.
    bath, bed, front, kitchen = tiles

    # The tiles are kept as the uint8 arrays OpenCV produced; converting to
    # float and then to int64 left the pixel values unchanged but produced a
    # dtype that is not an 8-bit image.
    img1 = np.concatenate((bath, bed), axis=1)       # top row
    img2 = np.concatenate((front, kitchen), axis=1)  # bottom row
    img3 = np.concatenate((img1, img2), axis=0)

    out_path = 'newdata/'+str(i)+'.jpg'
    if not cv2.imwrite(out_path, img3):
        raise IOError("could not write montage {!r}".format(out_path))

In [202]:
def process_house_attributes(df, train, test):
    """Convert the raw attribute frames into numeric feature matrices.

    The bedrooms, bathrooms and area columns are min-max scaled with a
    scaler fitted on ``train`` only.  The zipcode column is encoded with a
    ``LabelBinarizer`` fitted on every zip code in ``df`` so that both
    splits share the same encoding.

    Parameters
    ----------
    df : DataFrame with a "zipcode" column covering both splits.
    train, test : DataFrames holding the columns named above.

    Returns
    -------
    tuple
        ``(trainX, testX)``; in each array the encoded zip-code columns come
        first, followed by the three scaled numeric columns.
    """
    numeric_cols = ["bedrooms", "bathrooms", "area"]

    # Fit both transformers up front, then apply them to each split.
    scaler = MinMaxScaler()
    scaler.fit(train[numeric_cols])

    zip_encoder = LabelBinarizer()
    zip_encoder.fit(df["zipcode"])

    def encode(frame):
        # Categorical block first, numeric block second.
        zip_block = zip_encoder.transform(frame["zipcode"])
        numeric_block = scaler.transform(frame[numeric_cols])
        return np.hstack([zip_block, numeric_block])

    return (encode(train), encode(test))

In [229]:
def load_house_images(df, inputPath, tile_size=32):
    """Load each house's four photos and tile them into one square image.

    Parameters
    ----------
    df : pandas.DataFrame
        Only its index is used: for row label ``i`` the files matching
        ``<inputPath>/<i + 1>_*`` are loaded.
    inputPath : str
        Directory containing the house photos.
    tile_size : int, optional
        Side length, in pixels, each photo is resized to.  The montage is
        ``2 * tile_size`` on each side.  Defaults to 32 (a 64x64 montage).

    Returns
    -------
    numpy.ndarray
        One uint8 montage per row of ``df``, in index order.

    Raises
    ------
    FileNotFoundError
        If fewer than four files match a house's pattern.
    ValueError
        If a matched file cannot be decoded as an image.
    """
    side = 2 * tile_size
    # (row, column) offset of each tile, in sorted-file order: top-left,
    # top-right, bottom-right, bottom-left.
    offsets = [(0, 0), (0, tile_size), (tile_size, tile_size), (tile_size, 0)]

    images = []
    for i in df.index.values:
        # Sorting the matches keeps the four photos in the same order for
        # every house.
        basePath = os.path.join(inputPath, "{}_*".format(i + 1))
        housePaths = sorted(glob.glob(basePath))
        if len(housePaths) < 4:
            raise FileNotFoundError(
                "expected 4 images matching {!r}, found {}".format(
                    basePath, len(housePaths)))

        outputImage = np.zeros((side, side, 3), dtype="uint8")
        # Only the first four matches are placed, so only those are read.
        for housePath, (top, left) in zip(housePaths[:4], offsets):
            image = cv2.imread(housePath)
            if image is None:
                # cv2.imread reports an unreadable file by returning None
                raise ValueError("could not read image {!r}".format(housePath))
            image = cv2.resize(image, (tile_size, tile_size))
            outputImage[top:top + tile_size, left:left + tile_size] = image

        images.append(outputImage)

    return np.array(images)

In [231]:
# One tiled montage per row of df, in the same order as df's index, so the
# two can be split together below.
images = load_house_images(df, 'data/')

In [236]:
# Split attributes and images together so the rows stay aligned: 75% train,
# 25% test, with a fixed seed for a repeatable split.
split = train_test_split(df, images, test_size=0.25, random_state=42)
trainAttrX, testAttrX, trainImagesX, testImagesX = split

# Targets: prices divided by the largest *training* price, which puts the
# training targets in (0, 1]; test targets use the same divisor.
maxPrice = trainAttrX["price"].max()
trainY, testY = (part["price"] / maxPrice for part in (trainAttrX, testAttrX))

# Replace the attribute frames with their numeric feature matrices
# (encoded zip code followed by min-max scaled numeric columns).
trainAttrX, testAttrX = process_house_attributes(df, trainAttrX, testAttrX)

In [176]:
# NOTE(review): `X` is not assigned in any cell shown in this notebook; this
# cell presumably relies on leftover kernel state and would raise NameError
# after Restart & Run All -- confirm, then define it or remove the cell.
X

Unnamed: 0,0,1,2,3
0,4,4.0,4053,85255
1,4,3.0,3343,36372
2,3,4.0,3923,85266
3,5,5.0,4022,85262
4,3,4.0,4116,85266
5,4,5.0,4581,85266
6,3,4.0,2544,85262
7,4,5.0,5524,85266
8,3,4.0,4229,85255
9,4,5.0,3550,85262


In [249]:
def create_cnn(input_shape=(64, 64, 3), filters=(32, 64, 128, 64, 16)):
    """Build the convolutional regression branch for the house montages.

    Parameters
    ----------
    input_shape : tuple, optional
        Shape of one input image.  Defaults to ``(64, 64, 3)``.
    filters : sequence of int, optional
        Number of filters of each convolutional stage, in order.  Every
        stage is Conv2D(3x3, same padding, ReLU) -> BatchNormalization ->
        2x2 max pooling.  Defaults to the original five stages.

    Returns
    -------
    keras.models.Model
        Uncompiled model mapping an image to a single linear output.
    """
    inputs = Input(shape=input_shape)

    # The stages differ only in filter count, so build them in a loop
    # instead of repeating the same three layers.
    x = inputs
    for n_filters in filters:
        x = Conv2D(n_filters, (3, 3), padding='same', activation='relu')(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPool2D((2, 2))(x)

    # Fully connected head ending in one linear unit.
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(1, activation='linear')(x)

    return Model(inputs, x)

In [258]:
def create_dense(input_dim=None):
    """Build the fully connected regression branch for the house attributes.

    Parameters
    ----------
    input_dim : int, optional
        Number of input features.  When omitted, it is read from the
        notebook-level ``trainAttrX`` (``trainAttrX.shape[1]``), which is
        what this function always did before; pass it explicitly to avoid
        depending on that global.

    Returns
    -------
    keras.models.Sequential
        Uncompiled 16 -> 32 -> 8 -> 1 network with a linear output unit.
    """
    if input_dim is None:
        # Backward-compatible fallback to the global feature matrix.
        input_dim = trainAttrX.shape[1]

    model = Sequential()
    model.add(Dense(16, input_shape=(input_dim,), activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(8, activation="relu"))
    model.add(Dense(1, activation="linear"))
    return model

In [259]:
# Image branch: takes the tiled house montage as input.
cnn = create_cnn()

In [260]:
# Attribute branch: its input width is taken from trainAttrX, so the
# split/preprocessing cell must have been run first.
den = create_dense()

In [265]:
# Fuse the two branches and regress the price from the merged vector.
# NOTE(review): both branches already end in a single linear unit, so the
# concatenation holds just two values -- confirm this bottleneck is intended.
cm = concatenate([den.output, cnn.output])
x = Dense(8, activation = "relu")(cm)
x = Dense(1, activation = "linear")(x)
# Input order is [attributes, images]; fit/predict must pass them in that order.
model = Model(inputs = [den.input, cnn.input], outputs = x)

In [267]:
# Print the layer-by-layer structure of the combined model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 64, 64, 3)    0                                            
__________________________________________________________________________________________________
conv2d_6 (Conv2D)               (None, 64, 64, 32)   896         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_6 (BatchNor (None, 64, 64, 32)   128         conv2d_6[0][0]                   
__________________________________________________________________________________________________
max_pooling2d_6 (MaxPooling2D)  (None, 32, 32, 32)   0           batch_normalization_6[0][0]      
__________________________________________________________________________________________________
conv2d_7 (

In [269]:
# Render the model graph with Keras' plot_model utility.
plot_model(model)

In [272]:
# Adam with its default settings; mean squared error on the scaled prices.
adam = Adam()
model.compile(optimizer=adam, loss = "mean_squared_error")

W0722 01:20:46.252931 140614612793152 deprecation_wrapper.py:118] From /home/dexter/anaconda3/lib/python3.6/site-packages/keras/optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.



In [273]:
# Train on [attributes, images] -> scaled price; the test split doubles as
# the validation set.  The recorded run below was stopped by hand
# (KeyboardInterrupt) during epoch 29 of the 200 requested.
model.fit([trainAttrX,trainImagesX], trainY, 
          validation_data=([testAttrX, testImagesX], testY),
         epochs = 200, batch_size=8)

Train on 271 samples, validate on 91 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200

KeyboardInterrupt: 

In [275]:
# Predicted scaled prices for the test split (same input order as in fit).
preds = model.predict([testAttrX, testImagesX])

In [276]:
# Signed error, then error as a percentage of the true (scaled) price.
# preds is flattened to 1-D before subtracting the testY Series.
diff = preds.flatten() - testY
percentDiff = (diff / testY) * 100
absPercentDiff = np.abs(percentDiff)
 
# compute the mean and standard deviation of the absolute percentage
# difference
mean = np.mean(absPercentDiff)
std = np.std(absPercentDiff)
 

In [279]:
# Mean absolute percentage error on the test split; this is the same
# quantity as `mean` computed in the previous cell.
np.mean(np.abs(100*(preds.flatten() - testY)/testY))

29.702658325029407

In [281]:
# Mean squared error in scaled units (prices were divided by maxPrice), not
# in currency.
mean_squared_error(testY, preds)

0.0011628365136601895