In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout, Activation, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow import keras
from tensorflow.keras.models import *
import cv2
import io
import os

\# Reading Data

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
listing = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/listings.csv', sep = ',')



In [5]:
listing['price'] = listing['price'].str.replace("$","")
listing['price']  = listing['price'].str.replace("'","")
listing['price'] = listing['price'].str.replace(",","")
listing['price'] = listing['price'].astype('float')

  """Entry point for launching an IPython kernel.


In [6]:
listing['log_price'] = np.log1p(listing['price']) #skewed data

In [7]:
df = listing[['property_type', 'room_type', 'bathrooms', 'bedrooms', 'bed_type',  'accommodates', 
              'guests_included', 'neighbourhood_group_cleansed', 
              'review_scores_rating', 'log_price']]
df.head(1)

Unnamed: 0,property_type,room_type,bathrooms,bedrooms,bed_type,accommodates,guests_included,neighbourhood_group_cleansed,review_scores_rating,log_price
0,Apartment,Entire home/apt,1.0,1.0,Real Bed,4,2,Queen Anne,95.0,4.454347


In [8]:
df = df.drop(labels=2476, axis=0) #removing column with http error

# Taking care of null data and encoding

In [9]:
df['property_type'].fillna(value='missing', inplace=True) 
df['room_type'].fillna(value='missing', inplace=True) 
df['bed_type'].fillna(value='missing', inplace=True) 
df['neighbourhood_group_cleansed'].fillna(value='missing', inplace=True)

In [10]:
df['bathrooms'].fillna(df['bathrooms'].mean(), inplace=True)
df['bedrooms'].fillna(df['bedrooms'].mean(), inplace=True)
df['review_scores_rating'].fillna(df['review_scores_rating'].mean(), inplace=True)

In [11]:
cols = ['property_type', 'room_type', 'bed_type', 'neighbourhood_group_cleansed']

In [12]:
data = df
oe = OrdinalEncoder()
data[cols] = oe.fit_transform(data[cols])

# reading images

In [13]:
import urllib
from urllib.error import HTTPError
import skimage.io
images=[]
img_price = np.zeros((3818,1))
for i in range(3818):
    try:
        img = skimage.io.imread(listing['picture_url'][i])
        img = cv2.resize(img, (128, 128))
        images.append(img)
    except HTTPError as exception: 
        pass 

In [20]:
# images = np.array(images)
images = images / 255 #scaling

TypeError: ignored

# scaling data

In [None]:
scaler = StandardScaler()
data = scaler.fit_transform(data)

In [None]:
data = pd.DataFrame(data)
data.describe()

# Neural networks

In [None]:
def create_ann(dim, regress=False):
    model = Sequential()
    model.add(Dense(8, input_dim=dim, activation="relu"))
    model.add(Dense(4, activation="relu"))
    return model

In [None]:
def create_cnn(width, height, depth, filters=(16, 32, 64), regress=False):
    
    inputShape = (height, width, depth)
    chanDim = -1
    inputs = Input(shape=inputShape)
    
    x = Conv2D(16, (3, 3), padding="same")(inputs)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Conv2D(32, (3, 3), padding="same")(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Conv2D(64, (3, 3), padding="same")(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)
    
    x = Flatten()(x)
    x = Dense(16)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.5)(x)

    x = Dense(4)(x)
    x = Activation("relu")(x)
    
    model = Model(inputs, x)
    
    return model

In [None]:
from sklearn.model_selection import train_test_split

split = train_test_split(data, images, test_size=0.25, random_state=42)

(Xatt_train, Xatt_test, Ximage_train, Ximage_test) = split
y_train, y_test = Xatt_train[9].values, Xatt_test[9].values

X1_train = Xatt_train[[0,1,2,3,4,5,6,7,8]].values #numerical 
X2_train = Ximage_train #images
X1_test = Xatt_test[[0,1,2,3,4,5,6,7,8]].values
X2_test = Ximage_test

print(X1_train.shape, X1_test.shape, X2_train.shape, X2_test.shape, y_train.shape, y_test.shape)

In [None]:
from tensorflow.keras.layers import concatenate
mlp = create_ann(X1_train.shape[1], regress=False)
cnn = create_cnn(128, 128, 3, regress=False)
combinedInput = concatenate([mlp.output, cnn.output])

x = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="linear")(x)

In [None]:
X1_train = np.array(X1_train)
X1_test = np.array(X1_test)
X2_test = np.array(X2_test)
X2_train = np.array(X2_train)

In [None]:
from tensorflow.keras.optimizers import Adam
model = Model(inputs=[mlp.input, cnn.input], outputs=x)
opt = Adam(lr=1e-3, decay=1e-4 / 200)
model.compile(loss="mse", optimizer=opt)

print("[INFO] training model...")
model.fit(x=[X1_train, X2_train], y=y_train, validation_data=([X1_test, X2_test], y_test),epochs=10, batch_size=64)

# Testing data

In [None]:
df = listing[['property_type', 'room_type', 'bathrooms', 'bedrooms', 'bed_type',  'accommodates', 
              'guests_included', 'neighbourhood_group_cleansed', 
              'review_scores_rating', 'log_price']]
attr_sample=df.head(1)

In [None]:
image_sample = skimage.io.imread(listing['picture_url'][1])
sample_resized=cv2.resize(image_sample,(128,128))
plt.imshow(sample_resized)
X2_final = sample_resized/255 #image

In [None]:
attr_sample[cols] = oe.fit_transform(attr_sample[cols])

In [None]:
y_ground_truth=attr_sample['log_price']
X1_final= attr_sample[['property_type', 'room_type', 'bathrooms', 'bedrooms', 'bed_type',  'accommodates', 
              'guests_included', 'neighbourhood_group_cleansed', 
              'review_scores_rating']]

In [None]:
ss = StandardScaler()
X1_final = ss.fit_transform(X1_final) #X1 shape is 1x9

In [None]:
X1_final=np.array(X1_final)
print(X1_final.shape," ",X2_final.shape)
y_pred = model.predict([np.reshape(X1_final,(1,9)),np.reshape(X2_final,(1,128,128,3))]) #MLP input and CNN input

In [None]:
X1_final = pd.DataFrame(X1_final) #array to dataframe
X1_final[9] = y_pred #appending the price column 
X1_final = np.array(X1_final) # 1x10

In [None]:
X1_final = scaler.inverse_transform(X1_final)
X1_final = pd.DataFrame(X1_final)

In [None]:
print("Actual price: ",attr_sample['log_price'].values)
print("Predicted price: ", X1_final[9].values)

# Test case 2

In [None]:
data = { 'property_type':[2], 'room_type':[2], 'bathrooms':[1], 'bedrooms':[1], 'bed_type':[1], 
        'accommodates':[16], 'guests_included':[15], 'neighbourhood_group_cleansed':[16], 'review_scores_rating':[42]}
attr_sample = pd.DataFrame(data)

In [None]:
image_sample = skimage.io.imread('C:/Users/Krishi Vijayanand/Downloads/socal_data/socal_pics/424.jpg')
sample_resized=cv2.resize(image_sample,(128,128))
plt.imshow(sample_resized)
X2_final = sample_resized/255

In [None]:
X1_final = attr_sample
X1_final = scaler.fit_transform(X1_final)
X1_final=np.array(X1_final)
y_pred=model.predict([np.reshape(X1_final,(1,9)),np.reshape(X2_final,(1,128,128,3))])

In [None]:
chumma = df
chumma = scaler.fit_transform(chumma)

In [None]:
X1_final = pd.DataFrame(X1_final)

In [None]:
X1_final[9] = y_pred
X1_final = np.array(X1_final)

In [None]:
X1_final = scaler.inverse_transform(X1_final)

In [None]:
X1_final = pd.DataFrame(X1_final)

In [15]:
print("Predicted price: ", np.expm1(X1_final[9].values))

NameError: ignored