In [1]:
import pandas as pd
import numpy as np

In [10]:
import cv2

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense,MaxPooling2D,Conv2D,Flatten,Input,InputLayer

In [3]:
# Load the training table, keep the image ids aside for building file paths,
# and strip the columns that are not fed to the model.
raw_train=pd.read_csv("train.csv")
train_id=raw_train['image_id']
df=raw_train.drop(columns=["address","index","city","image_id"])

In [4]:
# Preview the first rows to confirm which columns remain after the drops.
df.head()

Unnamed: 0,rooms,retail_price,sqft,toilets
0,2.0,299000,1295.0,2.0
1,3.0,299000,1566.0,2.0
2,5.0,499990,3068.0,3.0
3,4.0,849900,2230.0,2.1
4,3.0,333490,1898.0,2.0


In [5]:
# Count missing values per column to see which features need imputation.
df.isna().sum()

rooms           196
retail_price      0
sqft            147
toilets           0
dtype: int64

In [6]:
# Mean-impute missing room counts.
# NOTE(review): the mean is taken over every row of `df`, including rows that
# are later used for validation.
rooms_mean=df['rooms'].mean()
df['rooms']=df['rooms'].fillna(rooms_mean)

In [7]:
# Mean-impute missing square footage.
# NOTE(review): the mean is taken over every row of `df`, including rows that
# are later used for validation.
sqft_mean=df['sqft'].mean()
df['sqft']=df['sqft'].fillna(sqft_mean)

In [8]:
# Build one image path per training row, in the same order as `train_id`.
# (The previous loop reset an unused counter on every iteration and shadowed
# the builtin `id`.)
image_id=list(train_id)
path=[f"train_images/train/{img_id}.jpg" for img_id in image_id]

In [9]:
# Attach the image path to each row; `path` must have one entry per row of `df`.
df['path']=path

In [16]:
# Display the frame to check that the new `path` column is present on every row.
df

Unnamed: 0,rooms,retail_price,sqft,toilets,path
0,2.0,299000,1295.0,2.0,train_images/train/5832.jpg
1,3.0,299000,1566.0,2.0,train_images/train/5584.jpg
2,5.0,499990,3068.0,3.0,train_images/train/9408.jpg
3,4.0,849900,2230.0,2.1,train_images/train/4644.jpg
4,3.0,333490,1898.0,2.0,train_images/train/5739.jpg
...,...,...,...,...,...
8395,4.0,289990,1498.0,2.0,train_images/train/792.jpg
8396,4.0,895000,2981.0,3.0,train_images/train/10671.jpg
8397,3.0,479900,1272.0,1.0,train_images/train/11650.jpg
8398,4.0,599000,1924.0,3.0,train_images/train/2134.jpg


In [122]:
def data_generator(sample,batch=8,image_size=(150,150)):
    """Yield training batches of ``([images, details], prices)`` from a DataFrame.

    Parameters
    ----------
    sample : pandas.DataFrame
        Must contain a ``path`` column (image file paths) and a
        ``retail_price`` column (regression target). Every other column is
        passed through unchanged as a tabular feature.
    batch : int, default 8
        Maximum number of rows per yielded batch; the final batch may be
        smaller.
    image_size : tuple of int, default (150, 150)
        ``dsize`` handed to ``cv2.resize`` for every image.

    Yields
    ------
    tuple
        ``([image_array, detail_array], price_array)`` where ``image_array``
        stacks the resized images of the batch, ``detail_array`` is the
        remaining feature columns via ``to_numpy()`` and ``price_array`` is the
        ``retail_price`` column via ``to_numpy()``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` returns ``None`` for a path (missing or unreadable
        image), instead of failing later inside ``cv2.resize``.

    Notes
    -----
    The generator is single-pass; build a new one for each epoch.
    """
    num_sample=len(sample)
    for offset in range(0,num_sample,batch):
        # Slicing past the end simply truncates, so the last (short) batch
        # needs no special handling.
        data=sample.iloc[offset:offset+batch]
        image_=[]
        # Read the path by column name so the column order of `sample`
        # cannot silently break image loading.
        for img_path in data['path']:
            img=cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError(f"could not read image: {img_path}")
            image_.append(cv2.resize(img,image_size))
        detail=data.drop(['retail_price','path'],axis=1)
        price=data['retail_price']
        yield [np.array(image_),detail.to_numpy()],price.to_numpy()

In [123]:
# Generator over the full frame with one row per batch.
# NOTE(review): `train_data` is rebuilt inside the training loop further down,
# so this instance looks unused by the time training starts.
train_data=data_generator(df,1)

In [161]:
# Image branch: four Conv2D/MaxPooling2D stages followed by dense layers that
# reduce each 150x150x3 image to a 3-value vector.
_img_layers=[Input(shape=(150,150,3))]
for n_filters in (32,32,16,8):
    _img_layers.append(Conv2D(n_filters,(3,3),activation='relu'))
    _img_layers.append(MaxPooling2D(2,2))
_img_layers.extend([
    Flatten(),
    Dense(128,activation='tanh'),
    Dense(64,activation='tanh'),
    Dense(3),
])
img_decoder=tf.keras.Sequential(_img_layers)



In [162]:
# Tabular branch: a single 3-unit ReLU layer applied to the feature row.
details_decoder=tf.keras.Sequential()
details_decoder.add(Dense(3,activation='relu'))

In [163]:
# Symbolic inputs for the two branches: a 150x150 3-channel image and a
# 3-value feature row.
input_1=Input(shape=(150,150,3))
input_2=Input(shape=(3,))
# Branch outputs that are merged by the model head.
m_1=img_decoder(input_1)
m_2=details_decoder(input_2)

In [164]:
# Head: concatenate both branch outputs, pass them through two 64-unit ReLU
# layers and finish with a single linear unit for the price.
merged = tf.keras.layers.Concatenate(axis=1)([m_1,m_2])
hidden = Dense(64,activation='relu')(merged)
hidden = Dense(64,activation='relu')(hidden)
output = Dense(1)(hidden)

model=tf.keras.Model(inputs=[input_1,input_2],outputs=output)

In [165]:
# Print the layer table to check branch output shapes and parameter counts.
model.summary()

Model: "model_11"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_48 (InputLayer)           [(None, 150, 150, 3) 0                                            
__________________________________________________________________________________________________
input_49 (InputLayer)           [(None, 3)]          0                                            
__________________________________________________________________________________________________
sequential_22 (Sequential)      (None, 3)            74683       input_48[0][0]                   
__________________________________________________________________________________________________
sequential_23 (Sequential)      (None, 3)            12          input_49[0][0]                   
___________________________________________________________________________________________

In [166]:
# Train against mean absolute error with RMSprop.
opt = tf.keras.optimizers.RMSprop(learning_rate=1e-3)
cost_fn = tf.keras.losses.MeanAbsoluteError()
model.compile(loss=cost_fn,optimizer=opt)

In [167]:
# The generators are exhausted after one pass, so new ones are created for
# every pass and `fit` is invoked once per pass.
n_passes=10
split_at=6000  # rows before this index train the model, the rest validate it
for _ in range(n_passes):
    train_data=data_generator(df[:split_at],1)
    val_data=data_generator(df[split_at:],1)
    model.fit(train_data,validation_data=val_data)



In [180]:
def test_data_generator(sample,batch=8,image_size=(150,150)):
    """Yield inference batches of ``[images, details]`` from a DataFrame.

    Parameters
    ----------
    sample : pandas.DataFrame
        Must contain a ``path`` column (image file paths). Every other column
        is passed through unchanged as a tabular feature.
    batch : int, default 8
        Maximum number of rows per yielded batch; the final batch may be
        smaller.
    image_size : tuple of int, default (150, 150)
        ``dsize`` handed to ``cv2.resize`` for every image.

    Yields
    ------
    list
        ``[image_array, detail_array]`` where ``image_array`` stacks the
        resized images of the batch and ``detail_array`` is the remaining
        columns via ``to_numpy()``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` returns ``None`` for a path (missing or unreadable
        image), instead of failing later inside ``cv2.resize``.

    Notes
    -----
    The generator is single-pass.
    NOTE(review): a bare two-element list may be interpreted as ``(x, y)`` by
    some Keras versions -- confirm against the installed release.
    """
    num_sample=len(sample)
    for offset in range(0,num_sample,batch):
        # Slicing past the end simply truncates, so the last (short) batch
        # needs no special handling.
        data=sample.iloc[offset:offset+batch]
        image_=[]
        # Read the path by column name so the column order of `sample`
        # cannot silently break image loading.
        for img_path in data['path']:
            img=cv2.imread(img_path)
            if img is None:
                raise FileNotFoundError(f"could not read image: {img_path}")
            image_.append(cv2.resize(img,image_size))
        detail=data.drop(['path'],axis=1)
        yield [np.array(image_),detail.to_numpy()]

In [169]:
# Load the unlabeled test table from the working directory.
test_df=pd.read_csv("test.csv")

In [171]:
# Preview the raw test columns before trimming them to the model's features.
test_df.head()

Unnamed: 0,index,rooms,address,city,sqft,toilets,image_id
0,3076,3,33171 Sea Lion Drive,"Dana Point, CA",1510,2.0,3076
1,3218,4,33 Segovia,"San Clemente, CA",2342,3.0,3218
2,9213,2,1074 TAMARACK Road,"Pinon Hills, CA",2845,3.0,9213
3,10018,4,10989 Coalinga Avenue,"Montclair, CA",2379,3.0,10018
4,11355,4,1128 Custer,"Campo, CA",1836,3.0,11355


In [173]:
# One image path per test row, in `test_df` row order.
# The previous version also reset the unrelated `path` and `image_id` lists
# (clobbering the training values) and carried an unused counter; both removed.
test_path=[f"test_images/test/{img_id}.jpg" for img_id in test_df['image_id']]

In [176]:
# Keep only the feature columns; identifiers and address fields are dropped.
_unused_test_cols=['index','address','city','image_id']
test_=test_df.drop(columns=_unused_test_cols)

In [178]:
# Attach the image path to each test row; `test_path` must have one entry per row.
test_['path']=test_path

In [179]:
# Display the test frame to check the feature columns and the new `path` column.
test_

Unnamed: 0,rooms,sqft,toilets,path
0,3,1510,2.0,test_images/test/3076.jpg
1,4,2342,3.0,test_images/test/3218.jpg
2,2,2845,3.0,test_images/test/9213.jpg
3,4,2379,3.0,test_images/test/10018.jpg
4,4,1836,3.0,test_images/test/11355.jpg
...,...,...,...,...
3596,4,2092,2.1,test_images/test/9421.jpg
3597,3,2000,2.1,test_images/test/219.jpg
3598,5,3002,3.1,test_images/test/10333.jpg
3599,3,1811,3.1,test_images/test/2814.jpg


In [185]:
# Single-pass generator over the test rows, one row per batch.
# (A stray trailing backtick made this cell a SyntaxError.)
test_data=test_data_generator(test_,1)

In [186]:
# Run inference over the test generator. The generator is consumed by this
# call, so `test_data` must be rebuilt before predicting again.
pred =model.predict(test_data)

In [187]:
# Inspect the raw predictions returned by `model.predict`.
pred

array([[482555.5 ],
       [638673.3 ],
       [736829.8 ],
       ...,
       [767492.06],
       [538274.7 ],
       [458078.4 ]], dtype=float32)

In [189]:
# Submission table: the image ids from the test set next to the predictions.
# `pred` is assigned positionally, so it must be in `test_df` row order.
pred_df=pd.DataFrame({'image_id':test_df['image_id']})
pred_df['Price']=pred

In [190]:
# Preview the submission rows before writing them to disk.
pred_df.head()

Unnamed: 0,image_id,Price
0,3076,482555.5
1,3218,638673.3125
2,9213,736829.8125
3,10018,646059.8125
4,11355,542856.4375


In [191]:
# Write the submission file without the DataFrame index column.
pred_df.to_csv("pred_df.csv",index=False)