In [1]:
import numpy as np
from glob import glob
import pandas as pd
import cv2
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder

### Read Tabular data

In [2]:
# Load the space-separated house attribute table, then display it.
data = pd.read_csv(
    "HousesInfo.txt",
    sep=" ",
)
data

Unnamed: 0,bedrooms,bathrooms,area,zipcode,price
0,4,4.0,4053,85255,869500
1,4,3.0,3343,36372,865200
2,3,4.0,3923,85266,889000
3,5,5.0,4022,85262,910000
4,3,4.0,4116,85266,971226
...,...,...,...,...,...
530,5,2.0,2066,94531,399900
531,4,3.5,9536,94531,460000
532,3,2.0,2014,94531,407000
533,4,3.0,2312,94531,419000


##### The data is already in a uniform format, so no data cleaning is needed.

### Data transformation
##### Data preparation for training

In [3]:
# Replace each zipcode by an integer class label.
# NOTE: despite its name, `le_area` is fitted on the "zipcode" column; the name
# is kept unchanged so that any other cell referring to it keeps working.
le_area = LabelEncoder()
zipcode_labels = le_area.fit_transform(data["zipcode"])
data["zipcode"] = zipcode_labels

In [4]:
# Scale "area" by its largest value, so the largest area becomes 1.0.
area_max = data["area"].max()
data["area"] = data["area"] / area_max

In [5]:
# Scale "price" by its largest value, so the largest price becomes 1.0.
# `price_max` stays available for converting scaled prices back to the original unit.
price_max = data["price"].max()
data["price"] = data["price"] / price_max

In [6]:
# Show the frame after the zipcode encoding and the area/price scaling.
data

Unnamed: 0,bedrooms,bathrooms,area,zipcode,price
0,4,4.0,0.422936,13,0.148429
1,4,3.0,0.348847,0,0.147695
2,3,4.0,0.409371,15,0.151758
3,5,5.0,0.419702,14,0.155343
4,3,4.0,0.429511,15,0.165795
...,...,...,...,...,...
530,5,2.0,0.215590,42,0.068266
531,4,3.5,0.995095,42,0.078525
532,3,2.0,0.210164,42,0.069478
533,4,3.0,0.241261,42,0.071526


In [12]:
# Persist the transformed table as a plain NumPy array (np.save appends ".npy").
np.save(file="Text-numpy", arr=data.values)

### Load Image data and process it

In [7]:
# Directory that holds the "<prefix>_<room>.jpg" pictures.
IMAGES_DIR = "/kaggle/input/house-price/Houses Dataset"


def house_sort_key(path):
    """Build a sort key from the text before the first "_" of an image file name.

    Prefixes made only of decimal digits are compared as integers, so
    "2_bathroom.jpg" is ordered before "10_bathroom.jpg". Any other prefix is
    ordered after all numeric ones. The full path is the final tie-breaker.
    """
    file_name = path.replace("\\", "/").rsplit("/", 1)[-1]
    prefix = file_name.split("_", 1)[0]
    if prefix.isdecimal():
        return (0, int(prefix), path)
    return (1, 0, path)


def list_room_images(room):
    """Return the paths matching ``*_<room>.jpg`` in IMAGES_DIR in a fixed order.

    glob() yields matches in an arbitrary, file-system-dependent order, while the
    four room lists are later paired by position. Sorting every list with the
    same key makes the pairing deterministic.
    """
    return sorted(glob(f"{IMAGES_DIR}/*_{room}.jpg"), key=house_sort_key)


# NOTE(review): presumably the numeric prefix is the 1-based house number and
# follows the row order of HousesInfo.txt - confirm against the dataset.
bathroom_imgs_path = list_room_images("bathroom")
bedroom_imgs_path = list_room_images("bedroom")
frontal_imgs_path = list_room_images("frontal")
kitchen_imgs_path = list_room_images("kitchen")


# Check the number of image paths found for each category
print(f"Bathroom Images: {len(bathroom_imgs_path)}")
print(f"Bedroom Images: {len(bedroom_imgs_path)}")
print(f"Frontal Images: {len(frontal_imgs_path)}")
print(f"Kitchen Images: {len(kitchen_imgs_path)}")

In [8]:
# Number of paths per category, in the order: bathroom, bedroom, frontal, kitchen.
tuple(
    len(paths)
    for paths in (bathroom_imgs_path, bedroom_imgs_path, frontal_imgs_path, kitchen_imgs_path)
)

(535, 535, 535, 535)

#### All four categories contain the same number of images. They are combined by list position, which assumes every list is in the same house order.

In [9]:
images = []
target_shape = (100,100)


def load_resized(path, size):
    """Read the image at ``path`` with OpenCV and resize it to ``size``.

    Args:
        path: location of the image file.
        size: target size, forwarded unchanged to ``cv2.resize``.

    Returns:
        The resized image array.

    Raises:
        OSError: if ``cv2.imread`` returns ``None`` for ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising; fail here so the error names the offending path.
        raise OSError(f"cv2.imread could not read image: {path}")
    return cv2.resize(img, size)


room_path_lists = (bathroom_imgs_path, bedroom_imgs_path, frontal_imgs_path, kitchen_imgs_path)

# The lists are paired by position, so they must describe the same number of houses.
if len({len(paths) for paths in room_path_lists}) != 1:
    raise ValueError(
        "Image lists differ in length: "
        f"{[len(paths) for paths in room_path_lists]}"
    )

for house_paths in tqdm(zip(*room_path_lists), total=len(bathroom_imgs_path)):
    # Tiles are ordered bathroom, bedroom, frontal, kitchen and joined side by side.
    tiles = [load_resized(path, target_shape) for path in house_paths]
    images.append(np.hstack(tiles))
images = np.array(images)
    

100%|████████████████████████████████████████████████████████████████████████████████| 535/535 [00:57<00:00,  9.32it/s]


In [10]:
# Dimensions of the stacked image array.
images.shape

(535, 100, 400, 3)

In [16]:
# Persist the stacked image array (np.save appends ".npy").
np.save(file="Images-numpy", arr=images)

##### The data is now prepared for training, so we can start model building and optimization.