In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [11]:
#importlibraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
import joblib
import cv2
import os



In [4]:
PROJECT_DIR = "/content/drive/MyDrive/CDC_Project"
TEST_CSV = f"{PROJECT_DIR}/test2.csv"
TEST_IMAGE_DIR = f"{PROJECT_DIR}/images_test"


Loading saved assets

In [13]:
multi_model = keras.models.load_model(f"{PROJECT_DIR}/final_model_multimodal.keras")
rf = joblib.load(f"{PROJECT_DIR}/final_model_rf.joblib")
scaler = joblib.load(f"{PROJECT_DIR}/final_scaler.joblib")


In [14]:
test_df = pd.read_csv(TEST_CSV)
print(test_df.shape)
test_df.head()


(5404, 20)


Unnamed: 0,id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,2591820310,20141006T000000,4,2.25,2070,8893,2.0,0,0,4,8,2070,0,1986,0,98058,47.4388,-122.162,2390,7700
1,7974200820,20140821T000000,5,3.0,2900,6730,1.0,0,0,5,8,1830,1070,1977,0,98115,47.6784,-122.285,2370,6283
2,7701450110,20140815T000000,4,2.5,3770,10893,2.0,0,2,3,11,3770,0,1997,0,98006,47.5646,-122.129,3710,9685
3,9522300010,20150331T000000,3,3.5,4560,14608,2.0,0,2,3,12,4560,0,1990,0,98034,47.6995,-122.228,4050,14226
4,9510861140,20140714T000000,3,2.5,2550,5376,2.0,0,0,3,9,2550,0,2004,0,98052,47.6647,-122.083,2250,4050


In [18]:
tabular_features = [
    "bedrooms", "bathrooms", "sqft_living", "sqft_lot", "floors", "waterfront", "view", "condition", "grade", "sqft_above", "sqft_basement", "yr_built", "yr_renovated", "lat", "long", "sqft_living15", "sqft_lot15"
]


In [19]:
X_tab = scaler.transform(test_df[tabular_features])


In [24]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array

def load_satellite(path):
    img = load_img(path, target_size=(224, 224))
    img = img_to_array(img) / 255.0
    return img



In [25]:
def build_path(row):
    return os.path.join(TEST_IMAGE_DIR, f"{row['id']}")

test_df["image_path"] = test_df.apply(build_path, axis=1)

X_img = []

for i, p in enumerate(test_df["image_path"]):
    if i % 100 == 0:
        print("Loaded:", i)
    X_img.append(load_satellite(p))

X_img = np.array(X_img)

Loaded: 0
Loaded: 100
Loaded: 200
Loaded: 300
Loaded: 400
Loaded: 500
Loaded: 600
Loaded: 700
Loaded: 800
Loaded: 900
Loaded: 1000
Loaded: 1100
Loaded: 1200
Loaded: 1300
Loaded: 1400
Loaded: 1500
Loaded: 1600
Loaded: 1700
Loaded: 1800
Loaded: 1900
Loaded: 2000
Loaded: 2100
Loaded: 2200
Loaded: 2300
Loaded: 2400
Loaded: 2500
Loaded: 2600
Loaded: 2700
Loaded: 2800
Loaded: 2900
Loaded: 3000
Loaded: 3100
Loaded: 3200
Loaded: 3300
Loaded: 3400
Loaded: 3500
Loaded: 3600
Loaded: 3700
Loaded: 3800
Loaded: 3900
Loaded: 4000
Loaded: 4100
Loaded: 4200
Loaded: 4300
Loaded: 4400
Loaded: 4500
Loaded: 4600
Loaded: 4700
Loaded: 4800
Loaded: 4900
Loaded: 5000
Loaded: 5100
Loaded: 5200
Loaded: 5300
Loaded: 5400


Now predicting the prices

In [27]:
#prediction of prices
batch_size = 32
preds_list = []

for i in range(0, len(test_df), batch_size):

    batch_df   = test_df.iloc[i:i+batch_size]

    # ---- IMAGES ----
    batch_paths = batch_df["image_path"].values
    batch_imgs  = []

    for path in batch_paths:
        img = tf.keras.utils.load_img(path, target_size=(128, 128))   # IMPORTANT
        img = tf.keras.utils.img_to_array(img)
        img = img / 255.0                                            # same as training
        batch_imgs.append(img)

    batch_imgs = np.array(batch_imgs)

    # ---- TABULAR ----
    batch_tab = batch_df[tabular_features].values   # same columns as training

    print(f"Predicting rows {i} → {i+len(batch_imgs)}")

    batch_preds = multi_model.predict(
        [batch_imgs, batch_tab],
        verbose=0
    )

    preds_list.append(batch_preds)

# ---- COMBINE ALL PREDICTIONS ----
final_preds = np.concatenate(preds_list, axis=0)

print(final_preds.shape)


Predicting rows 0 → 32
Predicting rows 32 → 64
Predicting rows 64 → 96
Predicting rows 96 → 128
Predicting rows 128 → 160
Predicting rows 160 → 192
Predicting rows 192 → 224
Predicting rows 224 → 256
Predicting rows 256 → 288
Predicting rows 288 → 320
Predicting rows 320 → 352
Predicting rows 352 → 384
Predicting rows 384 → 416
Predicting rows 416 → 448
Predicting rows 448 → 480
Predicting rows 480 → 512
Predicting rows 512 → 544
Predicting rows 544 → 576
Predicting rows 576 → 608
Predicting rows 608 → 640
Predicting rows 640 → 672
Predicting rows 672 → 704
Predicting rows 704 → 736
Predicting rows 736 → 768
Predicting rows 768 → 800
Predicting rows 800 → 832
Predicting rows 832 → 864
Predicting rows 864 → 896
Predicting rows 896 → 928
Predicting rows 928 → 960
Predicting rows 960 → 992
Predicting rows 992 → 1024
Predicting rows 1024 → 1056
Predicting rows 1056 → 1088
Predicting rows 1088 → 1120
Predicting rows 1120 → 1152
Predicting rows 1152 → 1184
Predicting rows 1184 → 1216
Predict

In [29]:
test_df["predicted_price"] = final_preds


In [30]:
test_df[["id", "predicted_price"]].to_csv(
    f"{PROJECT_DIR}/submission.csv",
    index=False
)

In [32]:
test_df.head()

Unnamed: 0,id,date,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,...,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15,image_path,predicted_price
0,2591820310,20141006T000000,4,2.25,2070,8893,2.0,0,0,4,...,0,1986,0,98058,47.4388,-122.162,2390,7700,/content/drive/MyDrive/CDC_Project/images_test...,510548.1
1,7974200820,20140821T000000,5,3.0,2900,6730,1.0,0,0,5,...,1070,1977,0,98115,47.6784,-122.285,2370,6283,/content/drive/MyDrive/CDC_Project/images_test...,723127.8
2,7701450110,20140815T000000,4,2.5,3770,10893,2.0,0,2,3,...,0,1997,0,98006,47.5646,-122.129,3710,9685,/content/drive/MyDrive/CDC_Project/images_test...,870907.7
3,9522300010,20150331T000000,3,3.5,4560,14608,2.0,0,2,3,...,0,1990,0,98034,47.6995,-122.228,4050,14226,/content/drive/MyDrive/CDC_Project/images_test...,1001087.0
4,9510861140,20140714T000000,3,2.5,2550,5376,2.0,0,0,3,...,0,2004,0,98052,47.6647,-122.083,2250,4050,/content/drive/MyDrive/CDC_Project/images_test...,622382.6


In [33]:
test_df.columns

Index(['id', 'date', 'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot',
       'floors', 'waterfront', 'view', 'condition', 'grade', 'sqft_above',
       'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode', 'lat', 'long',
       'sqft_living15', 'sqft_lot15', 'image_path', 'predicted_price'],
      dtype='object')