In [11]:
!pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-win_amd64.whl.metadata (4.6 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-win_amd64.whl.metadata (5.3 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting tensorboard~=2.20.0 (from tensorflow)
  Downloading tensorboard-2.20.0-py3-none-any.whl.metadata (1.8 kB)
Collecting keras>=3.10.0 (from tensorflow)
  Downloading keras-3.11.3-py3-none-any

In [2]:
!pip install kagglehub

Collecting kagglehub
  Downloading kagglehub-0.3.13-py3-none-any.whl.metadata (38 kB)
Downloading kagglehub-0.3.13-py3-none-any.whl (68 kB)
Installing collected packages: kagglehub
Successfully installed kagglehub-0.3.13


In [5]:
pip install numpy==1.26.4

Note: you may need to restart the kernel to use updated packages.


In [4]:
!pip uninstall opencv-python -y
!pip install opencv-python==4.8.1.78


Found existing installation: opencv-python 4.12.0.88
Uninstalling opencv-python-4.12.0.88:
  Successfully uninstalled opencv-python-4.12.0.88
Collecting opencv-python==4.8.1.78
  Downloading opencv_python-4.8.1.78-cp37-abi3-win_amd64.whl.metadata (20 kB)
Downloading opencv_python-4.8.1.78-cp37-abi3-win_amd64.whl (38.1 MB)
   ---------------------------------------- 0.0/38.1 MB ? eta -:--:--
   --- ------------------------------------ 3.1/38.1 MB 23.1 MB/s eta 0:00:02
   ------- -------------------------------- 6.8/38.1 MB 28.0 MB/s eta 0:00:02
   ------- -------------------------------- 7.3/38.1 MB 14.2 MB/s eta 0:00:03
   --------- ------------------------------ 8.9/38.1 MB 12.0 MB/s eta 0:00:03
   ---------- ----------------------------- 9.7/38.1 MB 10.2 MB/s eta 0:00:03
   ----------- ---------------------------- 10.7/38.1 MB 8.8 MB/s eta 0:00:04
   ------------ --------------------------- 11.5/38.1 MB 8.3 MB/s eta 0:00:04
   ------------ --------------------------- 12.3/38.1 MB 7.7

In [3]:
import pandas as pd
import numpy as np
import os
import tensorflow as tf
import cv2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.preprocessing.image import load_img, img_to_array


# ==========================
# 1. Load Dataset
# ==========================
data = pd.read_csv("socal2.csv")

# Drop non-numeric columns (like addresses, file paths, etc.)
non_numeric = ["Address", "Street", "City", "State", "Zip"]
for col in non_numeric:
    if col in data.columns:
        data = data.drop(columns=[col])

# Target variable
y = data["price"]   # adjust target column name if needed

# Extract tabular features
X_tab = data.drop(columns=["price", "filename"], errors="ignore")
X_tab = X_tab.select_dtypes(include=[np.number])

# Scale numeric features
scaler = StandardScaler()
X_tab = scaler.fit_transform(X_tab)

# ==========================
# 2. Load Images
# ==========================
image_folder = "socal_pics"   # folder containing house images
images = []

for fname in data["image_id"]:
    path = os.path.join(image_folder, str(fname) + '.jpg')  # convert to string
    img = cv2.imread(path)
    if img is not None:
        img = cv2.resize(img, (128, 128))
        images.append(img)
    else:
        print(f"⚠️ Image not found: {path}")


X_img = np.array(images)

# ==========================
# 3. Train-Test Split
# ==========================
X_tab_train, X_tab_test, X_img_train, X_img_test, y_train, y_test = train_test_split(
    X_tab, X_img, y, test_size=0.2, random_state=42
)

# ==========================
# 4. Build Model
# ==========================

# CNN branch for images
image_input = tf.keras.Input(shape=(128, 128, 3))
x = tf.keras.layers.Conv2D(32, (3,3), activation="relu")(image_input)
x = tf.keras.layers.MaxPooling2D((2,2))(x)
x = tf.keras.layers.Conv2D(64, (3,3), activation="relu")(x)
x = tf.keras.layers.MaxPooling2D((2,2))(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(64, activation="relu")(x)

# Dense branch for tabular data
tab_input = tf.keras.Input(shape=(X_tab.shape[1],))
y1 = tf.keras.layers.Dense(64, activation="relu")(tab_input)
y1 = tf.keras.layers.Dense(32, activation="relu")(y1)

# Combine branches
combined = tf.keras.layers.concatenate([x, y1])
z = tf.keras.layers.Dense(64, activation="relu")(combined)
z = tf.keras.layers.Dense(1)(z)  # regression output

# Build model
model = tf.keras.Model(inputs=[image_input, tab_input], outputs=z)

model.compile(optimizer="adam", loss="mse", metrics=["mae", "mse"])

# ==========================
# 5. Train Model
# ==========================
history = model.fit(
    [X_img_train, X_tab_train], y_train,
    validation_data=([X_img_test, X_tab_test], y_test),
    epochs=10,
    batch_size=32
)

# ==========================
# 6. Evaluate Model
# ==========================
loss, mae, mse = model.evaluate([X_img_test, X_tab_test], y_test)
print(f"Test MAE: {mae:.2f}")
print(f"Test RMSE: {np.sqrt(mse):.2f}")


Epoch 1/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 798443175936.0000 - mae: 653695.8125 - mse: 798443175936.0000 - val_loss: 942650425344.0000 - val_mae: 755060.5625 - val_mse: 942650425344.0000
Epoch 2/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 311ms/step - loss: 797095428096.0000 - mae: 652696.7500 - mse: 797095428096.0000 - val_loss: 939650383872.0000 - val_mae: 753136.3125 - val_mse: 939650383872.0000
Epoch 3/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 317ms/step - loss: 794707165184.0000 - mae: 650922.9375 - mse: 794707165184.0000 - val_loss: 934815531008.0000 - val_mae: 750033.3125 - val_mse: 934815531008.0000
Epoch 4/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 318ms/step - loss: 790850174976.0000 - mae: 648054.9375 - mse: 790850174976.0000 - val_loss: 927947816960.0000 - val_mae: 745604.5625 - val_mse: 927947816960.0000
Epoch 5/10
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[