In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, Concatenate
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Image settings: every photo is resized to IMG_SIZE and looked up in IMG_DIR
# by its numeric image id.
IMG_DIR = '/kaggle/input/house-prices-and-images-socal/socal2/socal_pics'
IMG_SIZE = (128, 128)

# Load the listings table and keep only the first 500 rows, re-indexed from 0
# so row positions line up with the image array built later.
df = (
    pd.read_csv('/kaggle/input/house-prices-and-images-socal/socal2.csv')
    .head(500)
    .copy()
    .reset_index(drop=True)
)

# Tabular inputs (standardised) and the regression target (raw price values).
target = 'price'
features = ['bed', 'bath', 'sqft']
y = df[target].values
scaler = StandardScaler()
X_tab = scaler.fit_transform(df[features])

def process_image(image_id):
    """Load one listing photo and return it as a preprocessed float32 array.

    The file ``<IMG_DIR>/<int(image_id)>.jpg`` is loaded, resized to
    ``IMG_SIZE``, converted to a float32 array and passed through
    ``preprocess_input``.

    Args:
        image_id: Numeric id of the image; it is converted with ``int()`` to
            build the file name.

    Returns:
        A float32 array of shape ``(IMG_SIZE[0], IMG_SIZE[1], 3)``. If the
        file is missing or cannot be decoded, an all-zero array of the same
        shape and dtype is returned and a warning is emitted.
    """
    import warnings

    path = os.path.join(IMG_DIR, f"{int(image_id)}.jpg")
    try:
        img = load_img(path, target_size=IMG_SIZE)
    except (OSError, ValueError) as err:
        # Best-effort fallback: keep the row but make the substitution visible
        # instead of silently feeding blank images to the model.
        warnings.warn(f"Could not load image {path!r}: {err}; using zeros.",
                      stacklevel=2)
        # float32 so one missing file does not upcast the whole stacked image
        # array to float64.
        return np.zeros((IMG_SIZE[0], IMG_SIZE[1], 3), dtype='float32')
    return preprocess_input(img_to_array(img).astype('float32'))

# Stack the per-row image arrays into one (n_rows, height, width, 3) array,
# in the same row order as df.
X_img = np.array([process_image(i) for i in df['image_id']])

#split
# Split the *unscaled* tabular features so the scaler can be fit on the
# training rows only; fitting on every row would leak test-set statistics
# into the training features. With the same random_state and row count, the
# rows assigned to train/test are the same as when splitting pre-scaled data.
(X_img_train, X_img_test,
 X_tab_train_raw, X_tab_test_raw,
 y_train, y_test) = train_test_split(
    X_img, df[features], y, test_size=0.2, random_state=42
)

# Re-fit the existing scaler on the training rows and reuse those statistics
# for the held-out rows.
X_tab_train = scaler.fit_transform(X_tab_train_raw)
X_tab_test = scaler.transform(X_tab_test_raw)

# Two-branch regression model: a MobileNetV2 image branch built without
# pretrained weights (weights=None) and a small dense branch for the tabular
# features, concatenated and reduced to a single output value.

# Image branch: MobileNetV2 without its classification head, wired directly
# onto img_input.
img_input = Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 3))
base_cnn = MobileNetV2(include_top=False, weights=None, input_tensor=img_input)
base_cnn.trainable = True  # train from scratch

# Flatten the backbone's feature map and compress it to a 64-unit embedding.
x = base_cnn.output
x = Flatten()(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.3)(x)
img_output = Dense(64, activation='relu')(x)

# Tabular branch: one input per column of X_tab, reduced to a 32-unit embedding.
tab_input = Input(shape=(X_tab.shape[1],))
y_dense = Dense(64, activation='relu')(tab_input)
y_dense = Dropout(0.3)(y_dense)
tab_output = Dense(32, activation='relu')(y_dense)

# Fusion head: concatenate both embeddings and regress a single value
# (the last Dense layer has one unit and no activation).
combined = Concatenate()([img_output, tab_output])
z = Dense(64, activation='relu')(combined)
z = Dropout(0.3)(z)
z = Dense(32, activation='relu')(z)
output = Dense(1)(z)

# Inputs must be passed in the order [image, tabular].
model = Model(inputs=[img_input, tab_input], outputs=output)
# Optimise mean squared error; mean absolute error is tracked as a metric.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()

# Train for 10 epochs in batches of 16. The held-out split doubles as the
# validation set, so the val_* metrics are computed on the same rows that the
# final MAE below is reported on.
train_inputs = [X_img_train, X_tab_train]
test_inputs = [X_img_test, X_tab_test]

history = model.fit(
    x=train_inputs,
    y=y_train,
    batch_size=16,
    epochs=10,
    validation_data=(test_inputs, y_test),
)

# Score the trained model on the held-out rows.
preds = model.predict(test_inputs)
test_mae = mean_absolute_error(y_test, preds)
print("MAE:", test_mae)


Epoch 1/10
25/25 ━━━━━━━━━━━━━━━━━━━━ 63s 610ms/step - loss: 708156719104.0000 - mae: 686801.1250 - val_loss: 879757295616.0000 - val_mae: 783814.3750
Epoch 2/10
25/25 ━━━━━━━━━━━━━━━━━━━━ 12s 498ms/step - loss: 640365821952.0000 - mae: 651675.5625 - val_loss: 874438918144.0000 - val_mae: 780415.6250
Epoch 3/10
25/25 ━━━━━━━━━━━━━━━━━━━━ 13s 504ms/step - loss: 559398322176.0000 - mae: 588681.0625 - val_loss: 815994241024.0000 - val_mae: 742033.0625
Epoch 4/10
25/25 ━━━━━━━━━━━━━━━━━━━━ 12s 483ms/step - loss: 195919659008.0000 - mae: 339099.8438 - val_loss: 763614658560.0000 - val_mae: 705867.0625
Epoch 5/10
25/25 ━━━━━━━━━━━━━━━━━━━━ 12s 496ms/step - loss: 164275994624.0000 - mae: 329279.4062 - val_loss: 741559631872.0000 - val_mae: 690082.5625
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m