In [13]:
import pandas as pd

df = pd.read_csv('Data/socal2.csv')[:5000]
df.head()

Unnamed: 0,image_id,street,citi,n_citi,bed,bath,sqft,price
0,0,1317 Van Buren Avenue,"Salton City, CA",317,3,2.0,1560,201900
1,1,124 C Street W,"Brawley, CA",48,3,2.0,713,228500
2,2,2304 Clark Road,"Imperial, CA",152,3,1.0,800,273950
3,3,755 Brawley Avenue,"Brawley, CA",48,3,1.0,1082,350000
4,4,2207 R Carrillo Court,"Calexico, CA",55,4,3.0,2547,385100


In [14]:
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OrdinalEncoder

In [15]:
X = df.drop(['price'],axis=1)
y = df.price

In [16]:
X_num = X.select_dtypes(include=["int64", "float64"]).columns
X_cat = X.select_dtypes(include=['object']).columns

In [17]:
numeric_transformer = Pipeline([('imputer',SimpleImputer(strategy='median')),
                                ('scaler',StandardScaler())])
categorical_transformer = Pipeline([('imputer',SimpleImputer(strategy='most_frequent')),
                                    ('encoder',OrdinalEncoder()),
                                ('scaler',StandardScaler())])



In [18]:
tabular_preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, X_num),
        ('cat', categorical_transformer, X_cat)
    ]
)

X_preprocessed = tabular_preprocessor.fit_transform(X)
X_preprocessed

array([[-1.73170443,  0.96467568, -0.34842841, ..., -0.3417661 ,
        -1.26548362,  0.92202006],
       [-1.73101161, -1.53008248, -0.34842841, ..., -1.33017222,
        -1.36504042, -1.55839367],
       [-1.73031879, -0.56556631, -0.34842841, ..., -1.22864762,
        -0.29714487, -0.6366183 ],
       ...,
       [ 1.73031879, -0.35225985, -0.34842841, ...,  1.33980795,
         0.97582066, -0.36846546],
       [ 1.73101161,  0.80701439,  0.66680589, ...,  3.74722451,
         1.30767664,  0.73766499],
       [ 1.73170443,  1.43765957,  0.66680589, ...,  1.37831728,
         0.47633485,  1.42480663]], shape=(5000, 7))

In [None]:
from tensorflow.keras.layers import Input, Dense, Concatenate, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50

# Tabular branch
num_tabular_features = X_preprocessed.shape[1]
tabular_input = Input(shape=(num_tabular_features,), name="tabular_input")
x_tab = Dense(128, activation='relu')(tabular_input)
x_tab = Dense(64, activation='relu')(x_tab)

# Image branch
image_input = Input(shape=(224,224,3), name="image_input")
resnet_base = ResNet50(weights='imagenet', include_top=False, input_tensor=image_input)
resnet_base.trainable = False   # IMPORTANT

x_img = GlobalAveragePooling2D()(resnet_base.output)
x_img = Dense(256, activation='relu')(x_img)

# Fusion
combined = Concatenate()([x_tab, x_img])
z = Dense(128, activation='relu')(combined)
z = Dense(64, activation='relu')(z)
output = Dense(1, activation='linear', name='price')(z)

model = Model(inputs=[tabular_input, image_input], outputs=output)
model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

model.summary()


In [None]:
from tensorflow.keras.applications.resnet50 import preprocess_input
import tensorflow as tf
import numpy as np

def load_images(image_ids, folder_path="Data/socal2/socal_pics"):
    """
    Loads and preprocesses images for ResNet50.

    Args:
        image_ids (list): List of image IDs or filenames without extension.
        folder_path (str): Base folder where images are stored.

    Returns:
        np.array: Preprocessed images ready for ResNet50, shape (N, 224, 224, 3)
    """
    imgs = []
    for img_id in image_ids:
        path = f"{folder_path}/{img_id}.jpg"  # adjust extension if needed
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)      # decode JPEG
        img = tf.image.resize(img, (224, 224))           # resize to ResNet input
        img = preprocess_input(img)                      # ResNet preprocessing
        imgs.append(img)
    
    return np.array(imgs)


In [None]:
X_tab = X_preprocessed
X_img = load_images(df['image_id'].tolist())
y = df['price'].values


In [None]:
print(X_tab.shape)   # (N, num_features)
print(X_img.shape)   # (N, 224, 224, 3)
print(y.shape)       # (N,)


(5000, 7)
(5000, 224, 224, 3)
(5000,)


In [None]:
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

model_checkpoint = ModelCheckpoint(
    filepath='models/best_model.keras',
    monitor='val_loss',
    save_best_only=True
)


In [None]:
history = model.fit(
    [X_tab, X_img],
    y,
    validation_split=0.2,
    epochs=50,
    batch_size=16,
    callbacks=[early_stop, model_checkpoint]
)


Epoch 1/50
[1m 24/250[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m4:04[0m 1s/step - loss: 597647785984.0000 - mae: 668419.6536

AbortedError: Graph execution error:

Detected at node StatefulPartitionedCall/functional_1/conv5_block1_2_conv_1/BiasAdd defined at (most recent call last):
<stack traces unavailable>
Operation received an exception:Status: 1, message: could not create a memory object, in file tensorflow/core/kernels/mkl/mkl_conv_ops.cc:1112
	 [[{{node StatefulPartitionedCall/functional_1/conv5_block1_2_conv_1/BiasAdd}}]] [Op:__inference_multi_step_on_iterator_137492]