# Imports

In [1]:
import os
import numpy as np
import pandas as pd

from PIL import Image
import base64
import io

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Concatenate, Rescaling
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

from tensorflow.keras import backend as K
import gc

import ollama

from tqdm import tqdm

import psutil
import subprocess
import time

In [2]:
# Force CUDA usage
os.environ["OLLAMA_BACKEND"] = "cuda"
os.environ["OLLAMA_NUM_THREADS"] = "16"

# Parameters

In [3]:
# Ollama visual models
llms = ['gemma3:4b', 'llava:7b', 'llava-llama3:8b']

In [4]:
# list of all letters to stop on LLM should only return integer
stop_chars = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

In [5]:
# Number of rows to generate features for
n_rows = 5000

In [6]:
# NN Parameters
epochs = 25
batch_size = 16

# Loading data

In [7]:
# Set test set as the same size as the number of rows we process with LLM
df_train, df_test = train_test_split(pd.read_csv("houses_preprocessed.csv"), test_size=n_rows, shuffle=True, random_state=42)

In [8]:
df_test.head()

Unnamed: 0,n_citi,bed,bath,sqft,price,image
4772,-0.530372,-0.489366,-0.472771,-0.47392,898000,houses_preprocessed/4793.jpg
3707,1.018093,-0.489366,-0.472771,-0.753836,554900,houses_preprocessed/3727.jpg
14159,-1.286806,0.477001,0.570296,0.513102,969000,houses_preprocessed/14333.jpg
6934,0.145969,1.443367,0.570296,0.771561,634900,houses_preprocessed/7055.jpg
13453,0.341752,-1.455732,-1.515838,-1.502881,397000,houses_preprocessed/13627.jpg


# LLM Regression

## Method to allign image for Ollama visual models

In [9]:
def df_image_path_to_base64(image_path):
    # Memory management
    with Image.open(image_path) as img:
        with io.BytesIO() as buffered:
            img.save(buffered, format="JPEG")

            return base64.b64encode(buffered.getvalue()).decode('utf-8')

In [10]:
def check_memory(llm):
    # Code to fix memory leak, if above 85% memory usage reload Ollama model
    if psutil.virtual_memory().percent > 85:
        print("Reseting Memory...")
        subprocess.run(['ollama', 'stop', llm])
        time.sleep(5)
        subprocess.run(['ollama', 'run', llm])

## Qualified loop

In [11]:
prompt = '''you MUST Estimate the full dollar value of this house based solely on the image.

RULES:
1. Respond with the FULL NUMBER in digits only (e.g., write 500000 instead of 500 or 500k).
2. Do NOT use any symbols, text, letters, commas, or punctuation.
3. Do NOT explain or justify your answer.
4. If the image is unclear, make your best estimate.

Your response MUST be a full number like 450000 or 375000:'''

In [12]:
# Iterate through models
for index, model in enumerate(llms):
    print("Processing Model: " + model + " (Model " + str(index + 1) + "/" + str(len(llms)) + ")")

    # For each row
    for index, row in tqdm(df_test.iterrows(), total=len(df_test), desc="Predicting rows"):  
        # Memory leak fix
        check_memory(model)   
        
        raw_response = None
        attempts = 0
        success = False
        
        # Try up to 10 times
        while attempts < 10 and not success:
            try:
                # Do the necessary image conversion
                image = df_image_path_to_base64(row['image'])
                
                # Regress the price, get rid of commas and periods. Also stop when LLM returns any letter
                raw_response = ollama.generate(model=model, prompt=prompt, images=[image], options={"stop": stop_chars})['response']
                response = int(''.join(filter(str.isdigit, raw_response)))

                # Raise an exception if the response is not between 10,000 and 10,000,000
                if not (10000 <= response <= 10000000):
                    raise ValueError(f"Response value {response} is out of the acceptable range (10,000 to 10,000,000).")

                # Store response
                df_test.at[index, f"{model}_predicted_price"] = response
                success = True
                
            except Exception as e:
                attempts += 1
                
        # If all attempts failed, store 550k (median)
        if not success:
            df_test.at[index, f"{model}_predicted_price"] = 550000
            print(f"All attempts failed for model {model}, row {index}. Storing default value.")
            print(raw_response)

Processing Model: gemma3:4b (Model 1/3)


Predicting rows:  10%|██████████████████████▋                                                                                                                                                                                                       | 512/5000 [38:47<5:38:54,  4.53s/it]

Reseting Memory...


Predicting rows:  21%|█████████████████████████████████████████████▉                                                                                                                                                                             | 1049/5000 [1:18:57<4:52:42,  4.44s/it]

Reseting Memory...


Predicting rows:  32%|█████████████████████████████████████████████████████████████████████▎                                                                                                                                                     | 1582/5000 [1:58:42<4:14:08,  4.46s/it]

Reseting Memory...


Predicting rows:  42%|████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                                                              | 2111/5000 [2:38:25<3:34:36,  4.46s/it]

Reseting Memory...


Predicting rows:  53%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                                       | 2649/5000 [3:18:54<2:55:21,  4.48s/it]

Reseting Memory...


Predicting rows:  64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                               | 3198/5000 [3:59:50<2:13:46,  4.45s/it]

Reseting Memory...


Predicting rows:  75%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                       | 3739/5000 [4:40:11<1:33:25,  4.45s/it]

Reseting Memory...


Predicting rows:  86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                | 4275/5000 [5:20:12<54:51,  4.54s/it]

Reseting Memory...


Predicting rows:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 4820/5000 [6:00:56<13:21,  4.45s/it]

Reseting Memory...


Predicting rows: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [6:14:30<00:00,  4.49s/it]


Processing Model: llava:7b (Model 2/3)


Predicting rows: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [4:48:35<00:00,  3.46s/it]


Processing Model: llava-llama3:8b (Model 3/3)


Predicting rows:  99%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊  | 4951/5000 [4:11:45<03:15,  4.00s/it]

All attempts failed for model llava-llama3:8b, row 11513. Storing default value.
216


Predicting rows: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [4:14:14<00:00,  3.05s/it]


In [13]:
df_test.head()

Unnamed: 0,n_citi,bed,bath,sqft,price,image,gemma3:4b_predicted_price,llava:7b_predicted_price,llava-llama3:8b_predicted_price
4772,-0.530372,-0.489366,-0.472771,-0.47392,898000,houses_preprocessed/4793.jpg,375000.0,123456.0,3000000.0
3707,1.018093,-0.489366,-0.472771,-0.753836,554900,houses_preprocessed/3727.jpg,285000.0,150000.0,500000.0
14159,-1.286806,0.477001,0.570296,0.513102,969000,houses_preprocessed/14333.jpg,175000.0,182000.0,500000.0
6934,0.145969,1.443367,0.570296,0.771561,634900,houses_preprocessed/7055.jpg,675000.0,190000.0,500000.0
13453,0.341752,-1.455732,-1.515838,-1.502881,397000,houses_preprocessed/13627.jpg,175000.0,510000.0,750000.0


### Calculate MAE for LLM classifier

In [14]:
# For each model calculat MAE
for model in llms:
    # Column names per model
    col_name = model + "_predicted_price"
    mae_col_name = model + "_" + 'MAE'
    
    # Calculate absolute errors
    df_test[mae_col_name] = abs(df_test['price'] - df_test[col_name])

In [15]:
df_test.head()

Unnamed: 0,n_citi,bed,bath,sqft,price,image,gemma3:4b_predicted_price,llava:7b_predicted_price,llava-llama3:8b_predicted_price,gemma3:4b_MAE,llava:7b_MAE,llava-llama3:8b_MAE
4772,-0.530372,-0.489366,-0.472771,-0.47392,898000,houses_preprocessed/4793.jpg,375000.0,123456.0,3000000.0,523000.0,774544.0,2102000.0
3707,1.018093,-0.489366,-0.472771,-0.753836,554900,houses_preprocessed/3727.jpg,285000.0,150000.0,500000.0,269900.0,404900.0,54900.0
14159,-1.286806,0.477001,0.570296,0.513102,969000,houses_preprocessed/14333.jpg,175000.0,182000.0,500000.0,794000.0,787000.0,469000.0
6934,0.145969,1.443367,0.570296,0.771561,634900,houses_preprocessed/7055.jpg,675000.0,190000.0,500000.0,40100.0,444900.0,134900.0
13453,0.341752,-1.455732,-1.515838,-1.502881,397000,houses_preprocessed/13627.jpg,175000.0,510000.0,750000.0,222000.0,113000.0,353000.0


In [16]:
print(df_test.columns)

Index(['n_citi', 'bed', 'bath', 'sqft', 'price', 'image',
       'gemma3:4b_predicted_price', 'llava:7b_predicted_price',
       'llava-llama3:8b_predicted_price', 'gemma3:4b_MAE', 'llava:7b_MAE',
       'llava-llama3:8b_MAE'],
      dtype='object')


# Train and Test models on the same data partioning

## Experimental set up

In [17]:
X_train_tab = df_train[['n_citi', 'bed', 'bath', 'sqft']].values 
X_train_img = df_train['image']
y_train = df_train['price']

X_test_tab = df_test[['n_citi', 'bed', 'bath', 'sqft']].values 
X_test_img = df_test['image']
y_test = df_test['price']

print("Training Data Shapes:")
print(f"Tabular features: {X_train_tab.shape}")
print(f"Image features: {X_train_img.shape}")
print(f"Target prices: {y_train.shape}")

print("\nTest Data Shapes:")
print(f"Tabular features: {X_test_tab.shape}")
print(f"Image features: {X_test_img.shape}")
print(f"Target prices: {y_test.shape}")

Training Data Shapes:
Tabular features: (10297, 4)
Image features: (10297,)
Target prices: (10297,)

Test Data Shapes:
Tabular features: (5000, 4)
Image features: (5000,)
Target prices: (5000,)


## Creating Neural Networks and Models

### Base NN and Resnet

In [18]:
def base_nn(image_shape=(311, 415, 3)):
    # Image processing branch
    img_input = Input(shape=image_shape, name='image_input')
    x = Conv2D(32, (3, 3), activation='relu')(img_input)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    output = Dense(1)(x) # Regression output for price prediction

    # Define the model
    nn_model = Model(inputs=img_input, outputs=output)
    
    # Compile the model
    nn_model.compile(optimizer='adam',
                  loss='mae',
                  metrics=['mae', 'R2Score'])
    
    # Display model summary debug
    # nn_model.summary()

    return nn_model

In [19]:
def resnet_nn(image_shape=(311, 415, 3)):
    # Image processing branch with pre-trained ResNet50
    res_net = ResNet50(weights='imagenet', include_top=False, input_shape=image_shape)
   
    # Unfreeze only the last 10 layers of resnet (fine-tuning) 
    res_net.trainable = False 
    for layer in res_net.layers[-10:]:
        layer.trainable = True

    # Image processing branch
    img_input = Input(shape=image_shape, name='image_input')
    x = res_net(img_input)
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    output = Dense(1)(x) # Regression output for price prediction
    
    # Define the model
    res_net_model = Model(inputs=img_input, outputs=output)
    
    # Compile the model
    res_net_model.compile(optimizer='adam', 
                          loss='mae',
                          metrics=['mae', 'R2Score'])
    
    # Display model summary debug
    # res_net_model.summary()

    return res_net_model

In [20]:
'''
I did not write this code, the code is from: https://www.tensorflow.org/tutorials/load_data/images
It helps us train the NN more dynamically, it loads images on the go, such that not all RAM is used up.
It does try to maximise RAM usage this is basically what the tf.data.AUTOTUNE does.
'''

# Loads an image and normalizes it from [0,1]
def process_example(image_path, label):
    # Load raw bytes and convert to RGB
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Normalize image to [0, 1] and convert to float32
    image = tf.image.convert_image_dtype(image, tf.float32)

    return image, label


# Creates on the fly data sets to train/test the model, we need this to not exceed memory
def create_dataset(image_paths, labels, shuffle=True):
    # Convert to tensors
    image_paths = tf.convert_to_tensor(image_paths)
    labels = tf.convert_to_tensor(labels, dtype=tf.float32)

    # Build dataset
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, labels))
    dataset = dataset.map(lambda img, lbl: process_example(img, lbl), num_parallel_calls=tf.data.AUTOTUNE)
    
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(image_paths))
    
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    
    return dataset


def train_and_evaluate_nn(nn, 
                          X_train_img_paths, y_train,
                          X_test_img_paths, y_test,
                          verbose=1):

    # Dynamic dataset loading
    train_robustified = create_dataset(X_train_img_paths, y_train, shuffle=True) # Shuffle to break ordering
    test_robustified = create_dataset(X_test_img_paths, y_test, shuffle=False) # No shuffle, we arent learning, just predicting

    # Train and Test
    history = nn.fit(train_robustified, epochs=epochs, verbose=verbose)
    test_loss, test_mae, r2 = nn.evaluate(test_robustified, verbose=0)

    return history, test_loss, test_mae, r2

### Logistic regression

In [21]:
def train_and_evaluate_lin_model(model, X_train_tab, y_train, X_test_tab, y_test):
    # Train the model
    model.fit(X_train_tab, y_train)
    
    # Evaluate the model
    y_test_pred = model.predict(X_test_tab)
    mae_test = mean_absolute_error(y_test, y_test_pred)
    r2 = r2_score(y_test, y_test_pred)
    
    return mae_test, r2

### Train and Evaluate

In [22]:
# Create NNs (image only)
nn_base = base_nn()
nn_resnet = resnet_nn()
lin = LinearRegression()

In [23]:
# NN
print("Training Base NN")
nn_base_hist, _, nn_base_mae, nn_base_r2 = train_and_evaluate_nn(nn_base, X_train_img, y_train, X_test_img, y_test)
print(f"NN Base MAE: {nn_base_mae:.0f}\nNN Base R2: {nn_base_r2:.2f}")

# Try to clear NN from memory
K.clear_session()
gc.collect()

Training Base NN
Epoch 1/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m770s[0m 1s/step - R2Score: -1.3903 - loss: 444562.7500 - mae: 444562.7500
Epoch 2/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m757s[0m 1s/step - R2Score: -0.1489 - loss: 287296.1875 - mae: 287296.1875
Epoch 3/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m754s[0m 1s/step - R2Score: -0.1465 - loss: 284436.2812 - mae: 284436.2812
Epoch 4/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m759s[0m 1s/step - R2Score: -0.1307 - loss: 281265.2188 - mae: 281265.2188
Epoch 5/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m755s[0m 1s/step - R2Score: -0.1233 - loss: 279358.1562 - mae: 279358.1562
Epoch 6/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m759s[0m 1s/step - R2Score: -0.1020 - loss: 276307.0000 - mae: 276307.0000
Epoch 7/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m754s[0m 1s/step - R2Score: -0.1006 - loss:

0

In [24]:
# Resnet
print("Training Resnet")
nn_resnet_hist, _, nn_resnet_mae, nn_resnet_r2 = train_and_evaluate_nn(nn_resnet, X_train_img, y_train, X_test_img, y_test)
print(f"Resnet MAE: {nn_resnet_mae:.0f}\nResnet R2: {nn_resnet_r2:.2f}")

# Try to clear NN from memory
K.clear_session()
gc.collect()

Training Resnet
Epoch 1/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1681s[0m 3s/step - R2Score: -3.3015 - loss: 693742.5000 - mae: 693742.5000
Epoch 2/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1694s[0m 3s/step - R2Score: -1.4881 - loss: 462300.3125 - mae: 462300.3125
Epoch 3/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1721s[0m 3s/step - R2Score: 0.0532 - loss: 260306.7812 - mae: 260306.7812
Epoch 4/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1716s[0m 3s/step - R2Score: 0.0597 - loss: 256366.4062 - mae: 256366.4062
Epoch 5/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1726s[0m 3s/step - R2Score: 0.0691 - loss: 257995.2969 - mae: 257995.2969
Epoch 6/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1728s[0m 3s/step - R2Score: 0.0806 - loss: 256774.9844 - mae: 256774.9844
Epoch 7/25
[1m644/644[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1724s[0m 3s/step - R2Score: 0.0939 - loss

0

In [25]:
# LR
print("Training LR")
lr_mae, lr_r2 = train_and_evaluate_lin_model(lin, X_train_tab, y_train, X_test_tab, y_test)
print(f"LR MAE: {lr_mae:.0f}\nLR R2: {lr_r2:.2f}")

Training LR
LR MAE: 223193
LR R2: 0.34


# Compare

In [26]:
# MAEs
llava_7b_MAE = df_test['llava:7b_MAE'].mean()
llava_llama3_8b_MAE = df_test['llava-llama3:8b_MAE'].mean()
gemma3_4b_MAE = df_test['gemma3:4b_MAE'].mean()

# R2s
llava_7b_r2 = r2_score(df_test['price'], df_test['llava:7b_predicted_price'])
llava_llama3_8b_r2 = r2_score(df_test['price'], df_test['llava-llama3:8b_predicted_price'])
gemma3_4b_r2 = r2_score(df_test['price'], df_test['gemma3:4b_predicted_price'])

# Create a dictionary with the model names and their performance metrics
comparison = {
    'Model': ['llava:7b', 'llava-llama3:8b', 'gemma3:4b', 'NN_base', 'NN_ResNet', 'LR'],
    'MAE': [
        round(llava_7b_MAE),
        round(llava_llama3_8b_MAE),
        round(gemma3_4b_MAE),
        round(nn_base_mae),
        round(nn_resnet_mae),
        round(lr_mae)
    ],
    'R2': [
        round(llava_7b_r2, 3),
        round(llava_llama3_8b_r2, 3),
        round(gemma3_4b_r2, 3),
        round(nn_base_r2, 3),
        round(nn_resnet_r2, 3),
        round(lr_r2, 3)
    ]
}

# Make into df
comparison_df = pd.DataFrame(comparison).set_index("Model")
display(comparison_df)

Unnamed: 0_level_0,MAE,R2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
llava:7b,484657,-1.977
llava-llama3:8b,623507,-10.402
gemma3:4b,304654,-0.265
NN_base,264775,-0.005
NN_ResNet,316761,-0.434
LR,223193,0.341
