# Imports

In [1]:
import os
import numpy as np
import pandas as pd

from PIL import Image
import base64
import io

import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense, Concatenate, Rescaling
from tensorflow.keras.models import Model
from tensorflow.keras.applications import ResNet50

from tensorflow.keras import backend as K
import gc

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

import ollama

from tqdm import tqdm

import psutil
import subprocess
import time

In [2]:
# Force CUDA usage
os.environ["OLLAMA_BACKEND"] = "cuda"
os.environ["OLLAMA_NUM_THREADS"] = "16"

# Parameters

In [3]:
# Ollama visual models
llms = ['gemma3:4b', 'llava:7b', 'llava-llama3:8b']

In [4]:
# list of all letters to stop on LLM should only return integer
stop_chars = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

In [5]:
# Number of rows to generate features for
n_rows = 5000

In [6]:
# NN Parameters
epochs = 25
batch_size = 16

# Loading data

In [7]:
# Work df is randomly sampled df with the size of n_rows which is the number of rows that will be processed by LLMs
_, work_df = train_test_split(pd.read_csv("houses_preprocessed.csv"), test_size=n_rows, shuffle=True, random_state=42)

In [8]:
work_df.head()

Unnamed: 0,n_citi,bed,bath,sqft,price,image
4772,-0.530372,-0.489366,-0.472771,-0.47392,898000,houses_preprocessed/4793.jpg
3707,1.018093,-0.489366,-0.472771,-0.753836,554900,houses_preprocessed/3727.jpg
14159,-1.286806,0.477001,0.570296,0.513102,969000,houses_preprocessed/14333.jpg
6934,0.145969,1.443367,0.570296,0.771561,634900,houses_preprocessed/7055.jpg
13453,0.341752,-1.455732,-1.515838,-1.502881,397000,houses_preprocessed/13627.jpg


# LLM Feature Generation

## Method to allign image for Ollama visual models

In [9]:
def df_image_path_to_base64(image_path):
    # Memory management
    with Image.open(image_path) as img:
        with io.BytesIO() as buffered:
            img.save(buffered, format="JPEG")

            return base64.b64encode(buffered.getvalue()).decode('utf-8')

In [10]:
def check_memory(llm):
    # Code to fix memory leak, if above 85% memory usage reload Ollama model
    if psutil.virtual_memory().percent > 85:
        print("Reseting Memory...")
        subprocess.run(['ollama', 'stop', llm])
        time.sleep(5)
        subprocess.run(['ollama', 'run', llm])

## Qualified loop

In [11]:
prompt = '''Analyze the image of the house and score it from 1 to 10 strictly based on {metric}. 
YOU MUST FOLLOW THESE RULES:
1. Return ONLY a single integer between 1 and 10, NOTHING ELSE.
2. Do not provide explanations, disclaimers, or additional text.
3. If the image is unclear try your best anyway to rate it.

Your response must be ONLY the number:'''

In [12]:
metrics = ['condition', 'size', 'material', 'uniqueness', 'maintenance']

In [13]:
# Iterate through models
for index, model in enumerate(llms):
    print("Processing Model: " + model + " (Model " + str(index + 1) + "/" + str(len(llms)) + ")")

    # For each row
    for index, row in tqdm(work_df.iterrows(), total=len(work_df), desc="Parsing rows"):     
        # Memory leak fix
        check_memory(model)
    
        # For each feature that we will generate 
        for metric in metrics:

            raw_response = None
            attempts = 0
            success = False

            # Try up to 10 times
            while attempts < 10 and not success:
                try:
                    # Format the prompt based on the metric (feature)
                    formatted_prompt = prompt.format(metric=metric)
                    
                    # Do the necessary image conversion
                    image = df_image_path_to_base64(row['image'])
                    
                    # Regress the price
                    raw_response = ollama.generate(model=model, 
                                                   prompt=formatted_prompt, 
                                                   images=[image], 
                                                   options={"stop": stop_chars})['response']
                    response = int(''.join(filter(str.isdigit, raw_response)))
                    
                    # Assert validity response
                    assert 1 <= response <= 10
                    
                    # Store response
                    work_df.at[index, f"{model}_{metric}"] = response
                    success = True
                except Exception as e:
                    attempts += 1
            
            # If all attempts failed, store 5
            if not success:
                work_df.at[index, f"{model}_{metric}"] = 5
                print(f"All attempts failed for model {model}, metric {metric}, row {index}. Storing default value 5.")
                print(raw_response)

Processing Model: gemma3:4b (Model 1/3)


Parsing rows:   5%|███▎                                                           | 267/5000 [27:12<7:57:33,  6.05s/it]

Reseting Memory...


Parsing rows:   9%|█████▊                                                         | 465/5000 [48:45<9:28:08,  7.52s/it]

Reseting Memory...


Parsing rows:  16%|█████████▋                                                  | 810/5000 [3:11:24<10:35:40,  9.10s/it]

Reseting Memory...


Parsing rows:  20%|███████████▉                                                 | 977/5000 [3:31:58<7:52:44,  7.05s/it]

Reseting Memory...


Parsing rows:  25%|██████████████▋                                             | 1227/5000 [3:58:01<6:31:30,  6.23s/it]

Reseting Memory...


Parsing rows:  30%|█████████████████▊                                          | 1483/5000 [4:24:40<6:00:03,  6.14s/it]

Reseting Memory...


Parsing rows:  34%|████████████████████▍                                       | 1703/5000 [4:47:37<5:40:36,  6.20s/it]

Reseting Memory...


Parsing rows:  39%|███████████████████████▍                                    | 1955/5000 [5:13:59<5:14:48,  6.20s/it]

Reseting Memory...


Parsing rows:  44%|██████████████████████████▌                                 | 2210/5000 [5:40:29<4:47:08,  6.17s/it]

Reseting Memory...


Parsing rows:  48%|████████████████████████████▊                               | 2404/5000 [6:00:42<4:31:56,  6.29s/it]

Reseting Memory...


Parsing rows:  53%|███████████████████████████████▊                            | 2648/5000 [6:26:04<4:04:34,  6.24s/it]

Reseting Memory...


Parsing rows:  57%|██████████████████████████████████                          | 2838/5000 [6:45:56<3:45:54,  6.27s/it]

Reseting Memory...


Parsing rows:  62%|████████████████████████████████████▉                       | 3081/5000 [7:11:11<3:19:53,  6.25s/it]

Reseting Memory...


Parsing rows:  65%|███████████████████████████████████████▎                    | 3273/5000 [7:31:11<2:59:32,  6.24s/it]

Reseting Memory...


Parsing rows:  71%|██████████████████████████████████████████▊                 | 3570/5000 [8:03:48<3:26:28,  8.66s/it]

Reseting Memory...


Parsing rows:  76%|█████████████████████████████████████████████▉              | 3825/5000 [8:31:11<3:21:39, 10.30s/it]

Reseting Memory...


Parsing rows:  81%|████████████████████████████████████████████████▉           | 4074/5000 [8:57:19<1:53:59,  7.39s/it]

Reseting Memory...


Parsing rows:  85%|███████████████████████████████████████████████████         | 4259/5000 [9:16:35<1:16:54,  6.23s/it]

Reseting Memory...


Parsing rows:  90%|███████████████████████████████████████████████████████▊      | 4505/5000 [9:42:14<51:56,  6.30s/it]

Reseting Memory...


Parsing rows:  94%|█████████████████████████████████████████████████████████▎   | 4697/5000 [10:02:15<31:00,  6.14s/it]

Reseting Memory...


Parsing rows:  99%|████████████████████████████████████████████████████████████▎| 4940/5000 [10:27:30<06:10,  6.18s/it]

Reseting Memory...


Parsing rows: 100%|█████████████████████████████████████████████████████████████| 5000/5000 [10:33:52<00:00,  7.61s/it]


Processing Model: llava:7b (Model 2/3)


Parsing rows: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [6:44:47<00:00,  4.86s/it]


Processing Model: llava-llama3:8b (Model 3/3)


Parsing rows: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 5000/5000 [7:53:22<00:00,  5.68s/it]


In [14]:
work_df.head()

Unnamed: 0,n_citi,bed,bath,sqft,price,image,gemma3:4b_condition,gemma3:4b_size,gemma3:4b_material,gemma3:4b_uniqueness,...,llava:7b_condition,llava:7b_size,llava:7b_material,llava:7b_uniqueness,llava:7b_maintenance,llava-llama3:8b_condition,llava-llama3:8b_size,llava-llama3:8b_material,llava-llama3:8b_uniqueness,llava-llama3:8b_maintenance
4772,-0.530372,-0.489366,-0.472771,-0.47392,898000,houses_preprocessed/4793.jpg,6.0,6.0,6.0,4.0,...,6.0,5.0,4.0,8.0,8.0,9.0,1.0,5.0,5.0,7.0
3707,1.018093,-0.489366,-0.472771,-0.753836,554900,houses_preprocessed/3727.jpg,5.0,4.0,4.0,4.0,...,5.0,8.0,7.0,5.0,9.0,7.0,8.0,6.0,4.0,8.0
14159,-1.286806,0.477001,0.570296,0.513102,969000,houses_preprocessed/14333.jpg,6.0,6.0,6.0,4.0,...,8.0,6.0,8.0,6.0,8.0,8.0,7.0,9.0,6.0,8.0
6934,0.145969,1.443367,0.570296,0.771561,634900,houses_preprocessed/7055.jpg,6.0,6.0,6.0,4.0,...,7.0,9.0,8.0,6.0,9.0,5.0,1.0,9.0,8.0,9.0
13453,0.341752,-1.455732,-1.515838,-1.502881,397000,houses_preprocessed/13627.jpg,5.0,4.0,5.0,4.0,...,5.0,5.0,4.0,1.0,8.0,5.0,7.0,8.0,8.0,8.0


# Experimental set up

## Train and Test the models on the same data partioning

In [15]:
# Split the df with LLM generated features into train and test
X_work_df = work_df[work_df.columns.difference(['price'])]
y_work_df = work_df['price']
X_train, X_test, y_train, y_test = train_test_split(X_work_df, y_work_df, test_size=0.2, random_state=42)

### Feature Generation dfs

In [16]:
# Train data feature generation
X_train_tab_fg = X_train[X_train.columns.difference(['image'])] # Not yet np array, we will do a further spliting (we need column names)
X_train_img = X_train['image'] # pd

# Test data feature generation
X_test_tab_fg = X_test[X_test.columns.difference(['image'])] # Not yet np array, we will do a further spliting (we need column names) 
X_test_img = X_test['image'] # pd

# Print shapes
print("FG Training Data Shapes:")
print(f"Tabular features: {X_train_tab_fg.shape}")
print(f"Image features: {X_train_img.shape}")
print(f"Target prices: {y_train.shape}")
print("\nFG Test Data Shapes:")
print(f"Tabular features: {X_test_tab_fg.shape}")
print(f"Image features: {X_test_img.shape}")
print(f"Target prices: {y_test.shape}")

FG Training Data Shapes:
Tabular features: (4000, 19)
Image features: (4000,)
Target prices: (4000,)

FG Test Data Shapes:
Tabular features: (1000, 19)
Image features: (1000,)
Target prices: (1000,)


### No Feature Generation dfs
1. Train and Test sets of image and y columns are identical
2. Reduced tabular features (only base features)

In [17]:
base_cols_no_fg = ['n_citi', 'bed', 'bath', 'sqft']

# Train and Test data - no feature generation
X_train_tab = X_train[base_cols_no_fg].values 
X_test_tab = X_test[base_cols_no_fg].values 

# Print shapes
print("FG Training Data Shapes:")
print(f"Tabular features: {X_train_tab.shape}")
print("\nFG Test Data Shapes:")
print(f"Tabular features: {X_test_tab.shape}")

FG Training Data Shapes:
Tabular features: (4000, 4)

FG Test Data Shapes:
Tabular features: (1000, 4)


# No Generated Features Neural Networks

## Neural Networks and Models

### Base NN and Resnet

In [18]:
def base_nn(input_size_tabular):
    # Image processing branch
    img_input = Input(shape=(311, 415, 3), name='image_input')
    x = Conv2D(32, (3, 3), activation='relu')(img_input)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(64, (3, 3), activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)
    x = Conv2D(128, (3, 3), activation='relu')(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    
    # Tabular data processing branch
    tabular_input = Input(shape=(input_size_tabular,), name='tabular_input')
    y = Dense(64, activation='relu')(tabular_input)
    y = Dense(32, activation='relu')(y)
    
    # Combine both branches
    combined = Concatenate()([x, y])
    z = Dense(64, activation='relu')(combined)
    output = Dense(1)(z)  # Regression output for price prediction
    
    nn_model = Model(inputs=[img_input, tabular_input], outputs=output)
    
    # Compile the model
    nn_model.compile(optimizer='adam',
                  loss='mae',
                  metrics=['mae', 'R2Score'])
    
    # Display model summary debug
    # nn_model.summary()

    return nn_model

In [19]:
def resnet_nn(input_size_tabular):
    # Image processing branch with pre-trained ResNet50
    res_net = ResNet50(weights='imagenet', include_top=False, input_shape=(311, 415, 3))
    
    # Unfreeze only the last 10 layers of resnet (fine-tuning) 
    res_net.trainable = False 
    for layer in res_net.layers[-10:]:
        layer.trainable = True

    # Image processing branch
    img_input = Input(shape=(311, 415, 3), name='image_input')
    x = res_net(img_input)
    x = GlobalAveragePooling2D()(x)
    x = Dense(128, activation='relu')(x)
    
    # Tabular data processing branch
    tabular_input = Input(shape=(input_size_tabular,), name='tabular_input')
    y = Dense(64, activation='relu')(tabular_input)
    y = Dense(32, activation='relu')(y)
    
    # Combine both branches
    combined = Concatenate()([x, y])
    z = Dense(64, activation='relu')(combined)
    output = Dense(1)(z)  # Regression output for price prediction
    
    # Define the model
    res_net_model = Model(inputs=[img_input, tabular_input], outputs=output)
    
    # Compile the model
    res_net_model.compile(optimizer='adam', 
                          loss='mae',
                          metrics=['mae', 'R2Score'])
   
    # Display model summary debug
    # res_net_model.summary()

    return res_net_model

In [20]:
'''
I did not write this code, the code is from: https://www.tensorflow.org/tutorials/load_data/images
It helps us train the NN more dynamically, it loads images on the go, such that not all RAM is used up.
It does try to maximise RAM usage this is basically what the tf.data.AUTOTUNE does.
'''

# Loads an image and normalizes it from [0,1]
def process_example(image_path, tabular_features, label):
    # Load raw bytes and convert to RGB
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Normalize image to [0, 1] and convert to float32
    image = tf.image.convert_image_dtype(image, tf.float32)

    return (image, tabular_features), label


# Creates on the fly data sets to train/test the model, we need this to not exceed memory
def create_dataset(image_paths, tabular_data, labels, shuffle=True):
    # Convert to tensors
    image_paths = tf.convert_to_tensor(image_paths)
    tabular_data = tf.convert_to_tensor(tabular_data, dtype=tf.float32)
    labels = tf.convert_to_tensor(labels, dtype=tf.float32)

    # Build dataset
    dataset = tf.data.Dataset.from_tensor_slices((image_paths, tabular_data, labels))
    dataset = dataset.map(lambda img, tab, lbl: process_example(img, tab, lbl), num_parallel_calls=tf.data.AUTOTUNE)
    
    if shuffle:
        dataset = dataset.shuffle(buffer_size=len(image_paths))
    
    dataset = dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)
    
    return dataset


def train_and_evaluate_nn(nn, 
                          X_train_img_paths, X_train_tab, y_train,
                          X_test_img_paths, X_test_tab, y_test,
                          verbose=1):

    # Dynamic dataset loading
    train_ds = create_dataset(X_train_img_paths, X_train_tab, y_train, shuffle=True) # Shuffle to break ordering
    test_ds = create_dataset(X_test_img_paths, X_test_tab, y_test, shuffle=False) # No shuffle, we arent learning, just predicting

    # Train and Test
    history = nn.fit(train_ds, epochs=epochs, verbose=verbose)
    test_loss, test_mae, r2 = nn.evaluate(test_ds, verbose=0)

    return history, test_loss, test_mae, r2

### Logistic regression

In [21]:
def train_and_evaluate_lin_model(model, X_train_tab, y_train, X_test_tab, y_test):
    # Train the model
    model.fit(X_train_tab, y_train)
    
    # Evaluate the model
    y_test_pred = model.predict(X_test_tab)
    mae_test = mean_absolute_error(y_test, y_test_pred)
    r2 = r2_score(y_test, y_test_pred)
    
    return mae_test, r2

### Train and Evaluate

In [22]:
# Create NNs with tabular features = 4 (n_citi, bed, bath, sqft)
nn_base = base_nn(4)
nn_resnet = resnet_nn(4)
lin = LinearRegression()

In [23]:
# NN
print("Training Base NN")
nn_base_hist, _, nn_base_mae, nn_base_r2 = train_and_evaluate_nn(nn_base, X_train_img, X_train_tab, y_train, X_test_img, X_test_tab, y_test)
print(f"NN Base MAE: {nn_base_mae:.0f}\nNN Base R2: {nn_base_r2:.2f}")

# Try to clear NN from memory
K.clear_session()
gc.collect()

Training Base NN
Epoch 1/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m348s[0m 1s/step - R2Score: -2.0636 - loss: 528141.9375 - mae: 528141.9375
Epoch 2/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m340s[0m 1s/step - R2Score: -0.1374 - loss: 289905.4688 - mae: 289905.4688
Epoch 3/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m326s[0m 1s/step - R2Score: -0.1071 - loss: 264667.0938 - mae: 264667.0938
Epoch 4/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m300s[0m 1s/step - R2Score: -0.0529 - loss: 268879.3750 - mae: 268879.3750
Epoch 5/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m296s[0m 1s/step - R2Score: 0.0878 - loss: 245842.0156 - mae: 245842.0156
Epoch 6/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m301s[0m 1s/step - R2Score: 0.1823 - loss: 246568.2969 - mae: 246568.2969
Epoch 7/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 1s/step - R2Score: 0.2042 - loss: 23

0

In [24]:
# Resnet
print("Training Resnet")
nn_resnet_hist, _, nn_resnet_mae, nn_resnet_r2 = train_and_evaluate_nn(nn_resnet, X_train_img, X_train_tab, y_train, X_test_img, X_test_tab, y_test)
print(f"Resnet MAE: {nn_resnet_mae:.0f}\nResnet R2: {nn_resnet_r2:.2f}")

# Try to clear NN from memory
K.clear_session()
gc.collect()

Training Resnet
Epoch 1/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m670s[0m 3s/step - R2Score: -3.4295 - loss: 698511.6250 - mae: 698511.6250
Epoch 2/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m665s[0m 3s/step - R2Score: -0.7888 - loss: 360914.3750 - mae: 360914.3750
Epoch 3/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m637s[0m 3s/step - R2Score: 0.0782 - loss: 249789.4375 - mae: 249789.4375
Epoch 4/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m663s[0m 3s/step - R2Score: 0.1251 - loss: 250966.8750 - mae: 250966.8750
Epoch 5/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m699s[0m 3s/step - R2Score: 0.2241 - loss: 235338.7969 - mae: 235338.7969
Epoch 6/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m642s[0m 3s/step - R2Score: 0.2663 - loss: 224944.4062 - mae: 224944.4062
Epoch 7/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m663s[0m 3s/step - R2Score: 0.2938 - loss: 22105

0

In [25]:
# LR
print("Training LR")
lr_mae, lr_r2 = train_and_evaluate_lin_model(lin, X_train_tab, y_train, X_test_tab, y_test)
print(f"LR MAE: {lr_mae:.0f}\nLR R2: {lr_r2:.2f}")

Training LR
LR MAE: 227976
LR R2: 0.35


# Generated Features Neural Networks
1. We have to split the LLM generated data into train and test. Such that we can experiment the performance difference.
2. We have to create a new architecture for the NN, which are exactly the same except now the input sizes are different, because we have more tabular features generated by the LLM.

## Comparison Feature Generation 

In [26]:
def create_comparison(llm, 
                      nn_base_mae_fg, nn_resnet_mae_fg, lr_mae_fg, nn_base_mae, nn_resnet_mae, lr_mae,
                      nn_base_r2_fg, nn_resnet_r2_fg, lr_r2_fg, nn_base_r2, nn_resnet_r2, lr_r2
                     ):
    
    # Create a dictionary with the model names and their performance metrics
    comparison = {
        'Model': ['FG: NN_base ' + llm, 'FG: NN_ResNet ' + llm, 'FG: LR ' + llm, 'NN_base', 'NN_ResNet', 'LR'],
        'MAE': [
            round(nn_base_mae_fg),
            round(nn_resnet_mae_fg),
            round(lr_mae_fg),
            round(nn_base_mae),
            round(nn_resnet_mae),
            round(lr_mae)
        ],
        'R2': [
            round(nn_base_r2_fg, 3),
            round(nn_resnet_r2_fg, 3),
            round(lr_r2_fg, 3),
            round(nn_base_r2, 3),
            round(nn_resnet_r2, 3),
            round(lr_r2, 3)
        ]
    }
    
    # Make into df
    comparison_df = pd.DataFrame(comparison).set_index("Model")

    # Display df
    print(f"\nComparison FG vs no FG for {llm}")
    display(comparison_df)
    print()

In [27]:
import logging

# Supress retracing warning
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # '0' = all messages, '3' = fatal only
tf.get_logger().setLevel(logging.ERROR)

In [28]:
# For each LLM
for llm in llms:
    # Should be 9 features --> 5 metrics cols plus 4 base cols
    nn_base_fg = base_nn(9)
    nn_resnet_fg = resnet_nn(9)
    lin_fg = LinearRegression()

    # Generated features by LLM
    fg_cols = [f"{llm}_{metric}" for metric in metrics]
    
    # Per LLM selected columns (base and LLM generated metrics)
    base_plus_fg_cols = ['n_citi', 'bed', 'bath', 'sqft'] + fg_cols

    # NN
    print("Training Base NN")
    _, _, nn_base_mae_fg, nn_base_r2_fg = train_and_evaluate_nn(nn_base_fg, 
                                                                     X_train_img, X_train_tab_fg[base_plus_fg_cols].values, y_train, 
                                                                     X_test_img, X_test_tab_fg[base_plus_fg_cols].values, y_test)
    
    # Resnet
    print("\nTraining Resnet")
    _, _, nn_resnet_mae_fg, nn_resnet_r2_fg = train_and_evaluate_nn(nn_resnet_fg, 
                                                                           X_train_img, X_train_tab_fg[base_plus_fg_cols].values, y_train, 
                                                                           X_test_img, X_test_tab_fg[base_plus_fg_cols].values, y_test)
    
    # LR
    lr_mae_fg, lr_r2_fg = train_and_evaluate_lin_model(lin_fg, 
                                                       X_train_tab_fg[base_plus_fg_cols].values, y_train,
                                                       X_test_tab_fg[base_plus_fg_cols].values, y_test)

    # Create and display the comparison per LLM
    create_comparison(llm, 
                      nn_base_mae_fg, nn_resnet_mae_fg, lr_mae_fg, nn_base_mae, nn_resnet_mae, lr_mae,
                      nn_base_r2_fg, nn_resnet_r2_fg, lr_r2_fg, nn_base_r2, nn_resnet_r2, lr_r2)

    # Try to clear NN from memory
    K.clear_session()
    gc.collect()

Training Base NN
Epoch 1/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m336s[0m 1s/step - R2Score: -2.3443 - loss: 532745.3750 - mae: 532745.3750
Epoch 2/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m322s[0m 1s/step - R2Score: -0.1493 - loss: 269796.4062 - mae: 269796.4062
Epoch 3/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 2s/step - R2Score: -0.1351 - loss: 279305.0312 - mae: 279305.0312
Epoch 4/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m363s[0m 1s/step - R2Score: -0.0981 - loss: 277794.6562 - mae: 277794.6562
Epoch 5/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 1s/step - R2Score: -0.0474 - loss: 269216.5938 - mae: 269216.5938
Epoch 6/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m343s[0m 1s/step - R2Score: 0.0626 - loss: 243594.1875 - mae: 243594.1875
Epoch 7/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m344s[0m 1s/step - R2Score: 0.1411 - loss: 2

Unnamed: 0_level_0,MAE,R2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
FG: NN_base gemma3:4b,227921,0.336
FG: NN_ResNet gemma3:4b,222521,0.339
FG: LR gemma3:4b,223438,0.38
NN_base,225426,0.341
NN_ResNet,394746,-0.681
LR,227976,0.354



Training Base NN
Epoch 1/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 1s/step - R2Score: -2.2125 - loss: 526232.2500 - mae: 526232.2500
Epoch 2/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m362s[0m 1s/step - R2Score: -0.1410 - loss: 279496.0000 - mae: 279496.0000
Epoch 3/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m363s[0m 1s/step - R2Score: -0.1403 - loss: 281180.5000 - mae: 281180.5000
Epoch 4/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m373s[0m 1s/step - R2Score: -0.1210 - loss: 283355.2500 - mae: 283355.2500
Epoch 5/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m359s[0m 1s/step - R2Score: -0.0308 - loss: 264731.3125 - mae: 264731.3125
Epoch 6/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m360s[0m 1s/step - R2Score: 0.0555 - loss: 258191.6406 - mae: 258191.6406
Epoch 7/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m365s[0m 1s/step - R2Score: 0.1444 - loss: 

Unnamed: 0_level_0,MAE,R2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
FG: NN_base llava:7b,228054,0.311
FG: NN_ResNet llava:7b,291150,-0.141
FG: LR llava:7b,225858,0.364
NN_base,225426,0.341
NN_ResNet,394746,-0.681
LR,227976,0.354



Training Base NN
Epoch 1/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m273s[0m 1s/step - R2Score: -2.1219 - loss: 529647.7500 - mae: 529647.7500
Epoch 2/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 1s/step - R2Score: -0.1499 - loss: 286559.9375 - mae: 286559.9375
Epoch 3/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 1s/step - R2Score: -0.1337 - loss: 279824.0000 - mae: 279824.0000
Epoch 4/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 1s/step - R2Score: -0.0990 - loss: 273704.5625 - mae: 273704.5625
Epoch 5/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m269s[0m 1s/step - R2Score: -0.0557 - loss: 267502.6562 - mae: 267502.6562
Epoch 6/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m270s[0m 1s/step - R2Score: 0.0473 - loss: 253997.0625 - mae: 253997.0625
Epoch 7/25
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m271s[0m 1s/step - R2Score: 0.1342 - loss: 

Unnamed: 0_level_0,MAE,R2
Model,Unnamed: 1_level_1,Unnamed: 2_level_1
FG: NN_base llava-llama3:8b,230746,0.325
FG: NN_ResNet llava-llama3:8b,245459,0.319
FG: LR llava-llama3:8b,228976,0.35
NN_base,225426,0.341
NN_ResNet,394746,-0.681
LR,227976,0.354



