In [5]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import RootMeanSquaredError
from sklearn.preprocessing import MinMaxScaler

In [6]:
# Load the cleaned training table.
df = pd.read_csv("data/train_clean.csv")

# Build each row's image filename from its row index, zero-padded to five
# digits (row 0 -> "00000.jpg").
padded_index = df.index.astype(str).str.zfill(5)
df["imageid"] = padded_index + ".jpg"

print("Total rows:", len(df))
print("Sample image ids:", df["imageid"].head())

# Scale the regression target into [0, 1]. `scaler` is kept at notebook level
# so the fitted min/max remain available to later cells.
scaler = MinMaxScaler()
price_column = df[["price"]]
df["price_norm"] = scaler.fit_transform(price_column)



Total rows: 16209
Sample image ids: 0    00000.jpg
1    00001.jpg
2    00002.jpg
3    00003.jpg
4    00004.jpg
Name: imageid, dtype: object


In [8]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Rescale pixels to [0, 1] and reserve 20% of the rows for validation.
# NOTE(review): validation_split takes a contiguous slice of `df` rather than a
# random sample (the captured output shows 3241 of 16209 rows) -- confirm the
# CSV is not sorted by price or location, otherwise shuffle `df` first.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments shared by both loaders, kept in one place so they cannot drift.
common_flow_args = dict(
    directory='satellite_images/',
    x_col='imageid',
    y_col='price_norm',  # Normalized 0-1
    target_size=(224, 224),
    batch_size=32,
    class_mode='raw',    # pass y_col through unchanged (regression target)
)

# Training batches are shuffled (the flow_from_dataframe default).
train_loader = datagen.flow_from_dataframe(
    df,
    subset='training',
    **common_flow_args,
)

# Validation batches must NOT be shuffled: later cells compare
# model.predict(val_loader) against val_loader.labels, which is only valid when
# the loader yields rows in the same order as `.labels`.
val_loader = datagen.flow_from_dataframe(
    df,
    subset='validation',
    shuffle=False,
    **common_flow_args,
)

print(f"Train: {train_loader.samples}, Val: {val_loader.samples}")

# Pull one batch as a smoke test that images can actually be read and decoded.
batch_x, batch_y = next(train_loader)


Found 12968 validated image filenames.
Found 3241 validated image filenames.
Train: 12968, Val: 3241


In [9]:
def r2_metric(y_true, y_pred):
    """Coefficient of determination (R^2) for a batch of predictions.

    Args:
        y_true: Tensor of ground-truth targets.
        y_pred: Tensor of model predictions with the same number of elements
            as ``y_true``.

    Returns:
        Scalar float32 tensor ``1 - SS_res / (SS_tot + epsilon)``.

    Note:
        When used via ``compile(metrics=[...])`` the value is computed per
        batch and Keras reports the running average, so it only approximates
        the dataset-level R^2.
    """
    # Flatten both tensors so labels of shape (batch,) and predictions of
    # shape (batch, 1) are subtracted element-wise instead of broadcasting to
    # a (batch, batch) matrix, which would silently corrupt the score.
    y_true = tf.reshape(tf.cast(y_true, tf.float32), [-1])
    y_pred = tf.reshape(tf.cast(y_pred, tf.float32), [-1])

    ss_res = tf.reduce_sum(tf.square(y_true - y_pred))
    ss_tot = tf.reduce_sum(tf.square(y_true - tf.reduce_mean(y_true)))

    # epsilon guards against division by zero when every target in the batch
    # is identical (ss_tot == 0).
    return 1 - ss_res / (ss_tot + tf.keras.backend.epsilon())


In [None]:
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dropout

# Small CNN regressor: three conv/pool stages, global average pooling, then a
# dense head with dropout that outputs a single (normalized) price.
model2 = Sequential([
    Conv2D(32, (3,3), activation="relu", input_shape=(224,224,3)),
    MaxPooling2D(2,2),

    Conv2D(64, (3,3), activation="relu"),
    MaxPooling2D(2,2),

    Conv2D(128, (3,3), activation="relu"),
    MaxPooling2D(2,2),

    GlobalAveragePooling2D(),
    Dense(64, activation="relu"),
    Dropout(0.3),
    Dense(32, activation="relu"),
    Dense(1, activation="linear")
])

# Name the RMSE metric explicitly: its default name is
# "root_mean_squared_error", so without name="rmse" the "val_rmse" key used by
# the callback and the summary prints below would not exist.
model2.compile(
    optimizer=Adam(1e-4),
    loss="mse",
    metrics=[RootMeanSquaredError(name="rmse"), r2_metric],
)

# mode="min" is stated explicitly because the direction of a custom-named
# metric cannot be reliably inferred from its name.
early_stop = EarlyStopping(
    monitor="val_rmse",
    mode="min",
    patience=3,
    restore_best_weights=True,
)

# Train the model that was just built (model2).
history = model2.fit(
    train_loader,
    epochs=12,
    validation_data=val_loader,
    callbacks=[early_stop],
    verbose=1
)

# Report validation-set metrics; validation keys carry the "val_" prefix.
print("Best val_rmse:", min(history.history['val_rmse']))
print("Best val_r2:", max(history.history['val_r2_metric']))



In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Min/max of the raw target, used to invert the 0-1 min-max scaling that
# produced `price_norm`.
price_min, price_max = df['price'].min(), df['price'].max()
price_range = price_max - price_min

# Collect labels and predictions batch by batch so every prediction is paired
# with the label from the same batch. Comparing model2.predict(val_loader)
# against val_loader.labels is only valid when the loader does not shuffle,
# and flow_from_dataframe shuffles by default.
label_batches, pred_batches = [], []
for batch_idx in range(len(val_loader)):
    batch_images, batch_labels = val_loader[batch_idx]
    pred_batches.append(model2.predict_on_batch(batch_images))
    label_batches.append(batch_labels)

y_true_norm = np.concatenate(label_batches).ravel()
y_pred_norm = np.concatenate(pred_batches).ravel()

# Map both back to the original price scale.
y_true_raw = y_true_norm * price_range + price_min
y_pred_raw = y_pred_norm * price_range + price_min

val_rmse_raw = np.sqrt(mean_squared_error(y_true_raw, y_pred_raw))

# MAPE is computed on raw prices: on the normalized scale the cheapest row is
# exactly 0, which makes the percentage error explode.
# Assumes raw prices are strictly positive -- TODO confirm; the floor below
# only protects against an accidental zero.
mape = np.mean(
    np.abs(y_true_raw - y_pred_raw) / np.maximum(np.abs(y_true_raw), 1e-8)
) * 100

print(f"Val RMSE (raw scale): {val_rmse_raw:,.0f}")
print(f"MAPE: {mape:.1f}%")


In [None]:
from sklearn.metrics import r2_score
import numpy as np

# Dataset-level R^2 on the validation split, using the objects this notebook
# actually defines (val_loader / model2).
# Labels and predictions are gathered batch by batch so they stay paired even
# if the loader shuffles its sample order.
label_batches, pred_batches = [], []
for batch_idx in range(len(val_loader)):
    batch_images, batch_labels = val_loader[batch_idx]
    pred_batches.append(model2.predict_on_batch(batch_images))
    label_batches.append(batch_labels)

y_true = np.concatenate(label_batches).ravel()
y_pred = np.concatenate(pred_batches).ravel()

# R^2 is unchanged by the affine min-max scaling, so the normalized targets
# give the same score as raw prices would.
print("Final Validation R²:", r2_score(y_true, y_pred))

In [None]:
# Persist the trained CNN (model2 is the model built and trained above).
# Create the target directory first so saving does not fail on a fresh checkout.
os.makedirs("dependables", exist_ok=True)
model2.save("dependables/cnn_image_model.keras")