# In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from pathlib import Path

# Load the image index and the price list from their CSV exports.
image_table = pd.read_csv('D:\\CPEN355_project\\Data\\Image_table.csv')
price_table = pd.read_csv('D:\\CPEN355_project\\Data\\Price_table.csv')

# Attach a price to every image row by joining on the shared model ID
# (inner join, which is the pandas default).
# NOTE(review): if Price_table holds several rows per Genmodel_ID, this join
# repeats each image once per price row -- confirm that this is intended.
merged_data = image_table[['Genmodel_ID', 'Image_name']].merge(
    price_table[['Genmodel_ID', 'Entry_price']],
    on='Genmodel_ID',
)

# Keep only the models that appear in at least 300 merged rows.
image_counts = merged_data['Genmodel_ID'].value_counts()
valid_genmodels = image_counts.loc[image_counts >= 300].index
has_enough_rows = merged_data['Genmodel_ID'].isin(valid_genmodels)
filtered_data = merged_data[has_enough_rows]

def sample_images(group):
    """Limit one group of rows to at most 500 entries.

    Args:
        group: The rows belonging to a single group (anything that supports
            ``len()`` and ``.sample()``, e.g. a pandas DataFrame).

    Returns:
        ``group`` itself when it has 500 rows or fewer; otherwise a sample of
        500 rows drawn with a fixed seed (``random_state=42``) so the
        selection is reproducible.
    """
    max_rows = 500
    if len(group) <= max_rows:
        return group
    return group.sample(n=max_rows, random_state=42)

# Cap every model at 500 rows (see sample_images).
# The columns are selected explicitly after groupby() so that apply() keeps
# passing 'Genmodel_ID' to sample_images without relying on the deprecated
# implicit inclusion of grouping columns, which raises a DeprecationWarning on
# pandas >= 2.2.  The selected rows and their order are the same as before,
# which matters because the seeded train/test split below depends on row order.
filtered_data = (
    filtered_data
    .groupby('Genmodel_ID')[list(filtered_data.columns)]
    .apply(sample_images)
    .reset_index(drop=True)
)

# Build the full on-disk path of every image from its file name.
image_directory = 'D:\\CPEN355_project\\355DataSet\\DVM_noNest'
filtered_data['Image_path'] = filtered_data['Image_name'].apply(
    lambda x: f"{image_directory}\\{x}"
)

def image_exists(path):
    """Tell whether ``path`` refers to an existing regular file.

    Args:
        path: A filesystem path (string or path-like object).

    Returns:
        ``True`` if the path exists and is a file, ``False`` otherwise
        (including when it is a directory).
    """
    candidate = Path(path)
    return candidate.is_file()

# Drop every row whose image file is missing on disk, then renumber the rows.
exists_mask = filtered_data['Image_path'].map(image_exists)
filtered_data = filtered_data.loc[exists_mask].reset_index(drop=True)

# Model inputs (file paths) and regression targets (entry prices).
image_paths = filtered_data['Image_path']
prices = filtered_data['Entry_price'].values

img_size = 224   # images are resized to img_size x img_size in preprocess_image
batch_size = 32  # batch size of the evaluation dataset

# Standardise the prices; the scaler works on a 2-D column, hence the reshape.
# NOTE(review): the scaler is fitted on all prices before the split, so the
# test targets contribute to its mean/std -- presumably this mirrors the
# training script; confirm before changing it.
price_column = prices.reshape(-1, 1)
scaler = StandardScaler()
prices_scaled = scaler.fit_transform(price_column)

# Hold out 20 % of the rows with a fixed seed so the split is reproducible.
X_train_paths, X_test_paths, y_train_scaled, y_test_scaled = train_test_split(
    image_paths,
    prices_scaled,
    test_size=0.2,
    random_state=42,
)

def preprocess_image(image_path, label):
    """Load one image file and turn it into a model-ready tensor.

    The file is read, decoded as a 3-channel JPEG, resized to
    ``img_size`` x ``img_size`` and divided by 255.

    Args:
        image_path: Path of the image file to read (presumably a JPEG, since
            it is decoded with ``decode_jpeg`` -- verify against the dataset).
        label: Target value paired with the image; passed through unchanged.

    Returns:
        A ``(image, label)`` tuple.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [img_size, img_size])
    return resized / 255.0, label

# Build the evaluation pipeline: (path, scaled price) pairs are loaded by
# preprocess_image and batched.  No shuffling is applied, so batches follow
# the order of X_test_paths / y_test_scaled.
test_dataset = tf.data.Dataset.from_tensor_slices((X_test_paths.values, y_test_scaled))
test_dataset = test_dataset.map(preprocess_image, num_parallel_calls=tf.data.AUTOTUNE)
test_dataset = test_dataset.batch(batch_size).prefetch(tf.data.AUTOTUNE)

model = tf.keras.models.load_model('best_cnn_model.h5')

# One prediction per test image, flattened to 1-D.
# assumes the model has a single scalar output per image -- TODO confirm
predictions_scaled = np.asarray(model.predict(test_dataset)).reshape(-1)

# The labels are already in memory in dataset order, so reuse them instead of
# iterating the dataset a second time (which would re-read and re-decode every
# test image just to get the labels back).
y_test_scaled_flat = np.asarray(y_test_scaled).reshape(-1, 1)

# Undo the standardisation so the metric is computed on real prices.
predictions = scaler.inverse_transform(predictions_scaled.reshape(-1, 1)).flatten()
y_test = scaler.inverse_transform(y_test_scaled_flat.reshape(-1, 1)).flatten()

r2 = r2_score(y_test, predictions)
print(f"R²: {r2}")


# [cell output] filtered_data = filtered_data.groupby('Genmodel_ID').apply(sample_images).reset_index(drop=True)


# [cell output] R²: 0.9363492973819664
