In [None]:
def build_price_prediction_model(num_classes=1):
    """Build a price regression model on top of a frozen ResNet50 backbone.

    Args:
        num_classes: Number of linear output units. Despite the name this is
            the width of the regression output, not a class count; the
            default of 1 yields one predicted price per image.

    Returns:
        A compiled Keras ``Model`` that takes 224x224x3 images, is optimised
        with Adam on mean squared error and reports mean absolute error.
    """
    # Keras symbols are exposed under ``tensorflow.keras``; importing them
    # from the top-level ``tensorflow`` package raises ImportError.
    from tensorflow.keras.applications import ResNet50
    from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
    from tensorflow.keras.models import Model
    from tensorflow.keras.optimizers import Adam

    # Image input
    img_input = Input(shape=(224, 224, 3))

    # ImageNet-pretrained ResNet50 without its classifier acts as the
    # feature extractor.
    base_model = ResNet50(include_top=False, weights='imagenet', input_tensor=img_input)

    # Freeze the backbone so only the new regression head is trained.
    for layer in base_model.layers:
        layer.trainable = False

    # Regression head: pooled backbone features -> two dense layers.
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    x = Dense(1024, activation='relu')(x)
    x = Dense(512, activation='relu')(x)

    # Linear activation because the target is an unbounded price.
    predictions = Dense(num_classes, activation='linear')(x)

    model = Model(inputs=base_model.input, outputs=predictions)

    # ``learning_rate`` is the supported keyword; the old ``lr`` alias is
    # deprecated and rejected by current Keras optimizers.
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='mean_squared_error',
                  metrics=['mean_absolute_error'])

    return model

In [None]:
def train_price_prediction_model(data_dir='data', epochs=30, batch_size=32,
                                 steps_per_epoch=100, validation_steps=50):
    """Train the price prediction model on images grouped by car class.

    Reads ``car_prices.json`` from ``data_dir`` and streams images from the
    ``train`` and ``valid`` sub-directories, replacing each image's class
    label with the price listed for that class.

    Args:
        data_dir: Directory holding ``car_prices.json`` plus ``train`` and
            ``valid`` image folders (one sub-folder per car class).
        epochs: Maximum number of training epochs.
        batch_size: Number of images per batch for both generators.
        steps_per_epoch: Training batches drawn per epoch.
        validation_steps: Validation batches drawn per epoch.

    Returns:
        The fitted model. The best checkpoint by validation MAE is also
        written to ``model_prices.h5``.

    Raises:
        KeyError: If an image class folder has no entry in the price mapping.
    """
    import json
    import os
    import numpy as np
    # Keras symbols are exposed under ``tensorflow.keras``; importing them
    # from the top-level ``tensorflow`` package raises ImportError.
    from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
    from tensorflow.keras.preprocessing.image import ImageDataGenerator

    # Load car price mapping (class folder name -> price)
    with open(os.path.join(data_dir, 'car_prices.json'), 'r') as f:
        car_prices = json.load(f)

    # Augment training images only; validation images are just rescaled.
    train_datagen = ImageDataGenerator(
        rescale=1./255,
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

    valid_datagen = ImageDataGenerator(rescale=1./255)

    # Build model
    model = build_price_prediction_model()

    # The monitored metric is an error, so lower is better; state it
    # explicitly instead of relying on Keras' name-based 'auto' mode.
    callbacks = [
        ModelCheckpoint('model_prices.h5', monitor='val_mean_absolute_error',
                        mode='min', save_best_only=True),
        EarlyStopping(patience=10, monitor='val_mean_absolute_error', mode='min'),
        ReduceLROnPlateau(factor=0.1, patience=5, monitor='val_mean_absolute_error',
                          mode='min')
    ]

    def generate_data(generator, directory, batch_size, car_prices):
        """Return an endless iterator of (image_batch, price_batch) pairs."""
        gen = generator.flow_from_directory(
            directory,
            target_size=(224, 224),
            batch_size=batch_size,
            class_mode='sparse'
        )

        # ``class_indices`` maps folder name -> integer label; check every
        # class has a price now rather than failing deep inside ``fit``.
        missing = sorted(name for name in gen.class_indices if name not in car_prices)
        if missing:
            raise KeyError(f"No price found in car_prices.json for classes: {missing}")

        # Lookup table indexed by integer label so a batch of labels can be
        # converted to prices with one vectorised index operation.
        index_to_class = {index: name for name, index in gen.class_indices.items()}
        price_by_index = np.array(
            [car_prices[index_to_class[i]] for i in range(len(index_to_class))],
            dtype='float32'
        )

        def batches():
            while True:
                X_batch, y_batch = next(gen)
                # Sparse labels arrive as floats; cast before indexing.
                yield X_batch, price_by_index[y_batch.astype(int)]

        return batches()

    # Train model
    model.fit(
        generate_data(train_datagen, os.path.join(data_dir, 'train'),
                      batch_size=batch_size, car_prices=car_prices),
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        validation_data=generate_data(valid_datagen, os.path.join(data_dir, 'valid'),
                                      batch_size=batch_size, car_prices=car_prices),
        validation_steps=validation_steps,
        callbacks=callbacks
    )

    return model

In [None]:
def evaluate_model(model, test_dir, car_prices):
    """Evaluate the price prediction model on test data.

    Every readable ``.jpg``/``.jpeg``/``.png`` image under each class folder
    of ``test_dir`` is resized to 224x224, converted to RGB, scaled to
    [0, 1] and passed to ``model.predict``. Folders without an entry in
    ``car_prices`` and unreadable images are skipped with a warning.

    Side effects: prints the metrics, saves a scatter plot to
    ``price_prediction_performance.png`` and shows it.

    Args:
        model: Trained price prediction model
        test_dir: Directory containing test images organized by car class
        car_prices: Dictionary mapping car labels to prices

    Returns:
        Dictionary with keys ``mae``, ``rmse``, ``mape``, ``r2`` and
        ``n_samples``. ``mape`` is computed over samples whose true price is
        non-zero and is NaN when there are none.

    Raises:
        ValueError: If no test image with a known price could be evaluated.
    """
    import os
    import numpy as np
    import cv2
    from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

    # Lists to store actual and predicted prices
    true_prices = []
    predicted_prices = []

    # Sorted so warnings and sample order are reproducible across runs.
    for car_class in sorted(os.listdir(test_dir)):
        # Skip non-directories
        class_dir = os.path.join(test_dir, car_class)
        if not os.path.isdir(class_dir):
            continue

        # Get true price for this car class
        if car_class not in car_prices:
            print(f"Warning: No price found for {car_class}")
            continue

        true_price = car_prices[car_class]

        # Process each image in this class
        for img_file in sorted(os.listdir(class_dir)):
            if not img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                continue

            # Load and preprocess image
            img_path = os.path.join(class_dir, img_file)
            img = cv2.imread(img_path)
            if img is None:
                print(f"Warning: Could not read {img_path}")
                continue

            img = cv2.resize(img, (224, 224))
            # OpenCV loads BGR; convert to RGB and scale to [0, 1].
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0
            img = np.expand_dims(img, axis=0)  # Add batch dimension

            # Make prediction
            pred_price = model.predict(img, verbose=0)[0][0]

            # Store results
            true_prices.append(true_price)
            predicted_prices.append(pred_price)

    # Fail early with a clear message instead of an obscure metrics error.
    if not true_prices:
        raise ValueError(
            f"No test images with a known price were found in {test_dir!r}"
        )

    # Calculate metrics
    true_prices = np.asarray(true_prices, dtype=float)
    predicted_prices = np.asarray(predicted_prices, dtype=float)

    mae = mean_absolute_error(true_prices, predicted_prices)
    rmse = np.sqrt(mean_squared_error(true_prices, predicted_prices))
    r2 = r2_score(true_prices, predicted_prices)

    # Percentage error is undefined for a true price of 0, so those samples
    # are excluded rather than producing inf/NaN through division by zero.
    nonzero = true_prices != 0
    if nonzero.any():
        relative_error = (true_prices[nonzero] - predicted_prices[nonzero]) / true_prices[nonzero]
        mape = np.mean(np.abs(relative_error)) * 100
    else:
        mape = float('nan')

    # Print results
    print("Evaluation Results:")
    print(f"Mean Absolute Error: ${mae:.2f}")
    print(f"Root Mean Squared Error: ${rmse:.2f}")
    print(f"Mean Absolute Percentage Error: {mape:.2f}%")
    print(f"R² Score: {r2:.4f}")

    # Scatter of predictions against truth, with the y = x reference line.
    import matplotlib.pyplot as plt
    lowest, highest = true_prices.min(), true_prices.max()
    plt.figure(figsize=(10, 6))
    plt.scatter(true_prices, predicted_prices, alpha=0.5)
    plt.plot([lowest, highest], [lowest, highest], 'r--')
    plt.xlabel('True Prices ($)')
    plt.ylabel('Predicted Prices ($)')
    plt.title('Car Price Prediction Performance')
    plt.savefig('price_prediction_performance.png')
    plt.show()

    return {
        'mae': mae,
        'rmse': rmse,
        'mape': mape,
        'r2': r2,
        'n_samples': len(true_prices)
    }

In [None]:
# Only run this cell when you're ready to train
import json
from tqdm import tqdm
# Import the function, not just the module: a bare ``import prepare_dataset``
# binds the module object, and calling a module raises TypeError.
# NOTE(review): assumes prepare_dataset.py defines a function with the same
# name as the module — confirm.
from prepare_dataset import prepare_dataset

# Step 1: Prepare dataset
# NOTE(review): ``combined_cars_df`` is not defined in this cell; it must be
# created by an earlier cell before this one runs.
print("Preparing dataset...")
car_prices = prepare_dataset(combined_cars_df, "./images/cars")

# Step 2: Train model
print("Training model...")
model = train_price_prediction_model()

# Step 3: Evaluate on test set
print("Evaluating model...")
evaluation_metrics = evaluate_model(model, 'data/test', car_prices)
# Bare last expression so the notebook still displays the metrics dict.
evaluation_metrics