In [14]:
import pandas as pd
import numpy as np
import joblib

# Restore the fitted price predictor and the list of feature names from their
# joblib dumps, in that order.
# NOTE: joblib.load unpickles arbitrary Python objects; only point it at
# files from a trusted source.
model, feature_names = (
    joblib.load(artifact_path)
    for artifact_path in ('xgbr_price_predictor.pkl', 'feature_names.pkl')
)

# Function to preprocess new data
def preprocess_new_data(new_data, feature_names):
    """One-hot encode ``new_data`` and align its columns to ``feature_names``.

    Args:
        new_data: DataFrame of raw rows. It is not modified; a new frame is
            returned.
        feature_names: Column labels, in order, that the returned frame must
            have.

    Returns:
        A DataFrame whose columns are exactly ``feature_names``, in that
        order. Columns that do not exist after encoding are filled with 0;
        columns that are not listed in ``feature_names`` are dropped.
    """
    one_hot_encode_cols = [
        'brand', 'model', 'fuel_type', 'gearbox', 'color', 'seller',
        'body_type', 'drivetrain', 'country', 'condition', 'upholstery_color',
    ]

    # get_dummies raises KeyError when asked to encode a column the frame
    # does not have, so restrict the request to the columns actually supplied.
    present_cols = [col for col in one_hot_encode_cols if col in new_data.columns]
    encoded = pd.get_dummies(new_data, columns=present_cols) if present_cols else new_data

    # A single reindex adds the absent columns (as 0), drops the extra ones
    # and puts everything in the requested order.
    return encoded.reindex(columns=feature_names, fill_value=0)

# Example new data (replace with your actual data): one record, written as
# plain scalars and then wrapped into single-element columns.
example_record = {
    'mileage': 50000,
    'power': 120,
    'engine_size': 1998,
    'doors': 4,
    'seats': 5,
    'brand': 'audi',
    'model': 'a4',
    'fuel_type': 'petrol',
    'gearbox': 'manual',
    'color': 'black',
    'seller': 'private',
    'body_type': 'sedan',
    'drivetrain': 'front',
    'country': 'germany',
    'condition': 'used',
    'upholstery_color': 'black',
    'emission_class': 4,
    'year': 1900,
}
new_data = pd.DataFrame(
    {column: [value] for column, value in example_record.items()}
)

# Encode the record and align it to the loaded feature names.
preprocessed_data = preprocess_new_data(new_data, feature_names)

# Hand the model a plain NumPy copy of the feature matrix.
X_new = preprocessed_data.to_numpy(copy=True)

# Run the model on the prepared matrix.
predictions = model.predict(X_new)

# Report one line per input row.
for i, prediction in enumerate(predictions):
    print(f'Prediction for input {i}: {prediction}')


Prediction for input 0: 25590.689453125
