In [3]:
import pandas as pd
import numpy as np
import joblib
import pandas as pd

# Restore the persisted artifacts: the fitted model first, then the list of
# feature names that goes with it.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary
# code -- only load artifact files that come from a trusted source.
model, feature_names = (
    joblib.load(artifact_path)
    for artifact_path in ('xgbr_price_predictor.pkl', 'feature_names.pkl')
)

# Function to preprocess new data
def preprocess_new_data(new_data, feature_names):
    """Turn raw listing rows into a frame laid out exactly like ``feature_names``.

    The categorical columns are one-hot encoded, then the result is aligned to
    ``feature_names``: same columns, same order. Every feature the input does
    not provide is filled with 0 -- this includes plain (non-encoded) features
    the caller left out, so omitted inputs are scored as zeros.

    Parameters
    ----------
    new_data : pandas.DataFrame
        Raw rows to score. Any of the categorical columns listed in
        ``one_hot_encode_cols`` may be absent; absent ones are not encoded.
    feature_names : list-like of str
        Column names, in order, that the returned frame must have.

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly the columns in ``feature_names``. The input
        frame is not modified.

    Warns
    -----
    UserWarning
        When encoding produces indicator columns that are not part of
        ``feature_names`` (e.g. a category value the feature list does not
        contain). Those columns are dropped from the result.
    """
    import warnings  # local import so the notebook's import cell is untouched

    one_hot_encode_cols = ['brand', 'model', 'fuel_type', 'gearbox', 'color', 'seller', 'body_type', 'drivetrain', 'country', 'condition', 'upholstery_color']
    feature_names = list(feature_names)

    # Encode only the categorical columns that were actually supplied; asking
    # get_dummies for an absent column raises KeyError.
    present_cols = [col for col in one_hot_encode_cols if col in new_data.columns]
    if present_cols:
        # dtype=int keeps the indicators numeric (0/1) regardless of the
        # pandas version's default indicator dtype.
        encoded = pd.get_dummies(new_data, columns=present_cols, dtype=int)
    else:
        encoded = new_data

    # Surface indicator columns that will be dropped instead of losing them
    # silently.
    # NOTE(review): the saved feature list contains names such as
    # 'model_alfa romeo_145', which looks like the model value was prefixed
    # with the brand before encoding -- confirm against the training code; a
    # bare model value would then never match a feature.
    expected = set(feature_names)
    unseen = [
        col for col in encoded.columns
        if col not in new_data.columns and col not in expected
    ]
    if unseen:
        warnings.warn(
            "Ignoring one-hot columns that are not in feature_names: "
            f"{unseen}",
            UserWarning,
            stacklevel=2,
        )

    # Add missing features as 0, drop extras and fix the column order in one
    # step.
    return encoded.reindex(columns=feature_names, fill_value=0)

# Example input (replace with your actual data): a single listing described
# only by its categorical attributes.
# Numeric inputs are currently not supplied, so preprocess_new_data fills them
# with 0 (previously sketched values: mileage=50000, power=120,
# engine_size=1998, doors=4, seats=5, emission_class=4, year=1950).
# NOTE(review): the saved feature names look brand-prefixed for the model
# column (e.g. 'model_alfa romeo_145') and the cleaned data stores country as
# a short code (e.g. 'it'); 'a4' and 'germany' may therefore not match any
# feature -- confirm against the training data.
new_data = pd.DataFrame([{
    'brand': 'audi',
    'model': 'a4',
    'color': 'black',
    'gearbox': 'manual',
    'seller': 'private',
    'body_type': 'sedan',
    'drivetrain': 'front',
    'condition': 'used',
    'upholstery_color': 'black',
    'fuel_type': 'petrol',
    'country': 'germany',
}])

# Bring the example into the column layout given by feature_names.
preprocessed_data = preprocess_new_data(new_data, feature_names)

# The model is handed a plain NumPy matrix rather than the DataFrame.
X_new = np.array(preprocessed_data)

# Score every row of the matrix.
predictions = model.predict(X_new)

# Report one line per input row.
for row_number, predicted_value in enumerate(predictions):
    print(f'Prediction for input {row_number}: {predicted_value}')


Prediction for input 0: 21863.984375


In [4]:
# Read the cleaned car listings from disk.
# NOTE(review): the recorded output shows an 'Unnamed: 0' column, which
# usually means the CSV was written together with its index; consider
# reading with index_col=0 once it is confirmed nothing relies on that column.
data = pd.read_csv(filepath_or_buffer='cleaned_cars.csv')

# Preview just the first row.
data.iloc[:1]


Unnamed: 0,url,brand,model,price,mileage,fuel_type,color,gearbox,power,engine_size,seller,body_type,doors,seats,drivetrain,emission_class,condition,upholstery_color,year,country
0,https://www.autoscout24.com/offers/alfa-romeo-...,alfa romeo,159,5500.0,233685.0,diesel,red,manual,136.0,1956.0,dealer,sedan,4,5,front,134.0,used,metallic,2011.0,it


In [5]:
# want to resize the images in assets/brand_logo to 128x128 pixels
from PIL import Image
import os

# Directory containing the images; the path is relative to the current
# working directory, and resize_images() rewrites the PNG files in it.
image_dir = 'assets/brand_logo'

# Function to resize images
def resize_images(image_dir, size=(128, 128)):
    """Resize every PNG file directly inside ``image_dir`` to ``size``, in place.

    Each matching file is overwritten with its resized version; the original
    pixels are not kept. The image is stretched to exactly ``size``, so the
    aspect ratio is not preserved. Files that already have the target size are
    left untouched, which makes re-running the function a no-op.

    Parameters
    ----------
    image_dir : str
        Directory to scan (not recursive).
    size : tuple of int, optional
        Target ``(width, height)`` in pixels. Defaults to ``(128, 128)``.

    Raises
    ------
    FileNotFoundError
        If ``image_dir`` does not exist (raised by ``os.listdir``).
    """
    target_size = tuple(size)
    # sorted() gives a deterministic processing order across platforms.
    for filename in sorted(os.listdir(image_dir)):
        # Compare the extension case-insensitively so 'LOGO.PNG' is handled.
        if not filename.lower().endswith('.png'):
            continue
        path = os.path.join(image_dir, filename)
        with Image.open(path) as img:
            if img.size == target_size:
                # Already the requested size: do not rewrite the file.
                continue
            img_resized = img.resize(target_size)
        # Write only after the source file has been closed, so the file is
        # never open for reading while it is being overwritten.
        img_resized.save(path)

# Resize the images (this overwrites the PNG files in image_dir in place)
resize_images(image_dir)

In [20]:
# Column-wise extremes over the whole `data` frame (requires the cell that
# loads the cleaned CSV to have been run first).
max_values = data.max()
min_values = data.min()

# Print the maximum, then the minimum, for each column of interest.
for column in ('emission_class', 'power', 'engine_size', 'mileage'):
    print(max_values[column])
    print(min_values[column])


2370.0
0.0
1020.0
1.0
67500.0
0.0
9999999.0
0.0


In [7]:
# Display the feature names that preprocess_new_data aligns its output to
feature_names

['mileage',
 'power',
 'engine_size',
 'doors',
 'seats',
 'emission_class',
 'year',
 'brand_alfa romeo',
 'brand_aston martin',
 'brand_audi',
 'brand_bentley',
 'brand_bmw',
 'brand_bugatti',
 'brand_cadillac',
 'brand_chevrolet',
 'brand_citroen',
 'brand_corvette',
 'brand_cupra',
 'brand_dacia',
 'brand_ferrari',
 'brand_ford',
 'brand_honda',
 'brand_hyundai',
 'brand_jaguar',
 'brand_jeep',
 'brand_kia',
 'brand_lamborghini',
 'brand_land rover',
 'brand_lexus',
 'brand_maserati',
 'brand_mazda',
 'brand_mclaren',
 'brand_mini',
 'brand_mitsubishi',
 'brand_nissan',
 'brand_opel',
 'brand_peugeot',
 'brand_porsche',
 'brand_renault',
 'brand_rolls-royce',
 'brand_seat',
 'brand_skoda',
 'brand_smart',
 'brand_subaru',
 'brand_suzuki',
 'brand_tesla',
 'brand_toyota',
 'brand_volkswagen',
 'model_alfa romeo_145',
 'model_alfa romeo_147',
 'model_alfa romeo_156',
 'model_alfa romeo_159',
 'model_alfa romeo_166',
 'model_alfa romeo_4c',
 'model_alfa romeo_75',
 'model_alfa romeo_a