In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from xgboost import XGBRegressor
import warnings
from datetime import datetime
warnings.filterwarnings("ignore")

# ---------------------------------------------------------------------------
# Train the sale-price model.
# Names defined here and used by later cells: `encoder`, `y`, `xgbregressor`.
# ---------------------------------------------------------------------------

# Vehicle age is measured against a fixed year so the feature does not
# silently change meaning when the notebook is re-run in a later year.
REFERENCE_YEAR = 2024

df = pd.read_csv("vehicle_details_new.csv")

# Keep only rows that are not assigned to auction and that have a sale price
# to learn from. `.copy()` makes `df` an independent frame so the column
# assignments below never write into a slice of the raw data.
has_sale_price = df["sale_price"].notna()
not_at_auction = df["fleet_status"] != "ASSIGNED_TO_AUCTION"
df = df[not_at_auction & has_sale_price].copy()
df = df.drop(columns=["expected_sale_price","min_bid_price","sale_date","fleet_status","remarks","vehicle_id"])

# Derived age features; the raw year/date columns are then removed.
df["years since manufacture"] = REFERENCE_YEAR - df["year"]
df["days since purchase"] = (datetime.now() - pd.to_datetime(df["purchase_date"])).dt.days
df = df.drop(columns=["year","purchase_date"])

# Grade labels become integers (GRADE_0 -> 8 ... SALVAGE -> 0); colour labels
# with padding, non-breaking spaces or alternate spellings are merged.
df["grade"] = df["grade"].replace({'GRADE_0':8,'GRADE_1':7,'GRADE_2':6,'GRADE_3':5,'GRADE_4':4,'GRADE_5':3,'GRADE_6':2,'COMPLETE':1,'SALVAGE':0})
df["color"] = df["color"].replace({'Black     ':'Black','Blue      ':'Blue','Grey      ':'Grey','White     ':'White','Silver    ':'Silver','Silver\xa0\xa0\xa0\xa0':'Silver','Beige     ':'Beige','Baige     ':'Beige','Red       ':'Red','Gray':'Grey','37281':'Red','White\xa0\xa0\xa0\xa0\xa0':'White','Golden    ':'Golden','Lt. Golden':'Golden', 'Brown     ':'Brown'})

df_num = df.select_dtypes(include=np.number)
# Explicit copy: the encoded values are written back into this frame below.
df_cat = df.select_dtypes(include=object).copy()

# Target (mean) encoding: every category is replaced by the mean sale price of
# the training rows in that category. One groupby per column replaces a
# boolean scan of the whole frame per category. `sort=False` avoids ordering
# category labels, which is not needed for a lookup table.
encoder = {}
for col in df_cat.columns:
    encoder[col] = df_num["sale_price"].groupby(df_cat[col], sort=False).mean().to_dict()
    df_cat[col] = df_cat[col].map(encoder[col])

# Feature matrix: numeric features first, then the encoded categoricals.
X_num = df_num.drop(columns=["sale_price"])
X = pd.concat([X_num,df_cat],axis=1)
y = df_num["sale_price"]

xgbregressor = XGBRegressor(random_state=100,n_estimators=100).fit(X,y)

In [42]:
def predict_car_price(manufacture_year,color,make_name,model_name,kilometer,grade,net_book_value,purchase_price,purchase_date):
    """Predict the sale price of a single car with the fitted `xgbregressor`.

    Relies on three notebook-level names created by the training cell:
    `encoder` (per-column category -> mean sale price), `y` (training sale
    prices) and `xgbregressor` (the fitted model).

    Parameters
    ----------
    manufacture_year : int
        Year of manufacture; converted to an age relative to 2024.
    color, make_name, model_name : str
        Categorical attributes. Each is replaced by its mean-sale-price
        encoding; a value that was not seen during training falls back to the
        overall mean sale price.
    kilometer, net_book_value, purchase_price : number
        Passed through unchanged as the model features of the same name.
    grade : str
        One of 'GRADE_0' ... 'GRADE_6', 'COMPLETE', 'SALVAGE'.
    purchase_date : str
        Timestamp in the format "%Y-%m-%d %H:%M:%S".

    Returns
    -------
    float
        Predicted sale price rounded to two decimals.

    Raises
    ------
    KeyError
        If `grade` is not one of the labels listed above.
    ValueError
        If `purchase_date` does not match the expected format.
    """
    grade = {'GRADE_0':8,'GRADE_1':7,'GRADE_2':6,'GRADE_3':5,'GRADE_4':4,'GRADE_5':3,'GRADE_6':2,'COMPLETE':1,'SALVAGE':0}[grade]

    # The model was fitted on `2024 - year`; using the same reference year
    # keeps this feature on the scale the model learned.
    years_since_manufacture = 2024 - manufacture_year
    days_since_purchase = (datetime.now() - datetime.strptime(purchase_date,"%Y-%m-%d %H:%M:%S")).days

    # Merge padded / alternately spelled colour labels exactly as the training cell does.
    color_replace = {'Black     ':'Black','Blue      ':'Blue','Grey      ':'Grey','White     ':'White','Silver    ':'Silver','Silver\xa0\xa0\xa0\xa0':'Silver','Beige     ':'Beige','Baige     ':'Beige','Red       ':'Red','Gray':'Grey','37281':'Red','White\xa0\xa0\xa0\xa0\xa0':'White','Golden    ':'Golden','Lt. Golden':'Golden', 'Brown     ':'Brown'}
    color = color_replace.get(color, color)

    # Categories unseen in training are encoded as the overall mean sale
    # price. This now covers `make_name` too, which previously raised KeyError
    # while colour and model already fell back.
    fallback_value = y.mean()
    color_value = encoder["color"].get(color, fallback_value)
    make_value = encoder["make_name"].get(make_name, fallback_value)
    model_value = encoder["model_name"].get(model_name, fallback_value)

    data = pd.DataFrame({'kilometer':kilometer,'grade':grade,'net_book_value':net_book_value,'purchase_price':purchase_price,
                         'years since manufacture':years_since_manufacture,'days since purchase':days_since_purchase,'color':color_value,
                         'make_name':make_value,'model_name':model_value},index=[0])
    return round(float(xgbregressor.predict(data)[0]),2)

In [63]:
def predict_cars_prices(data):
    """Predict sale prices for every row of `data` with the fitted `xgbregressor`.

    Relies on three notebook-level names created by the training cell:
    `encoder` (per-column category -> mean sale price), `y` (training sale
    prices) and `xgbregressor` (the fitted model).

    The categorical columns are encoded with the `encoder` fitted during
    training. The previous implementation rebuilt the encoder from the
    `sale_price` column of `data` itself, which leaked the target into the
    features, produced encodings different from the ones the model was trained
    on, and made the function unusable on data without a `sale_price` column.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain `year`, `purchase_date`, `grade`, every column present in
        `encoder`, and the remaining features the model was fitted on.
        Any other columns (including `sale_price`) are ignored. `data` is not
        modified.

    Returns
    -------
    numpy.ndarray
        One predicted sale price per row, in the row order of `data`.
    """
    cars = data.copy()

    # Same derived features as the training cell (which uses 2024 as the
    # reference year for vehicle age).
    cars["years since manufacture"] = 2024 - cars["year"]
    cars["days since purchase"] = (datetime.now() - pd.to_datetime(cars["purchase_date"])).dt.days

    cars["grade"] = cars["grade"].replace({'GRADE_0':8,'GRADE_1':7,'GRADE_2':6,'GRADE_3':5,'GRADE_4':4,'GRADE_5':3,'GRADE_6':2,'COMPLETE':1,'SALVAGE':0})
    cars["color"] = cars["color"].replace({'Black     ':'Black','Blue      ':'Blue','Grey      ':'Grey','White     ':'White','Silver    ':'Silver','Silver\xa0\xa0\xa0\xa0':'Silver','Beige     ':'Beige','Baige     ':'Beige','Red       ':'Red','Gray':'Grey','37281':'Red','White\xa0\xa0\xa0\xa0\xa0':'White','Golden    ':'Golden','Lt. Golden':'Golden', 'Brown     ':'Brown'})

    # Apply the training-time target encoding. A category that is present but
    # was never seen in training gets the overall mean sale price (the same
    # fallback the single-car predictor uses); genuinely missing values stay
    # NaN, as they were during training.
    fallback_value = y.mean()
    for col, mapping in encoder.items():
        raw = cars[col]
        unseen = raw.notna() & ~raw.isin(list(mapping))
        cars[col] = raw.map(mapping).mask(unseen, fallback_value).astype(float)

    # Select exactly the columns the model was fitted on, in the fitted order,
    # instead of relying on whatever columns are left after dropping others.
    feature_order = list(xgbregressor.get_booster().feature_names)
    return xgbregressor.predict(cars[feature_order])

### Predict the selling price of a single car by entering its values

In [43]:
# Example input for one vehicle; the keys are the keyword arguments of
# predict_car_price. The call is the last expression so its result is displayed.
example_car = {
    "manufacture_year": 2022,
    "color": "Blue",
    "make_name": "MG",
    "model_name": "5",
    "kilometer": 50585.0,
    "grade": "GRADE_0",
    "net_book_value": 26002.35,
    "purchase_price": 38300.0,
    "purchase_date": "2022-01-23 00:00:00",
}
predict_car_price(**example_car)

26470.79

### Predict the selling price of every car in a dataset

In [67]:
# Score every vehicle in the source file and save the predictions next to the
# recorded sale prices.
data = pd.read_csv("vehicle_details_new.csv")
data["Predicted Sale Price"] = predict_cars_prices(data)

# Absolute gap between recorded and predicted price, as a percentage of the
# recorded price, rounded to two decimals.
actual_price = data["sale_price"]
signed_pct_gap = (actual_price - data["Predicted Sale Price"]) / actual_price * 100
data["Percentage Difference"] = signed_pct_gap.abs().round(2)

data.to_csv("Prediction output.csv")