In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Read the Ford listings CSV into a DataFrame and display it
CSV_PATH = "ford.csv"
dataset = pd.read_csv(CSV_PATH)
dataset

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,Fiesta,2017,12000,Automatic,15944,Petrol,150,57.7,1.0
1,Focus,2018,14000,Manual,9083,Petrol,150,57.7,1.0
2,Focus,2017,13000,Manual,12456,Petrol,150,57.7,1.0
3,Fiesta,2019,17500,Manual,10460,Petrol,145,40.3,1.5
4,Fiesta,2019,16500,Automatic,1482,Petrol,145,48.7,1.0
...,...,...,...,...,...,...,...,...,...
17961,B-MAX,2017,8999,Manual,16700,Petrol,150,47.1,1.4
17962,B-MAX,2014,7499,Manual,40700,Petrol,30,57.7,1.0
17963,Focus,2015,9999,Manual,7010,Diesel,20,67.3,1.6
17964,KA,2018,8299,Manual,5007,Petrol,145,57.7,1.2


In [5]:
# Count the null entries per column (axis=0 sums down each column)
dataset.isnull().sum(axis=0)

model           0
year            0
price           0
transmission    0
mileage         0
fuelType        0
tax             0
mpg             0
engineSize      0
dtype: int64

In [6]:
# Show the distinct values of each categorical column, one array per line
for column in ('transmission', 'fuelType'):
    print(dataset[column].unique())

['Automatic' 'Manual' 'Semi-Auto']
['Petrol' 'Diesel' 'Hybrid' 'Electric' 'Other']


In [7]:
# Integer codes for the two categorical columns; the keys are exactly the
# values printed by the unique() check above.
transmission_codes = {'Automatic':0, 'Manual':1, 'Semi-Auto':2}
fuel_type_codes = {'Petrol':0, 'Diesel':1, 'Hybrid':2, 'Electric':3, 'Other':4}

# A nested {column: {old: new}} mapping lets one replace() call encode both
# columns; inplace=True mutates `dataset`, which the later cells keep using.
dataset.replace(
    {'transmission': transmission_codes, 'fuelType': fuel_type_codes},
    inplace=True,
)
dataset

Unnamed: 0,model,year,price,transmission,mileage,fuelType,tax,mpg,engineSize
0,Fiesta,2017,12000,0,15944,0,150,57.7,1.0
1,Focus,2018,14000,1,9083,0,150,57.7,1.0
2,Focus,2017,13000,1,12456,0,150,57.7,1.0
3,Fiesta,2019,17500,1,10460,0,145,40.3,1.5
4,Fiesta,2019,16500,0,1482,0,145,48.7,1.0
...,...,...,...,...,...,...,...,...,...
17961,B-MAX,2017,8999,1,16700,0,150,47.1,1.4
17962,B-MAX,2014,7499,1,40700,0,30,57.7,1.0
17963,Focus,2015,9999,1,7010,1,20,67.3,1.6
17964,KA,2018,8299,1,5007,0,145,57.7,1.2


Splitting the data into features and labels

In [8]:
# Features: every column except the model name and the target price
x = dataset.drop(columns=['model', 'price'])
# Labels: the price column
y = dataset['price']
y

0        12000
1        14000
2        13000
3        17500
4        16500
         ...  
17961     8999
17962     7499
17963     9999
17964     8299
17965     8299
Name: price, Length: 17966, dtype: int64

Let's standardize the data

In [9]:
# Fit a StandardScaler on the feature frame (fit() returns the scaler itself).
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# performed further down, so the test rows contribute to the scaling statistics.
scaler = StandardScaler().fit(x)
scaler

In [10]:
# Apply the fitted scaling to the features; the result is a NumPy array
standardized_x = scaler.transform(x)
standardized_x

array([[ 0.06512772, -2.67003231, -0.38099808, ...,  0.59135805,
        -0.02044162, -0.81138621],
       [ 0.55286624,  0.04135139, -0.73335899, ...,  0.59135805,
        -0.02044162, -0.81138621],
       [ 0.06512772,  0.04135139, -0.56013157, ...,  0.59135805,
        -0.02044162, -0.81138621],
       ...,
       [-0.91034931,  0.04135139, -0.83982222, ..., -1.50505332,
         0.92766777,  0.57636151],
       [ 0.55286624,  0.04135139, -0.94269045, ...,  0.51072684,
        -0.02044162, -0.34880364],
       [-0.91034931,  0.04135139, -0.94269045, ..., -1.47280084,
        -0.02044162, -0.81138621]])

In [11]:
# From here on `x` is the standardized feature array; `y` stays the price column
x, y = standardized_x, dataset['price']

In [12]:
# Hold out 10% of the rows for testing; the fixed random_state makes the
# split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=.1, random_state=42
)
# Print full / train / test shapes, first for the features and then the labels
for full, train_part, test_part in ((x, x_train, x_test), (y, y_train, y_test)):
    print(full.shape, train_part.shape, test_part.shape)

(17966, 7) (16169, 7) (1797, 7)
(17966,) (16169,) (1797,)


XGB Regressor

In [13]:
# Create the XGBoost regressor (no arguments, so library defaults apply)
xgb_model = XGBRegressor()

In [14]:
# Train the regressor on the training split
xgb_model.fit(X=x_train, y=y_train)

Model Evaluation

In [15]:
# Predict on the same rows the model was fitted on
training_data_pred = xgb_model.predict(x_train)
# R2 score on training data
score_1 = metrics.r2_score(y_train, training_data_pred)
# Mean absolute error on training data
mae = metrics.mean_absolute_error(y_train, training_data_pred)
print("R2 score on training data:", score_1)
print("Mean absolute error on training data:", mae)

R2 score on training data: 0.9531964580818831
Mean absolute error on trainig data: 740.4128602406732


In [16]:
# Predict on the held-out test rows
test_data_pred = xgb_model.predict(x_test)
# R2 score on test data
# (score_1 and mae are reused from the training evaluation, so after this
# cell they hold the test metrics)
score_1 = metrics.r2_score(y_test, test_data_pred)
# Mean absolute error on test data
mae = metrics.mean_absolute_error(y_test, test_data_pred)
# Label the output as test metrics; it previously said "training data"
print("R2 score on test data:", score_1)
print("Mean absolute error on test data:", mae)

R2 score on training data: 0.9116280023596516
Mean absolute error on trainig data: 907.3636198473694


Making a Prediction

In [17]:
# One car, in the feature order used for training:
# year, transmission, mileage, fuelType, tax, mpg, engineSize
input_data = (2019, 1, 10460, 0, 145, 40.3, 1.5)
# Wrapping the tuple in a list yields a single-row 2-D array (1 x 7)
input_changed = np.array([input_data])
# Scale the row with the scaler fitted earlier, then predict
std_input = scaler.transform(input_changed)
prediction = xgb_model.predict(std_input)
print(prediction)
print("This car price estimation is:", prediction)

[18320.613]
This car price estimation is: [18320.613]


Saving our model and scaler

In [18]:
import joblib
# save the model
joblib.dump(xgb_model, 'xgb_model.pkl')
# save the fitted StandardScaler; this line previously dumped xgb_model again,
# so scaler.pkl held a second copy of the model and no scaler was saved
joblib.dump(scaler, 'scaler.pkl')

['scaler.pkl']

In [None]:
import numpy as np
import joblib
import tkinter as tk
from tkinter import messagebox

# Load the trained model and the scaler that was fitted on the training features.
# NOTE(review): joblib.load unpickles arbitrary Python objects -- only load
# files you produced yourself.
# NOTE(review): absolute, machine-specific directory; assumes scaler.pkl was
# written to the same folder as xgb_model.pkl -- confirm.
ARTIFACT_DIR = 'C:/Users/dell/Desktop/youtube'
model = joblib.load(ARTIFACT_DIR + '/xgb_model.pkl')
feature_scaler = joblib.load(ARTIFACT_DIR + '/scaler.pkl')
if not hasattr(feature_scaler, 'transform'):
    # The model was fitted on standardized features, so predicting without a
    # real scaler would silently give meaningless prices. Fail early instead.
    raise TypeError(
        "scaler.pkl does not contain a fitted scaler; "
        "re-save it with joblib.dump(scaler, 'scaler.pkl')"
    )

# Integer codes used when the categorical columns were encoded for training
TRANSMISSION_CODES = (0, 1, 2)
FUEL_TYPE_CODES = (0, 1, 2, 3, 4)


# Function to predict car price
def car_price_prediction():
    """Read the entry boxes, predict a price and show it in the result label.

    The seven inputs are assembled in the feature order used for training
    (year, transmission, mileage, fuelType, tax, mpg, engineSize),
    standardized with the saved scaler and passed to the model.

    Shows an "Input Error" dialog instead of predicting when a field cannot
    be parsed as a number or when a category code is outside the encoded set.
    """
    try:
        # Get inputs from entry boxes
        year = int(entry_year.get())
        transmission = float(entry_transmission.get())  # encoded category code
        mileage = int(entry_mileage.get())
        fuel_type = float(entry_fuel_type.get())  # encoded category code
        tax = int(entry_tax.get())
        mpg = float(entry_mpg.get())
        enginesize = float(entry_enginesize.get())

        # Reject codes the model never saw (e.g. 1.5 or 7)
        if transmission not in TRANSMISSION_CODES or fuel_type not in FUEL_TYPE_CODES:
            messagebox.showerror(
                "Input Error",
                "Transmission must be 0, 1 or 2 and fuel type must be 0, 1, 2, 3 or 4!",
            )
            return

        # Single-row 2-D array, as expected by transform() and predict()
        input_data = np.array([[year, transmission, mileage, fuel_type, tax, mpg, enginesize]])

        # Standardize exactly as during training, then predict
        prediction = model.predict(feature_scaler.transform(input_data))

        # Display the prediction
        label_result.config(text="Estimated Car Price: " + str(prediction[0]))

    except ValueError:
        messagebox.showerror("Input Error", "Please enter valid numeric values!")

# Build the main application window
root = tk.Tk()
root.title("Car Price Prediction")


def _add_field(caption):
    """Pack a caption label followed by a text entry into the root window.

    Returns the (label, entry) pair so the caller can keep both references.
    """
    label = tk.Label(root, text=caption)
    label.pack()
    entry = tk.Entry(root)
    entry.pack()
    return label, entry


# One caption/entry pair per model feature, packed top to bottom
label_year, entry_year = _add_field("Year (e.g., 2014-2019)")
label_transmission, entry_transmission = _add_field(
    "Transmission (e.g., Automatic=0, Manual=1, Semi-Auto=2)"
)
label_mileage, entry_mileage = _add_field("Mileage")
label_fuel_type, entry_fuel_type = _add_field(
    "Fuel Type (e.g., Petrol=0, Diesel=1, Hybrid=2, Electric=3, Other=4)"
)
label_tax, entry_tax = _add_field("Tax (e.g., 20-150)")
label_mpg, entry_mpg = _add_field("MPG (e.g., 40.3 - 67.3)")
label_enginesize, entry_enginesize = _add_field("Engine Size (e.g., 1.0 - 1.6)")

# Button that runs the prediction callback when clicked
button_predict = tk.Button(root, text="Check Estimated Price", command=car_price_prediction)
button_predict.pack()

# Initially empty label; the prediction callback writes the result into it
label_result = tk.Label(root, text="")
label_result.pack()

# Start the Tkinter event loop
root.mainloop()
