# OIBSIP
## TASK 3: CAR PRICE PREDICTION WITH MACHINE LEARNING

<br>

**AUTHOR :** Mohammed Khubaib
<br>
**MODEL :** Linear Regression

## Required Imports:
 - Python Modules
 - Cars Dataset

In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [3]:
# Read the cars dataset (a CSV file in the working directory) into a DataFrame.
df = pd.read_csv(filepath_or_buffer='cars.csv')

## Performing EDA

In [4]:
# Preview the first five rows to get a feel for the columns and value formats.
df.head(n=5)

Unnamed: 0,car_ID,symboling,CarName,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,1,3,alfa-romero giulia,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,2,3,alfa-romero stelvio,gas,std,two,convertible,rwd,front,88.6,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,3,1,alfa-romero Quadrifoglio,gas,std,two,hatchback,rwd,front,94.5,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,4,2,audi 100 ls,gas,std,four,sedan,fwd,front,99.8,...,109,mpfi,3.19,3.4,10.0,102,5500,24,30,13950.0
4,5,2,audi 100ls,gas,std,four,sedan,4wd,front,99.4,...,136,mpfi,3.19,3.4,8.0,115,5500,18,22,17450.0


In [5]:
# List every column name in the loaded frame (the head() preview elides the middle ones).
df.columns

Index(['car_ID', 'symboling', 'CarName', 'fueltype', 'aspiration',
       'doornumber', 'carbody', 'drivewheel', 'enginelocation', 'wheelbase',
       'carlength', 'carwidth', 'carheight', 'curbweight', 'enginetype',
       'cylindernumber', 'enginesize', 'fuelsystem', 'boreratio', 'stroke',
       'compressionratio', 'horsepower', 'peakrpm', 'citympg', 'highwaympg',
       'price'],
      dtype='object')

In [6]:
# Remove the columns that are not used as model features:
# car_ID, symboling and CarName.
df = df.drop(columns=['car_ID', 'symboling', 'CarName'])

In [7]:
# Display the frame to confirm the three columns above were removed.
df

Unnamed: 0,fueltype,aspiration,doornumber,carbody,drivewheel,enginelocation,wheelbase,carlength,carwidth,carheight,...,enginesize,fuelsystem,boreratio,stroke,compressionratio,horsepower,peakrpm,citympg,highwaympg,price
0,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,13495.0
1,gas,std,two,convertible,rwd,front,88.6,168.8,64.1,48.8,...,130,mpfi,3.47,2.68,9.0,111,5000,21,27,16500.0
2,gas,std,two,hatchback,rwd,front,94.5,171.2,65.5,52.4,...,152,mpfi,2.68,3.47,9.0,154,5000,19,26,16500.0
3,gas,std,four,sedan,fwd,front,99.8,176.6,66.2,54.3,...,109,mpfi,3.19,3.40,10.0,102,5500,24,30,13950.0
4,gas,std,four,sedan,4wd,front,99.4,176.6,66.4,54.3,...,136,mpfi,3.19,3.40,8.0,115,5500,18,22,17450.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,...,141,mpfi,3.78,3.15,9.5,114,5400,23,28,16845.0
201,gas,turbo,four,sedan,rwd,front,109.1,188.8,68.8,55.5,...,141,mpfi,3.78,3.15,8.7,160,5300,19,25,19045.0
202,gas,std,four,sedan,rwd,front,109.1,188.8,68.9,55.5,...,173,mpfi,3.58,2.87,8.8,134,5500,18,23,21485.0
203,diesel,turbo,four,sedan,rwd,front,109.1,188.8,68.9,55.5,...,145,idi,3.01,3.40,23.0,106,4800,26,27,22470.0


In [8]:
# One-hot encode every categorical (object) column into indicator columns.
# drop_first=True omits the first level of each category so the indicators
# for one feature are not perfectly collinear.
# dtype=int pins the indicators to 0/1 integers: without it the dtype depends
# on the installed pandas version (uint8 before 2.0, bool from 2.0 onwards),
# so a re-run could show True/False instead of the 0/1 values recorded below.
df = pd.get_dummies(df, drop_first=True, dtype=int)

In [9]:
# Display the frame after encoding: categorical columns are now 0/1 indicator columns.
df

Unnamed: 0,wheelbase,carlength,carwidth,carheight,curbweight,enginesize,boreratio,stroke,compressionratio,horsepower,...,cylindernumber_three,cylindernumber_twelve,cylindernumber_two,fuelsystem_2bbl,fuelsystem_4bbl,fuelsystem_idi,fuelsystem_mfi,fuelsystem_mpfi,fuelsystem_spdi,fuelsystem_spfi
0,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,...,0,0,0,0,0,0,0,1,0,0
1,88.6,168.8,64.1,48.8,2548,130,3.47,2.68,9.0,111,...,0,0,0,0,0,0,0,1,0,0
2,94.5,171.2,65.5,52.4,2823,152,2.68,3.47,9.0,154,...,0,0,0,0,0,0,0,1,0,0
3,99.8,176.6,66.2,54.3,2337,109,3.19,3.40,10.0,102,...,0,0,0,0,0,0,0,1,0,0
4,99.4,176.6,66.4,54.3,2824,136,3.19,3.40,8.0,115,...,0,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200,109.1,188.8,68.9,55.5,2952,141,3.78,3.15,9.5,114,...,0,0,0,0,0,0,0,1,0,0
201,109.1,188.8,68.8,55.5,3049,141,3.78,3.15,8.7,160,...,0,0,0,0,0,0,0,1,0,0
202,109.1,188.8,68.9,55.5,3012,173,3.58,2.87,8.8,134,...,0,0,0,0,0,0,0,1,0,0
203,109.1,188.8,68.9,55.5,3217,145,3.01,3.40,23.0,106,...,0,0,0,0,0,1,0,0,0,0


## Creating Linear Regression Model

In [10]:
# Separate features and target variable
X = df.drop('price', axis=1)
y = df['price']

In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [12]:
# Instantiate scikit-learn's LinearRegression estimator with its default settings.
model = LinearRegression()

In [13]:
# Fit the regression on the training portion only; the test rows stay unseen.
model.fit(X=X_train, y=y_train)

In [14]:
# Predict prices for the held-out test rows.
y_pred = model.predict(X=X_test)

In [15]:
# Score the held-out predictions with mean squared error (in squared price units).
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print('Mean Squared Error:', mse)

Mean Squared Error: 8438408.202676434


## Custom Testing

In [None]:
# Count the distinct feature column names in X, i.e. how many inputs the model takes.
X.columns.nunique()

In [16]:
# Feature values for one hypothetical car, keyed by the model's post-encoding
# column names: the numeric specifications first, then the 0/1 indicator
# columns produced by the one-hot encoding step.
car_features = {'wheelbase': 88.60,
                'carlength': 168.80,
                'carwidth': 64.10,
                'carheight': 48.80,
                'curbweight': 2548.00,
                'enginesize': 130.00,
                'boreratio': 3.47,
                'stroke': 2.68,
                'compressionratio': 9.00,
                'horsepower': 111.00,
                'peakrpm': 5000.00,
                'citympg': 21.00,
                'highwaympg': 27.00,
                'fueltype_gas': 1,
                'aspiration_turbo': 0,
                'doornumber_two': 1,
                'carbody_hardtop': 0,
                'carbody_hatchback': 0,
                'carbody_sedan': 0,
                'carbody_wagon': 0,
                'drivewheel_fwd': 1,
                'drivewheel_rwd': 0,
                'enginelocation_rear': 0,
                'enginetype_dohcv': 0,
                'enginetype_l': 0,
                'enginetype_ohc': 0,
                'enginetype_ohcf': 0,
                'enginetype_ohcv': 0,
                'enginetype_rotor': 0,
                'cylindernumber_five': 0,
                'cylindernumber_four': 1,
                'cylindernumber_six': 0,
                'cylindernumber_three': 0,
                'cylindernumber_twelve': 0,
                'cylindernumber_two': 0,
                'fuelsystem_2bbl': 0,
                'fuelsystem_4bbl': 0,
                'fuelsystem_idi': 0,
                'fuelsystem_mfi': 0,
                'fuelsystem_mpfi': 0,
                'fuelsystem_spdi': 0,
                'fuelsystem_spfi': 0}

# Guard against typos: the reindex() call below silently discards any key that
# is not a training column, which would change the prediction without an error.
unknown_features = sorted(set(car_features) - set(X.columns))
if unknown_features:
    raise ValueError(
        f"car_features contains names that are not model features: {unknown_features}"
    )

# Build a one-row frame from the dictionary.
custom_data = pd.DataFrame(car_features, index=[0])

# Put the columns in the same order as the training features; any feature not
# supplied above defaults to 0.
custom_data = custom_data.reindex(columns=X.columns, fill_value=0)

predicted_price = model.predict(custom_data)
print(f"Predicted Car Price: ${predicted_price[0]:,.2f}")


Predicted Car Price: $12,625.52
