In [15]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [8]:
# Load the used-car listings from the CSV in the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="used_car_dataset.csv")

In [9]:
# Show the freshly loaded frame to check its columns and row count.
df

Unnamed: 0,year,mileage,fuel_type,transmission,seller_type,car_condition,brand,price
0,2006,158812,Electric,Automatic,Dealer,Excellent,BMW,15636
1,2019,64353,Diesel,Manual,Dealer,Poor,Toyota,6583
2,2014,118132,Diesel,Manual,Dealer,Excellent,Toyota,10821
3,2010,192604,Electric,Automatic,Dealer,Excellent,Tesla,15681
4,2007,21052,Diesel,Automatic,Dealer,Good,Hyundai,11798
...,...,...,...,...,...,...,...,...
2495,2022,119589,Electric,Manual,Individual,Good,Toyota,10008
2496,2006,150406,Diesel,Automatic,Online Platform,Good,Ford,9153
2497,2004,99715,Electric,Semi-Auto,Individual,Excellent,Hyundai,9644
2498,2000,198839,Hybrid,Semi-Auto,Individual,Fair,Ford,5369


In [4]:
# Bucket the model year into equal-width intervals spanning the observed year
# range, one interval per label (oldest first).
# NOTE(review): this cell's execution count (4) is lower than the read_csv
# cell's (8) and the recorded outputs further down have no year_binned column,
# so it looks like it ran against an earlier `df` -- re-run top to bottom to confirm.
year_labels = ['Old', 'Mid', 'New']
df['year_binned'] = pd.cut(
    df['year'],
    bins=len(year_labels),
    labels=year_labels,
)

In [10]:
# Category levels listed from lowest to highest rank. Each list is wrapped in an
# outer list because OrdinalEncoder takes one list of levels per encoded column.
transmission_order = [
    ['Manual', 'Semi-Auto', 'Automatic'],
]
condition_order = [
    ['Poor', 'Fair', 'Good', 'Excellent'],
]

In [11]:
# One ordinal encoder per ordered categorical; the explicit `categories` lists
# fix the integer rank of every level (position in the list = encoded value).
trans_encoder = OrdinalEncoder(categories=transmission_order)
cond_encoder = OrdinalEncoder(categories=condition_order)

# Fit each encoder on its source column and store the ranks in a new column.
# The double brackets pass a one-column DataFrame, since the encoder expects 2-D input.
for source_col, target_col, ordinal in (
    ('transmission', 'transmission_encoded', trans_encoder),
    ('car_condition', 'car_condition_encoded', cond_encoder),
):
    df[target_col] = ordinal.fit_transform(df[[source_col]])


In [12]:
# Show the frame again to confirm transmission_encoded and car_condition_encoded were added.
df

Unnamed: 0,year,mileage,fuel_type,transmission,seller_type,car_condition,brand,price,transmission_encoded,car_condition_encoded
0,2006,158812,Electric,Automatic,Dealer,Excellent,BMW,15636,2.0,3.0
1,2019,64353,Diesel,Manual,Dealer,Poor,Toyota,6583,0.0,0.0
2,2014,118132,Diesel,Manual,Dealer,Excellent,Toyota,10821,0.0,3.0
3,2010,192604,Electric,Automatic,Dealer,Excellent,Tesla,15681,2.0,3.0
4,2007,21052,Diesel,Automatic,Dealer,Good,Hyundai,11798,2.0,2.0
...,...,...,...,...,...,...,...,...,...,...
2495,2022,119589,Electric,Manual,Individual,Good,Toyota,10008,0.0,2.0
2496,2006,150406,Diesel,Automatic,Online Platform,Good,Ford,9153,2.0,2.0
2497,2004,99715,Electric,Semi-Auto,Individual,Excellent,Hyundai,9644,1.0,3.0
2498,2000,198839,Hybrid,Semi-Auto,Individual,Fair,Ford,5369,1.0,1.0


In [13]:
# Mean-encode brand: replace each brand with the average price of its listings.
# NOTE(review): the averages use every row's own price, so if price is the
# prediction target this leaks it into the feature -- confirm whether the means
# should be computed on a training split only.
brand_means = df['price'].groupby(df['brand']).mean()
df['brand_encoded'] = df['brand'].map(brand_means)

In [14]:
# Show the frame again to confirm brand_encoded was added.
df

Unnamed: 0,year,mileage,fuel_type,transmission,seller_type,car_condition,brand,price,transmission_encoded,car_condition_encoded,brand_encoded
0,2006,158812,Electric,Automatic,Dealer,Excellent,BMW,15636,2.0,3.0,17744.289575
1,2019,64353,Diesel,Manual,Dealer,Poor,Toyota,6583,0.0,0.0,8994.430063
2,2014,118132,Diesel,Manual,Dealer,Excellent,Toyota,10821,0.0,3.0,8994.430063
3,2010,192604,Electric,Automatic,Dealer,Excellent,Tesla,15681,2.0,3.0,20495.056225
4,2007,21052,Diesel,Automatic,Dealer,Good,Hyundai,11798,2.0,2.0,7075.614108
...,...,...,...,...,...,...,...,...,...,...,...
2495,2022,119589,Electric,Manual,Individual,Good,Toyota,10008,0.0,2.0,8994.430063
2496,2006,150406,Diesel,Automatic,Online Platform,Good,Ford,9153,2.0,2.0,10530.678776
2497,2004,99715,Electric,Semi-Auto,Individual,Excellent,Hyundai,9644,1.0,3.0,7075.614108
2498,2000,198839,Hybrid,Semi-Auto,Individual,Fair,Ford,5369,1.0,1.0,10530.678776


In [16]:
# One-hot encode the unordered categoricals and replace them with their
# indicator columns (one column per category level).
onehot_cols = ['fuel_type', 'seller_type']

# `sparse` was renamed to `sparse_output` in scikit-learn 1.2 and removed in 1.4,
# where passing it raises TypeError. Prefer the current keyword and fall back
# to the old one only on releases that predate it.
try:
    encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    encoder = OneHotEncoder(sparse=False)
encoded = encoder.fit_transform(df[onehot_cols])

# The dense array carries no index, so encoded_df gets a default 0..n-1 index.
encoded_df = pd.DataFrame(encoded, columns=encoder.get_feature_names_out(onehot_cols))

# Reset df to the same 0..n-1 index so the column-wise concat lines rows up.
# Note: this drops the source columns, so the cell cannot be re-run without
# reloading df first.
df = df.drop(columns=onehot_cols).reset_index(drop=True)
df = pd.concat([df, encoded_df], axis=1)



In [17]:
# Show the final frame: fuel_type and seller_type are replaced by their one-hot columns.
df

Unnamed: 0,year,mileage,transmission,car_condition,brand,price,transmission_encoded,car_condition_encoded,brand_encoded,fuel_type_Diesel,fuel_type_Electric,fuel_type_Hybrid,fuel_type_Petrol,seller_type_Dealer,seller_type_Individual,seller_type_Online Platform
0,2006,158812,Automatic,Excellent,BMW,15636,2.0,3.0,17744.289575,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1,2019,64353,Manual,Poor,Toyota,6583,0.0,0.0,8994.430063,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,2014,118132,Manual,Excellent,Toyota,10821,0.0,3.0,8994.430063,1.0,0.0,0.0,0.0,1.0,0.0,0.0
3,2010,192604,Automatic,Excellent,Tesla,15681,2.0,3.0,20495.056225,0.0,1.0,0.0,0.0,1.0,0.0,0.0
4,2007,21052,Automatic,Good,Hyundai,11798,2.0,2.0,7075.614108,1.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,2022,119589,Manual,Good,Toyota,10008,0.0,2.0,8994.430063,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2496,2006,150406,Automatic,Good,Ford,9153,2.0,2.0,10530.678776,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2497,2004,99715,Semi-Auto,Excellent,Hyundai,9644,1.0,3.0,7075.614108,0.0,1.0,0.0,0.0,0.0,1.0,0.0
2498,2000,198839,Semi-Auto,Fair,Ford,5369,1.0,1.0,10530.678776,0.0,0.0,1.0,0.0,0.0,1.0,0.0
