In [111]:
#Step 1: Importing the libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

In [112]:
#Step 2: Defining the csv path and loading its data 
used_cars_csv_path = r"C:\Users\asimi\Documents\Data_Engineering_Bootcamp\Git_Project_1\1st-BBDA-project\Extract\used_cars_train_data.csv"
#used_cars_csv_path = r"C:\Users\dgatopoulos\OneDrive - Deloitte (O365D)\Desktop\Big Blue Data Bootcamp\local repository\dimosthenis-gatopoulos-first-project.git\1st-BBDA-project\Extract\used_cars_data.csv"
df_used_cars = pd.read_csv(used_cars_csv_path)

In [113]:
#Step 3: Initial Data Inspection
print("\nFirst five rows:")
df_used_cars.head()


First five rows:


Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,,12.5
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,,17.74


Initial observations:
1. Mileage: contains both the value and the units -> Can be reflected in separate columns.
2. Mileage: is maintained in different units -> Find all cases and convert all values to one unit.
3. Engine: contains both the value and the units -> Can be reflected in separate columns.
4. Power: contains both the value and the units -> Can be reflected in separate columns.
5. Seats: value is reflected as a float -> Can be changed to int.
6. New_Price: missing values (NaN) -> Missing Values can be replaced with zero.
7. New_Price: contains the currency -> Should be reflected in a separate column.
8. New_Price: is reflected in "Lakh" currency -> can be converted to EUR or USD.
9. Price: is reflected in "Lakh" currency -> can be converted to EUR or USD.

In [114]:
print("\nBasic Statistics:")
df_used_cars.info()


Basic Statistics:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6019 entries, 0 to 6018
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Unnamed: 0         6019 non-null   int64  
 1   Name               6019 non-null   object 
 2   Location           6019 non-null   object 
 3   Year               6019 non-null   int64  
 4   Kilometers_Driven  6019 non-null   int64  
 5   Fuel_Type          6019 non-null   object 
 6   Transmission       6019 non-null   object 
 7   Owner_Type         6019 non-null   object 
 8   Mileage            6017 non-null   object 
 9   Engine             5983 non-null   object 
 10  Power              5983 non-null   object 
 11  Seats              5977 non-null   float64
 12  New_Price          824 non-null    object 
 13  Price              6019 non-null   float64
dtypes: float64(2), int64(3), object(9)
memory usage: 658.5+ KB


In [115]:
print("\n Identification of columns with null values:")
print(df_used_cars.isnull().sum())


 Identification of columns with null values:
Unnamed: 0              0
Name                    0
Location                0
Year                    0
Kilometers_Driven       0
Fuel_Type               0
Transmission            0
Owner_Type              0
Mileage                 2
Engine                 36
Power                  36
Seats                  42
New_Price            5195
Price                   0
dtype: int64


In [116]:
#Step 4:Removing null values from engine and then if exist from seats
df_used_cars_rne = df_used_cars.dropna(axis=0,subset=(["Engine","Seats"]))

In [117]:
print("\nIdentification of columns with null values:")
df_used_cars_rne.isnull().sum()


Identification of columns with null values:


Unnamed: 0              0
Name                    0
Location                0
Year                    0
Kilometers_Driven       0
Fuel_Type               0
Transmission            0
Owner_Type              0
Mileage                 2
Engine                  0
Power                   0
Seats                   0
New_Price            5153
Price                   0
dtype: int64

In [118]:
print("\nRemoving rows with null mileage value")
df_used_cars_rmn = df_used_cars_rne.dropna(axis=0,subset=(["Mileage"]))


Removing rows with null mileage value


In [119]:
print("\nIdentification of columns with null values:")
df_used_cars_rmn.isnull().sum()


Identification of columns with null values:


Unnamed: 0              0
Name                    0
Location                0
Year                    0
Kilometers_Driven       0
Fuel_Type               0
Transmission            0
Owner_Type              0
Mileage                 0
Engine                  0
Power                   0
Seats                   0
New_Price            5152
Price                   0
dtype: int64

In [120]:
#Replacing null values in the New_Price with zero.
df_used_cars_wn=df_used_cars_rmn.fillna(0)

In [121]:
print("\nIdentification of columns with null values:")
df_used_cars_wn.isnull().sum()


Identification of columns with null values:


Unnamed: 0           0
Name                 0
Location             0
Year                 0
Kilometers_Driven    0
Fuel_Type            0
Transmission         0
Owner_Type           0
Mileage              0
Engine               0
Power                0
Seats                0
New_Price            0
Price                0
dtype: int64

In [122]:
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,0,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,0,12.5
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,0,6.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,0,17.74


In [123]:
df_used_cars_wn

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,0,1.75
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,0,12.50
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.50
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,0,6.00
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,0,17.74
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6014,6014,Maruti Swift VDI,Delhi,2014,27365,Diesel,Manual,First,28.4 kmpl,1248 CC,74 bhp,5.0,7.88 Lakh,4.75
6015,6015,Hyundai Xcent 1.1 CRDi S,Jaipur,2015,100000,Diesel,Manual,First,24.4 kmpl,1120 CC,71 bhp,5.0,0,4.00
6016,6016,Mahindra Xylo D4 BSIV,Jaipur,2012,55000,Diesel,Manual,Second,14.0 kmpl,2498 CC,112 bhp,8.0,0,2.90
6017,6017,Maruti Wagon R VXI,Kolkata,2013,46000,Petrol,Manual,First,18.9 kmpl,998 CC,67.1 bhp,5.0,0,2.65


In [124]:
x=df_used_cars_wn["Mileage"].str.split()[0]
df_used_cars_wn["Mileage Value"]=[x.split()[0] for x in df_used_cars_wn["Mileage"]]
df_used_cars_wn["Mileage Unit"]=[x.split()[-1] for x in df_used_cars_wn["Mileage"]]

In [125]:
df_used_cars_wn

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price,Mileage Value,Mileage Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,26.6 km/kg,998 CC,58.16 bhp,5.0,0,1.75,26.6,km/kg
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,19.67 kmpl,1582 CC,126.2 bhp,5.0,0,12.50,19.67,kmpl
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.50,18.2,kmpl
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,0,6.00,20.77,kmpl
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,15.2 kmpl,1968 CC,140.8 bhp,5.0,0,17.74,15.2,kmpl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6014,6014,Maruti Swift VDI,Delhi,2014,27365,Diesel,Manual,First,28.4 kmpl,1248 CC,74 bhp,5.0,7.88 Lakh,4.75,28.4,kmpl
6015,6015,Hyundai Xcent 1.1 CRDi S,Jaipur,2015,100000,Diesel,Manual,First,24.4 kmpl,1120 CC,71 bhp,5.0,0,4.00,24.4,kmpl
6016,6016,Mahindra Xylo D4 BSIV,Jaipur,2012,55000,Diesel,Manual,Second,14.0 kmpl,2498 CC,112 bhp,8.0,0,2.90,14.0,kmpl
6017,6017,Maruti Wagon R VXI,Kolkata,2013,46000,Petrol,Manual,First,18.9 kmpl,998 CC,67.1 bhp,5.0,0,2.65,18.9,kmpl


In [126]:
del df_used_cars_wn["Mileage"]
df_used_cars_wn

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Engine,Power,Seats,New_Price,Price,Mileage Value,Mileage Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,998 CC,58.16 bhp,5.0,0,1.75,26.6,km/kg
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,1582 CC,126.2 bhp,5.0,0,12.50,19.67,kmpl
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.50,18.2,kmpl
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,1248 CC,88.76 bhp,7.0,0,6.00,20.77,kmpl
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,1968 CC,140.8 bhp,5.0,0,17.74,15.2,kmpl
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6014,6014,Maruti Swift VDI,Delhi,2014,27365,Diesel,Manual,First,1248 CC,74 bhp,5.0,7.88 Lakh,4.75,28.4,kmpl
6015,6015,Hyundai Xcent 1.1 CRDi S,Jaipur,2015,100000,Diesel,Manual,First,1120 CC,71 bhp,5.0,0,4.00,24.4,kmpl
6016,6016,Mahindra Xylo D4 BSIV,Jaipur,2012,55000,Diesel,Manual,Second,2498 CC,112 bhp,8.0,0,2.90,14.0,kmpl
6017,6017,Maruti Wagon R VXI,Kolkata,2013,46000,Petrol,Manual,First,998 CC,67.1 bhp,5.0,0,2.65,18.9,kmpl


In [127]:
pwr = df_used_cars_wn["Power"].str.split()[0]
df_used_cars_wn["Power Value"] = [pwr.split()[0] for pwr in df_used_cars_wn["Power"]] 
df_used_cars_wn["Power Unit"] = [pwr.split()[-1] for pwr in df_used_cars_wn["Power"]]
df_used_cars_wn

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Engine,Power,Seats,New_Price,Price,Mileage Value,Mileage Unit,Power Value,Power Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,998 CC,58.16 bhp,5.0,0,1.75,26.6,km/kg,58.16,bhp
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,1582 CC,126.2 bhp,5.0,0,12.50,19.67,kmpl,126.2,bhp
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.50,18.2,kmpl,88.7,bhp
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,1248 CC,88.76 bhp,7.0,0,6.00,20.77,kmpl,88.76,bhp
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,1968 CC,140.8 bhp,5.0,0,17.74,15.2,kmpl,140.8,bhp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6014,6014,Maruti Swift VDI,Delhi,2014,27365,Diesel,Manual,First,1248 CC,74 bhp,5.0,7.88 Lakh,4.75,28.4,kmpl,74,bhp
6015,6015,Hyundai Xcent 1.1 CRDi S,Jaipur,2015,100000,Diesel,Manual,First,1120 CC,71 bhp,5.0,0,4.00,24.4,kmpl,71,bhp
6016,6016,Mahindra Xylo D4 BSIV,Jaipur,2012,55000,Diesel,Manual,Second,2498 CC,112 bhp,8.0,0,2.90,14.0,kmpl,112,bhp
6017,6017,Maruti Wagon R VXI,Kolkata,2013,46000,Petrol,Manual,First,998 CC,67.1 bhp,5.0,0,2.65,18.9,kmpl,67.1,bhp


In [128]:
del df_used_cars_wn["Power"]
df_used_cars_wn

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Engine,Seats,New_Price,Price,Mileage Value,Mileage Unit,Power Value,Power Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,998 CC,5.0,0,1.75,26.6,km/kg,58.16,bhp
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,1582 CC,5.0,0,12.50,19.67,kmpl,126.2,bhp
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,1199 CC,5.0,8.61 Lakh,4.50,18.2,kmpl,88.7,bhp
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,1248 CC,7.0,0,6.00,20.77,kmpl,88.76,bhp
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,1968 CC,5.0,0,17.74,15.2,kmpl,140.8,bhp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6014,6014,Maruti Swift VDI,Delhi,2014,27365,Diesel,Manual,First,1248 CC,5.0,7.88 Lakh,4.75,28.4,kmpl,74,bhp
6015,6015,Hyundai Xcent 1.1 CRDi S,Jaipur,2015,100000,Diesel,Manual,First,1120 CC,5.0,0,4.00,24.4,kmpl,71,bhp
6016,6016,Mahindra Xylo D4 BSIV,Jaipur,2012,55000,Diesel,Manual,Second,2498 CC,8.0,0,2.90,14.0,kmpl,112,bhp
6017,6017,Maruti Wagon R VXI,Kolkata,2013,46000,Petrol,Manual,First,998 CC,5.0,0,2.65,18.9,kmpl,67.1,bhp


In [129]:
engine = df_used_cars_wn["Engine"].str.split()[0]
df_used_cars_wn["Engine (cc)"] = [engine.split()[0] for engine in df_used_cars_wn["Engine"]]
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Engine,Seats,New_Price,Price,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc)
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,998 CC,5.0,0,1.75,26.6,km/kg,58.16,bhp,998
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,1582 CC,5.0,0,12.5,19.67,kmpl,126.2,bhp,1582
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,1199 CC,5.0,8.61 Lakh,4.5,18.2,kmpl,88.7,bhp,1199
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,1248 CC,7.0,0,6.0,20.77,kmpl,88.76,bhp,1248
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,1968 CC,5.0,0,17.74,15.2,kmpl,140.8,bhp,1968


In [130]:
del df_used_cars_wn["Engine"]
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,New_Price,Price,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc)
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,0,1.75,26.6,km/kg,58.16,bhp,998
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,0,12.5,19.67,kmpl,126.2,bhp,1582
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,8.61 Lakh,4.5,18.2,kmpl,88.7,bhp,1199
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,0,6.0,20.77,kmpl,88.76,bhp,1248
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,0,17.74,15.2,kmpl,140.8,bhp,1968


In [131]:
df_used_cars_wn.isnull().sum()

Unnamed: 0           0
Name                 0
Location             0
Year                 0
Kilometers_Driven    0
Fuel_Type            0
Transmission         0
Owner_Type           0
Seats                0
New_Price            0
Price                0
Mileage Value        0
Mileage Unit         0
Power Value          0
Power Unit           0
Engine (cc)          0
dtype: int64

In [132]:
df_used_cars_wn["Price (euro)"] = df_used_cars_wn["Price"] * 100000 * 0.010
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,New_Price,Price,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc),Price (euro)
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,0,1.75,26.6,km/kg,58.16,bhp,998,1750.0
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,0,12.5,19.67,kmpl,126.2,bhp,1582,12500.0
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,8.61 Lakh,4.5,18.2,kmpl,88.7,bhp,1199,4500.0
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,0,6.0,20.77,kmpl,88.76,bhp,1248,6000.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,0,17.74,15.2,kmpl,140.8,bhp,1968,17740.0


In [133]:
del df_used_cars_wn["Price"]
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,New_Price,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc),Price (euro)
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,0,26.6,km/kg,58.16,bhp,998,1750.0
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,0,19.67,kmpl,126.2,bhp,1582,12500.0
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,8.61 Lakh,18.2,kmpl,88.7,bhp,1199,4500.0
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,0,20.77,kmpl,88.76,bhp,1248,6000.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,0,15.2,kmpl,140.8,bhp,1968,17740.0


In [134]:
def extract_value(x1):
    if isinstance(x1,str):
        return float(x1.split()[0])
    else:
        return x1

df_used_cars_wn["New Price"] = df_used_cars_wn["New_Price"].apply(extract_value)
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,New_Price,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc),Price (euro),New Price
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,0,26.6,km/kg,58.16,bhp,998,1750.0,0.0
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,0,19.67,kmpl,126.2,bhp,1582,12500.0,0.0
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,8.61 Lakh,18.2,kmpl,88.7,bhp,1199,4500.0,8.61
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,0,20.77,kmpl,88.76,bhp,1248,6000.0,0.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,0,15.2,kmpl,140.8,bhp,1968,17740.0,0.0


In [135]:
df_used_cars_wn["New Price (euro)"]=df_used_cars_wn["New Price"]*100000*0.010
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,New_Price,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc),Price (euro),New Price,New Price (euro)
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,0,26.6,km/kg,58.16,bhp,998,1750.0,0.0,0.0
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,0,19.67,kmpl,126.2,bhp,1582,12500.0,0.0,0.0
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,8.61 Lakh,18.2,kmpl,88.7,bhp,1199,4500.0,8.61,8610.0
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,0,20.77,kmpl,88.76,bhp,1248,6000.0,0.0,0.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,0,15.2,kmpl,140.8,bhp,1968,17740.0,0.0,0.0


In [136]:
df_used_cars_wn.drop(columns=["New_Price","New Price"],inplace=True)
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc),Price (euro),New Price (euro)
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,26.6,km/kg,58.16,bhp,998,1750.0,0.0
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,19.67,kmpl,126.2,bhp,1582,12500.0,0.0
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,18.2,kmpl,88.7,bhp,1199,4500.0,8610.0
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,20.77,kmpl,88.76,bhp,1248,6000.0,0.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,15.2,kmpl,140.8,bhp,1968,17740.0,0.0


In [137]:
fuel_density = {"Petrol":0.74,"Diesel":0.832,"LPG":0.54,"CNG":0.714}

In [138]:
df_used_cars_wn['Mileage Value'] = pd.to_numeric(df_used_cars_wn['Mileage Value'], errors='coerce')

def mileage_to_l_per_100_km(x2):
    value = x2["Mileage Value"]
    unit = x2["Mileage Unit"]
    ftype = x2["Fuel_Type"]

    if unit.lower() == "kmpl":
        if value != 0:
            return 100/value
        else:
            value
    else:
        density = fuel_density.get(ftype,None)
        return 100/(value/density)

df_used_cars_wn["Mileage_L/100km"]=df_used_cars_wn.apply(mileage_to_l_per_100_km, axis=1)
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,26.6,km/kg,58.16,bhp,998,1750.0,0.0,2.684211
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,19.67,kmpl,126.2,bhp,1582,12500.0,0.0,5.083884
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,18.2,kmpl,88.7,bhp,1199,4500.0,8610.0,5.494505
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,20.77,kmpl,88.76,bhp,1248,6000.0,0.0,4.814636
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,15.2,kmpl,140.8,bhp,1968,17740.0,0.0,6.578947


In [139]:
df_used_cars_wn["Mileage_Unit"] = "l/100km"
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage Value,Mileage Unit,Power Value,Power Unit,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,26.6,km/kg,58.16,bhp,998,1750.0,0.0,2.684211,l/100km
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,19.67,kmpl,126.2,bhp,1582,12500.0,0.0,5.083884,l/100km
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,18.2,kmpl,88.7,bhp,1199,4500.0,8610.0,5.494505,l/100km
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,20.77,kmpl,88.76,bhp,1248,6000.0,0.0,4.814636,l/100km
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,15.2,kmpl,140.8,bhp,1968,17740.0,0.0,6.578947,l/100km


In [140]:
df_used_cars_wn.drop(columns=["Mileage Value","Mileage Unit"],inplace=True)
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Power Value,Power Unit,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,58.16,bhp,998,1750.0,0.0,2.684211,l/100km
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,126.2,bhp,1582,12500.0,0.0,5.083884,l/100km
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,88.7,bhp,1199,4500.0,8610.0,5.494505,l/100km
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,88.76,bhp,1248,6000.0,0.0,4.814636,l/100km
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,140.8,bhp,1968,17740.0,0.0,6.578947,l/100km


In [141]:
set(list(df_used_cars_wn["Power Unit"]))

{'bhp'}

In [142]:
df_used_cars_wn["Power Value"] = pd.to_numeric(df_used_cars_wn["Power Value"],errors="coerce")
df_used_cars_wn["Power_Value"] = df_used_cars_wn["Power Value"] * 0.746
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Power Value,Power Unit,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit,Power_Value
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,58.16,bhp,998,1750.0,0.0,2.684211,l/100km,43.38736
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,126.2,bhp,1582,12500.0,0.0,5.083884,l/100km,94.1452
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,88.7,bhp,1199,4500.0,8610.0,5.494505,l/100km,66.1702
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,88.76,bhp,1248,6000.0,0.0,4.814636,l/100km,66.21496
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,140.8,bhp,1968,17740.0,0.0,6.578947,l/100km,105.0368


In [143]:
df_used_cars_wn["Power_Unit"]="kW"
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Power Value,Power Unit,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,58.16,bhp,998,1750.0,0.0,2.684211,l/100km,43.38736,kW
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,126.2,bhp,1582,12500.0,0.0,5.083884,l/100km,94.1452,kW
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,88.7,bhp,1199,4500.0,8610.0,5.494505,l/100km,66.1702,kW
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,88.76,bhp,1248,6000.0,0.0,4.814636,l/100km,66.21496,kW
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,140.8,bhp,1968,17740.0,0.0,6.578947,l/100km,105.0368,kW


In [144]:
df_used_cars_wn.drop(columns=["Power Value","Power Unit"],inplace=True)
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,998,1750.0,0.0,2.684211,l/100km,43.38736,kW
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,1582,12500.0,0.0,5.083884,l/100km,94.1452,kW
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,1199,4500.0,8610.0,5.494505,l/100km,66.1702,kW
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,1248,6000.0,0.0,4.814636,l/100km,66.21496,kW
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,1968,17740.0,0.0,6.578947,l/100km,105.0368,kW


In [145]:
df_used_cars_wn["Mileage_L/100km"]=round(df_used_cars_wn["Mileage_L/100km"],1)
df_used_cars_wn["Power_Value"]=round(df_used_cars_wn["Power_Value"],1)
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,998,1750.0,0.0,2.7,l/100km,43.4,kW
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,1582,12500.0,0.0,5.1,l/100km,94.1,kW
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,1199,4500.0,8610.0,5.5,l/100km,66.2,kW
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,1248,6000.0,0.0,4.8,l/100km,66.2,kW
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,1968,17740.0,0.0,6.6,l/100km,105.0,kW


In [146]:
df_used_cars_wn["Price_EUR"] = df_used_cars_wn["Price (euro)"]
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,998,1750.0,0.0,2.7,l/100km,43.4,kW,1750.0
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,1582,12500.0,0.0,5.1,l/100km,94.1,kW,12500.0
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,1199,4500.0,8610.0,5.5,l/100km,66.2,kW,4500.0
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,1248,6000.0,0.0,4.8,l/100km,66.2,kW,6000.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,1968,17740.0,0.0,6.6,l/100km,105.0,kW,17740.0


In [147]:
df_used_cars_wn["New_Price_EUR"] = df_used_cars_wn["New Price (euro)"]
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,998,1750.0,0.0,2.7,l/100km,43.4,kW,1750.0,0.0
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,1582,12500.0,0.0,5.1,l/100km,94.1,kW,12500.0,0.0
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,1199,4500.0,8610.0,5.5,l/100km,66.2,kW,4500.0,8610.0
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,1248,6000.0,0.0,4.8,l/100km,66.2,kW,6000.0,0.0
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,1968,17740.0,0.0,6.6,l/100km,105.0,kW,17740.0,0.0


In [148]:
df_used_cars_wn["Engine_cc"] = df_used_cars_wn["Engine (cc)"]
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Engine (cc),Price (euro),New Price (euro),Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,998,1750.0,0.0,2.7,l/100km,43.4,kW,1750.0,0.0,998
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,1582,12500.0,0.0,5.1,l/100km,94.1,kW,12500.0,0.0,1582
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,1199,4500.0,8610.0,5.5,l/100km,66.2,kW,4500.0,8610.0,1199
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,1248,6000.0,0.0,4.8,l/100km,66.2,kW,6000.0,0.0,1248
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,1968,17740.0,0.0,6.6,l/100km,105.0,kW,17740.0,0.0,1968


In [149]:
df_used_cars_wn.drop(columns=["Engine (cc)","Price (euro)","New Price (euro)"],inplace=True)
df_used_cars_wn.head()

Unnamed: 0.1,Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,2.7,l/100km,43.4,kW,1750.0,0.0,998
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,5.1,l/100km,94.1,kW,12500.0,0.0,1582
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,5.5,l/100km,66.2,kW,4500.0,8610.0,1199
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,4.8,l/100km,66.2,kW,6000.0,0.0,1248
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,6.6,l/100km,105.0,kW,17740.0,0.0,1968


In [150]:
df_used_cars_wn.rename(columns={"Unnamed: 0":"Index"},inplace=True)
df_used_cars_wn.head()

Unnamed: 0,Index,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,2.7,l/100km,43.4,kW,1750.0,0.0,998
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,5.1,l/100km,94.1,kW,12500.0,0.0,1582
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,5.5,l/100km,66.2,kW,4500.0,8610.0,1199
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,4.8,l/100km,66.2,kW,6000.0,0.0,1248
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,6.6,l/100km,105.0,kW,17740.0,0.0,1968


In [151]:
brand = df_used_cars_wn["Name"].str.split()[0][0]
df_used_cars_wn["Brand"] = [brand.split()[0] for brand in df_used_cars_wn["Name"]]
df_used_cars_wn.head()

Unnamed: 0,Index,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc,Brand
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,2.7,l/100km,43.4,kW,1750.0,0.0,998,Maruti
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,5.1,l/100km,94.1,kW,12500.0,0.0,1582,Hyundai
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,5.5,l/100km,66.2,kW,4500.0,8610.0,1199,Honda
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,4.8,l/100km,66.2,kW,6000.0,0.0,1248,Maruti
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,6.6,l/100km,105.0,kW,17740.0,0.0,1968,Audi


In [152]:
set(list(df_used_cars_wn["Brand"]))

{'Ambassador',
 'Audi',
 'BMW',
 'Bentley',
 'Chevrolet',
 'Datsun',
 'Fiat',
 'Force',
 'Ford',
 'Honda',
 'Hyundai',
 'ISUZU',
 'Isuzu',
 'Jaguar',
 'Jeep',
 'Lamborghini',
 'Land',
 'Mahindra',
 'Maruti',
 'Mercedes-Benz',
 'Mini',
 'Mitsubishi',
 'Nissan',
 'Porsche',
 'Renault',
 'Skoda',
 'Smart',
 'Tata',
 'Toyota',
 'Volkswagen',
 'Volvo'}

In [153]:
#Replace ISUZU
#def isuzu(x3):
 #   branding = 

SyntaxError: invalid syntax (3671025548.py, line 3)

In [154]:
df_used_cars_clean = df_used_cars_wn.drop_duplicates()
df_used_cars_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5975 entries, 0 to 6018
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Index              5975 non-null   int64  
 1   Name               5975 non-null   object 
 2   Location           5975 non-null   object 
 3   Year               5975 non-null   int64  
 4   Kilometers_Driven  5975 non-null   int64  
 5   Fuel_Type          5975 non-null   object 
 6   Transmission       5975 non-null   object 
 7   Owner_Type         5975 non-null   object 
 8   Seats              5975 non-null   float64
 9   Mileage_L/100km    5919 non-null   float64
 10  Mileage_Unit       5975 non-null   object 
 11  Power_Value        5872 non-null   float64
 12  Power_Unit         5975 non-null   object 
 13  Price_EUR          5975 non-null   float64
 14  New_Price_EUR      5975 non-null   float64
 15  Engine_cc          5975 non-null   object 
 16  Brand              5975 

In [155]:
df_used_cars_clean.head()

Unnamed: 0,Index,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc,Brand
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,2.7,l/100km,43.4,kW,1750.0,0.0,998,Maruti
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,5.1,l/100km,94.1,kW,12500.0,0.0,1582,Hyundai
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,5.5,l/100km,66.2,kW,4500.0,8610.0,1199,Honda
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,4.8,l/100km,66.2,kW,6000.0,0.0,1248,Maruti
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,6.6,l/100km,105.0,kW,17740.0,0.0,1968,Audi


In [156]:
df_used_cars_clean["Brand"] = df_used_cars_clean["Brand"].replace("ISUZU","Isuzu")
df_used_cars_clean.head()

Unnamed: 0,Index,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc,Brand
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,2.7,l/100km,43.4,kW,1750.0,0.0,998,Maruti
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,5.1,l/100km,94.1,kW,12500.0,0.0,1582,Hyundai
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,5.5,l/100km,66.2,kW,4500.0,8610.0,1199,Honda
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,4.8,l/100km,66.2,kW,6000.0,0.0,1248,Maruti
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,6.6,l/100km,105.0,kW,17740.0,0.0,1968,Audi


In [157]:
set(list(df_used_cars_clean["Brand"]))

{'Ambassador',
 'Audi',
 'BMW',
 'Bentley',
 'Chevrolet',
 'Datsun',
 'Fiat',
 'Force',
 'Ford',
 'Honda',
 'Hyundai',
 'Isuzu',
 'Jaguar',
 'Jeep',
 'Lamborghini',
 'Land',
 'Mahindra',
 'Maruti',
 'Mercedes-Benz',
 'Mini',
 'Mitsubishi',
 'Nissan',
 'Porsche',
 'Renault',
 'Skoda',
 'Smart',
 'Tata',
 'Toyota',
 'Volkswagen',
 'Volvo'}

In [158]:
df_used_cars_cln = df_used_cars_clean.drop_duplicates()
df_used_cars_cln.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5975 entries, 0 to 6018
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Index              5975 non-null   int64  
 1   Name               5975 non-null   object 
 2   Location           5975 non-null   object 
 3   Year               5975 non-null   int64  
 4   Kilometers_Driven  5975 non-null   int64  
 5   Fuel_Type          5975 non-null   object 
 6   Transmission       5975 non-null   object 
 7   Owner_Type         5975 non-null   object 
 8   Seats              5975 non-null   float64
 9   Mileage_L/100km    5919 non-null   float64
 10  Mileage_Unit       5975 non-null   object 
 11  Power_Value        5872 non-null   float64
 12  Power_Unit         5975 non-null   object 
 13  Price_EUR          5975 non-null   float64
 14  New_Price_EUR      5975 non-null   float64
 15  Engine_cc          5975 non-null   object 
 16  Brand              5975 

In [159]:
df_used_cars_cln.head()

Unnamed: 0,Index,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc,Brand
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5.0,2.7,l/100km,43.4,kW,1750.0,0.0,998,Maruti
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5.0,5.1,l/100km,94.1,kW,12500.0,0.0,1582,Hyundai
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5.0,5.5,l/100km,66.2,kW,4500.0,8610.0,1199,Honda
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7.0,4.8,l/100km,66.2,kW,6000.0,0.0,1248,Maruti
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5.0,6.6,l/100km,105.0,kW,17740.0,0.0,1968,Audi


In [160]:
df_used_cars_cln["Seats"]=df_used_cars_cln["Seats"].astype(int)
df_used_cars_cln.head()

Unnamed: 0,Index,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc,Brand
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5,2.7,l/100km,43.4,kW,1750.0,0.0,998,Maruti
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5,5.1,l/100km,94.1,kW,12500.0,0.0,1582,Hyundai
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5,5.5,l/100km,66.2,kW,4500.0,8610.0,1199,Honda
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7,4.8,l/100km,66.2,kW,6000.0,0.0,1248,Maruti
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5,6.6,l/100km,105.0,kW,17740.0,0.0,1968,Audi


In [161]:
df_used_cars_cln

Unnamed: 0,Index,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Seats,Mileage_L/100km,Mileage_Unit,Power_Value,Power_Unit,Price_EUR,New_Price_EUR,Engine_cc,Brand
0,0,Maruti Wagon R LXI CNG,Mumbai,2010,72000,CNG,Manual,First,5,2.7,l/100km,43.4,kW,1750.0,0.0,998,Maruti
1,1,Hyundai Creta 1.6 CRDi SX Option,Pune,2015,41000,Diesel,Manual,First,5,5.1,l/100km,94.1,kW,12500.0,0.0,1582,Hyundai
2,2,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,5,5.5,l/100km,66.2,kW,4500.0,8610.0,1199,Honda
3,3,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,7,4.8,l/100km,66.2,kW,6000.0,0.0,1248,Maruti
4,4,Audi A4 New 2.0 TDI Multitronic,Coimbatore,2013,40670,Diesel,Automatic,Second,5,6.6,l/100km,105.0,kW,17740.0,0.0,1968,Audi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6014,6014,Maruti Swift VDI,Delhi,2014,27365,Diesel,Manual,First,5,3.5,l/100km,55.2,kW,4750.0,7880.0,1248,Maruti
6015,6015,Hyundai Xcent 1.1 CRDi S,Jaipur,2015,100000,Diesel,Manual,First,5,4.1,l/100km,53.0,kW,4000.0,0.0,1120,Hyundai
6016,6016,Mahindra Xylo D4 BSIV,Jaipur,2012,55000,Diesel,Manual,Second,8,7.1,l/100km,83.6,kW,2900.0,0.0,2498,Mahindra
6017,6017,Maruti Wagon R VXI,Kolkata,2013,46000,Petrol,Manual,First,5,5.3,l/100km,50.1,kW,2650.0,0.0,998,Maruti


In [162]:
df_used_cars_cln.to_csv("Final_car_data.csv", index=False)