In [242]:
import pandas as pd
import ast# I use it to safely evaluate strings that contain Python literals 
           #(e.g., strings, numbers, lists, dictionaries, tuples

***Read bangalore excel file***

In [243]:
bangalore = pd.read_excel("../raw_data/bangalore_cars.xlsx")# read exel file

In [244]:
bangalore.head()

Unnamed: 0,new_car_detail,new_car_overview,new_car_feature,new_car_specs,car_links
0,"{'it': 0, 'ft': 'Petrol', 'bt': 'Hatchback', '...","{'heading': 'Car overview', 'top': [{'key': 'R...","{'heading': 'Features', 'top': [{'value': 'Pow...","{'heading': 'Specifications', 'top': [{'key': ...",https://www.cardekho.com/used-car-details/used...
1,"{'it': 0, 'ft': 'Petrol', 'bt': 'SUV', 'km': '...","{'heading': 'Car overview', 'top': [{'key': 'R...","{'heading': 'Features', 'top': [{'value': 'Pow...","{'heading': 'Specifications', 'top': [{'key': ...",https://www.cardekho.com/buy-used-car-details/...
2,"{'it': 0, 'ft': 'Petrol', 'bt': 'Hatchback', '...","{'heading': 'Car overview', 'top': [{'key': 'R...","{'heading': 'Features', 'top': [{'value': 'Pow...","{'heading': 'Specifications', 'top': [{'key': ...",https://www.cardekho.com/used-car-details/used...
3,"{'it': 0, 'ft': 'Petrol', 'bt': 'Sedan', 'km':...","{'heading': 'Car overview', 'top': [{'key': 'R...","{'heading': 'Features', 'top': [{'value': 'Pow...","{'heading': 'Specifications', 'top': [{'key': ...",https://www.cardekho.com/buy-used-car-details/...
4,"{'it': 0, 'ft': 'Diesel', 'bt': 'SUV', 'km': '...","{'heading': 'Car overview', 'top': [{'key': 'R...","{'heading': 'Features', 'top': [{'value': 'Pow...","{'heading': 'Specifications', 'top': [{'key': ...",https://www.cardekho.com/used-car-details/used...


***Cleaned new car overview***

In [245]:
# ast_literal_eval will converts the string into the python dictionary.
bangalore['new_car_overview'] = bangalore['new_car_overview'].apply(ast.literal_eval)
# json_normalize will help us to normalise semi structured data like dictionary into NORMAL dataframe
df_expanded = pd.json_normalize(bangalore['new_car_overview'])


***df_expanded has a column named "top" which has a list of dictionaries***
***each dictionary in the list contains key and value pairs***


In [246]:
rows = []
for index, row in df_expanded.iterrows():##df_expanded has a column named "top" which has a list of dictionaries
##each dictionary in the list contains key and value pairs
## for loop iterates rows in dataframe using iterrows and which returns index and rows as a series
    # variable top_list Extract the list of dictionaries from the column'top' 
    top_list = row['top']
    
    # Created a new dictionary where asigned as key: values from the current rows of top_list 
    #which helps to transform list of dictionaries into single list of dictionary
    columns_dict = {item['key']: item['value'] for item in top_list}
    
    # Append the new dictionary to the list of rows
    rows.append(columns_dict)

car_overview = pd.DataFrame(rows)


In [247]:
car_overview.head()

Unnamed: 0,Registration Year,Insurance Validity,Fuel Type,Seats,Kms Driven,RTO,Ownership,Engine Displacement,Transmission,Year of Manufacture
0,2015,Third Party insurance,Petrol,5 Seats,"1,20,000 Kms",KA51,Third Owner,998 cc,Manual,2015.0
1,Feb 2018,Comprehensive,Petrol,5 Seats,"32,706 Kms",KA05,Second Owner,1497 cc,Manual,2018.0
2,Sept 2018,Comprehensive,Petrol,5 Seats,"11,949 Kms",KA03,First Owner,1199 cc,Manual,2018.0
3,Dec 2014,Comprehensive,Petrol,5 Seats,"17,794 Kms",KA53,First Owner,1197 cc,Manual,2014.0
4,2015,Third Party insurance,Diesel,5 Seats,"60,000 Kms",KA04,First Owner,1248 cc,Manual,2015.0


***Cleaned new car features***

In [248]:
# Iam using ast.literal eval  
# which helps me to convert the python string literal(eg:dictionaries, tuple ,..) to the corresponding python object 
# json like strings are converted into the normal python dictionaries, list
bangalore['new_car_feature'] = bangalore['new_car_feature'].apply(ast.literal_eval)
# json normalise will help us to normalise the json dict into the normal column and rows in new_car_feature dataframe
features_expanded = pd.json_normalize(bangalore['new_car_feature'])

In [249]:
# Expand 'top' field
def expand_top(top_list):
    return [item['value'] for item in top_list] # this will return me list of the value from each dictionary.

# Extract 'top' features
bangalore['top_features'] = bangalore['new_car_feature'].apply(lambda x: expand_top(x['top']))

# Expand 'data' field
#The data field contains a more complex structure—each element in the list has a heading (the main category) and 
# another list of features under that heading. The goal here is to extract the features under each heading.
def expand_data(data_list):
    expanded_data = {}
    for item in data_list:
        heading = item['heading']
        expanded_data[heading] = [feature['value'] for feature in item['list']]
    return expanded_data

# Extract 'data' features
data_features = bangalore['new_car_feature'].apply(lambda x: expand_data(x['data']))

# Normalize 'data_features' and concatenate with 'top_features'
data_features_df = pd.json_normalize(data_features)

# Concatenate 'top_features' and 'data_features_df'
top_feature_df = pd.concat([bangalore[['top_features']], data_features_df], axis=1)

In [250]:
top_feature_df.head()

Unnamed: 0,top_features,Comfort & Convenience,Interior,Exterior,Safety,Entertainment & Communication
0,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Digital Odometer, El...","[Adjustable Head Lights, Manually Adjustable E...","[Centeral Locking, Child Safety Locks, Day Nig...",
1,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re..."
2,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re..."
3,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re..."
4,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Radio, Audio System Remote Control, Speakers ..."


***Cleaned new car specs***

In [251]:
bangalore['new_car_specs'] = bangalore['new_car_specs'].apply(ast.literal_eval)

specs_expanded = pd.json_normalize(bangalore['new_car_specs'])

In [252]:
rows = []
for index, row in specs_expanded.iterrows():
    # Extract the 'top' list from the row
    top_list = row['top']
    
    # Create a dictionary for the current row
    columns_dict = {item['key']: item['value'] for item in top_list}
    
    # Append the dictionary to the list of rows
    rows.append(columns_dict)

car_specs = pd.DataFrame(rows)

In [253]:
car_specs.head()

Unnamed: 0,Mileage,Engine,Max Power,Torque,Seats,Wheel Size
0,23.1 kmpl,998 CC,67.04bhp,90Nm,5,
1,17 kmpl,1497 CC,121.31bhp,150Nm,5,16.0
2,23.84 kmpl,1199 CC,84bhp,114Nm,5,14.0
3,19.1 kmpl,1197 CC,81.86bhp,113.75Nm,5,14.0
4,23.65 kmpl,1248 CC,88.5bhp,200Nm,5,16.0


***Cleaned new car details***

In [254]:
bangalore['new_car_detail'] = bangalore['new_car_detail'].apply(ast.literal_eval)

detail_expanded = pd.json_normalize(bangalore['new_car_detail'])

In [273]:
rows = []
for index, row in detail_expanded.iterrows():

    # Append the dictionary to the list of rows
    rows.append(row)

car_overview = pd.DataFrame(rows)

In [274]:
car_overview

Unnamed: 0,it,ft,bt,km,transmission,ownerNo,owner,oem,model,modelYear,centralVariantId,variantName,price,priceActual,priceSaving,priceFixedText,trendingText.imgUrl,trendingText.heading,trendingText.desc
0,0,Petrol,Hatchback,120000,Manual,3,3rd Owner,Maruti,Maruti Celerio,2015,3979,VXI,₹ 4 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
1,0,Petrol,SUV,32706,Manual,2,2nd Owner,Ford,Ford Ecosport,2018,6087,1.5 Petrol Titanium BSIV,₹ 8.11 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
2,0,Petrol,Hatchback,11949,Manual,1,1st Owner,Tata,Tata Tiago,2018,2983,1.2 Revotron XZ,₹ 5.85 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
3,0,Petrol,Sedan,17794,Manual,1,1st Owner,Hyundai,Hyundai Xcent,2014,1867,1.2 Kappa S Option,₹ 4.62 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
4,0,Diesel,SUV,60000,Manual,1,1st Owner,Maruti,Maruti SX4 S Cross,2015,4277,DDiS 200 Zeta,₹ 7.90 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
5,0,Diesel,SUV,20000,Manual,1,1st Owner,Jeep,Jeep Compass,2020,5931,2.0 Longitude BSIV,₹ 19 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
6,0,Petrol,Hatchback,37772,Manual,1,1st Owner,Datsun,Datsun GO,2017,5223,T Option BSIV,₹ 3.45 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
7,0,Petrol,SUV,30000,Automatic,1,1st Owner,Hyundai,Hyundai Venue,2021,6946,SX Plus Turbo DCT BSIV,₹ 12 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
8,0,Petrol,Sedan,37000,Automatic,1,1st Owner,Maruti,Maruti Ciaz,2018,6555,Alpha Automatic BSIV,₹ 9.60 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
9,0,Petrol,Hatchback,11949,Manual,1,1st Owner,Tata,Tata Tiago,2017,2985,1.2 Revotron XZ WO Alloy,₹ 5.85 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days


In [256]:
newcar_details = pd.DataFrame(detail_expanded)

In [257]:
newcar_details.head()

Unnamed: 0,it,ft,bt,km,transmission,ownerNo,owner,oem,model,modelYear,centralVariantId,variantName,price,priceActual,priceSaving,priceFixedText,trendingText.imgUrl,trendingText.heading,trendingText.desc
0,0,Petrol,Hatchback,120000,Manual,3,3rd Owner,Maruti,Maruti Celerio,2015,3979,VXI,₹ 4 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
1,0,Petrol,SUV,32706,Manual,2,2nd Owner,Ford,Ford Ecosport,2018,6087,1.5 Petrol Titanium BSIV,₹ 8.11 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
2,0,Petrol,Hatchback,11949,Manual,1,1st Owner,Tata,Tata Tiago,2018,2983,1.2 Revotron XZ,₹ 5.85 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
3,0,Petrol,Sedan,17794,Manual,1,1st Owner,Hyundai,Hyundai Xcent,2014,1867,1.2 Kappa S Option,₹ 4.62 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
4,0,Diesel,SUV,60000,Manual,1,1st Owner,Maruti,Maruti SX4 S Cross,2015,4277,DDiS 200 Zeta,₹ 7.90 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days


In [258]:
car_overview =car_overview.reset_index(drop = True)
top_feature_df = top_feature_df.reset_index(drop = True)
car_specs = car_specs.reset_index(drop = True)
newcar_details = newcar_details.reset_index(drop = True)

In [259]:
final_df_combined = pd.concat([car_overview, top_feature_df, car_specs, newcar_details], axis = 1)

In [260]:
final_df_combined.head()

Unnamed: 0,Mileage,Engine,Max Power,Torque,Wheel Size,Seats,top_features,Comfort & Convenience,Interior,Exterior,Safety,Entertainment & Communication,Mileage.1,Engine.1,Max Power.1,Torque.1,Seats.1,Wheel Size.1,it,ft,bt,km,transmission,ownerNo,owner,oem,model,modelYear,centralVariantId,variantName,price,priceActual,priceSaving,priceFixedText,trendingText.imgUrl,trendingText.heading,trendingText.desc
0,18.9 kmpl,1197 CC,82bhp,114Nm,14,5,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Digital Odometer, El...","[Adjustable Head Lights, Manually Adjustable E...","[Centeral Locking, Child Safety Locks, Day Nig...",,23.1 kmpl,998 CC,67.04bhp,90Nm,5,,0,Petrol,Hatchback,120000,Manual,3,3rd Owner,Maruti,Maruti Celerio,2015,3979,VXI,₹ 4 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
1,18.9 kmpl,1197 CC,82bhp,114Nm,14,5,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",17 kmpl,1497 CC,121.31bhp,150Nm,5,16.0,0,Petrol,SUV,32706,Manual,2,2nd Owner,Ford,Ford Ecosport,2018,6087,1.5 Petrol Titanium BSIV,₹ 8.11 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
2,18.9 kmpl,1197 CC,82bhp,114Nm,14,5,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",23.84 kmpl,1199 CC,84bhp,114Nm,5,14.0,0,Petrol,Hatchback,11949,Manual,1,1st Owner,Tata,Tata Tiago,2018,2983,1.2 Revotron XZ,₹ 5.85 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
3,18.9 kmpl,1197 CC,82bhp,114Nm,14,5,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",19.1 kmpl,1197 CC,81.86bhp,113.75Nm,5,14.0,0,Petrol,Sedan,17794,Manual,1,1st Owner,Hyundai,Hyundai Xcent,2014,1867,1.2 Kappa S Option,₹ 4.62 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days
4,18.9 kmpl,1197 CC,82bhp,114Nm,14,5,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Radio, Audio System Remote Control, Speakers ...",23.65 kmpl,1248 CC,88.5bhp,200Nm,5,16.0,0,Diesel,SUV,60000,Manual,1,1st Owner,Maruti,Maruti SX4 S Cross,2015,4277,DDiS 200 Zeta,₹ 7.90 Lakh,,,,https://stimg.cardekho.com/used-cars/common/ic...,Trending Car!,High chances of sale in next 6 days


In [261]:
final_df_combined['top_features'][0]

['Power Steering',
 'Power Windows Front',
 'Air Conditioner',
 'Heater',
 'Adjustable Head Lights',
 'Manually Adjustable Exterior Rear View Mirror',
 'Centeral Locking',
 'Child Safety Locks']

In [262]:
# drop duplicate columns with same values
final_df_combined = final_df_combined.loc[:, ~final_df_combined.columns.duplicated()]

In [263]:
final_df_combined.columns

Index(['Mileage', 'Engine', 'Max Power', 'Torque', 'Wheel Size', 'Seats',
       'top_features', 'Comfort & Convenience', 'Interior', 'Exterior',
       'Safety', 'Entertainment & Communication', 'it', 'ft', 'bt', 'km',
       'transmission', 'ownerNo', 'owner', 'oem', 'model', 'modelYear',
       'centralVariantId', 'variantName', 'price', 'priceActual',
       'priceSaving', 'priceFixedText', 'trendingText.imgUrl',
       'trendingText.heading', 'trendingText.desc'],
      dtype='object')

In [264]:
# Get all column names
columns = final_df_combined.columns

# Create an empty list to store duplicate column pairs
duplicate_columns = []

# Loop through each pair of columns and compare them
for i in range(len(columns)):
    for j in range(i + 1, len(columns)):
        if final_df_combined[columns[i]].equals(final_df_combined[columns[j]]):
            duplicate_columns.append((columns[i], columns[j]))

# Print the pairs of duplicate columns
print("Duplicate columns with identical features:")
for col1, col2 in duplicate_columns:
    print(f"{col1} and {col2}")

Duplicate columns with identical features:


In [265]:
final_df_combined.ownerNo.unique()

array([3, 2, 1, 4, 5])

In [266]:
final_df_combined.owner.unique()

array(['3rd Owner', '2nd Owner', '1st Owner', '4th Owner', '5th Owner'],
      dtype=object)

In [267]:
final_df_combined['RTO'].str[:2].unique()

KeyError: 'RTO'

In [None]:
unwanted_columns = ['Registration Year', 'transmission', 'Kms Driven', 'Engine Displacement', 'trendingText.imgUrl', 'trendingText.heading', 'trendingText.desc', 'priceFixedText', 
                    'owner', 'it', 'ft', 'Ownership', 'Year of Manufacture']

In [None]:
cars_df = final_df_combined.drop(columns=unwanted_columns)

In [None]:
cars_df.columns

Index(['Insurance Validity', 'Fuel Type', 'Seats', 'RTO', 'Transmission',
       'top_features', 'Comfort & Convenience', 'Interior', 'Exterior',
       'Safety', 'Entertainment & Communication', 'Mileage', 'Engine',
       'Max Power', 'Torque', 'Wheel Size', 'bt', 'km', 'ownerNo', 'oem',
       'model', 'modelYear', 'centralVariantId', 'variantName', 'price',
       'priceActual', 'priceSaving'],
      dtype='object')

In [None]:
cars_df.oem.unique()

array(['Maruti', 'Ford', 'Tata', 'Hyundai', 'Jeep', 'Datsun', 'Honda',
       'Mahindra', 'Mercedes-Benz', 'BMW', 'Renault', 'Audi', 'Toyota',
       'Mini', 'Kia', 'Skoda', 'Volkswagen', 'Volvo', 'MG', 'Nissan',
       'Fiat', 'Mahindra Ssangyong', 'Mitsubishi', 'Jaguar', 'Land Rover',
       'Chevrolet', 'Citroen', 'Opel', 'Mahindra Renault'], dtype=object)

In [None]:
#cars_df[['Kms Driven', 'km']].head()

In [None]:
cars_df.head()

Unnamed: 0,Insurance Validity,Fuel Type,Seats,RTO,Transmission,top_features,Comfort & Convenience,Interior,Exterior,Safety,Entertainment & Communication,Mileage,Engine,Max Power,Torque,Wheel Size,bt,km,ownerNo,oem,model,modelYear,centralVariantId,variantName,price,priceActual,priceSaving
0,Third Party insurance,Petrol,5 Seats,KA51,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Digital Odometer, El...","[Adjustable Head Lights, Manually Adjustable E...","[Centeral Locking, Child Safety Locks, Day Nig...",,23.1 kmpl,998 CC,67.04bhp,90Nm,,Hatchback,120000,3,Maruti,Maruti Celerio,2015,3979,VXI,₹ 4 Lakh,,
1,Comprehensive,Petrol,5 Seats,KA05,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",17 kmpl,1497 CC,121.31bhp,150Nm,16.0,SUV,32706,2,Ford,Ford Ecosport,2018,6087,1.5 Petrol Titanium BSIV,₹ 8.11 Lakh,,
2,Comprehensive,Petrol,5 Seats,KA03,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",23.84 kmpl,1199 CC,84bhp,114Nm,14.0,Hatchback,11949,1,Tata,Tata Tiago,2018,2983,1.2 Revotron XZ,₹ 5.85 Lakh,,
3,Comprehensive,Petrol,5 Seats,KA53,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",19.1 kmpl,1197 CC,81.86bhp,113.75Nm,14.0,Sedan,17794,1,Hyundai,Hyundai Xcent,2014,1867,1.2 Kappa S Option,₹ 4.62 Lakh,,
4,Third Party insurance,Diesel,5 Seats,KA04,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Radio, Audio System Remote Control, Speakers ...",23.65 kmpl,1248 CC,88.5bhp,200Nm,16.0,SUV,60000,1,Maruti,Maruti SX4 S Cross,2015,4277,DDiS 200 Zeta,₹ 7.90 Lakh,,


In [None]:
cars_df.columns

Index(['Insurance Validity', 'Fuel Type', 'Seats', 'RTO', 'Transmission',
       'top_features', 'Comfort & Convenience', 'Interior', 'Exterior',
       'Safety', 'Entertainment & Communication', 'Mileage', 'Engine',
       'Max Power', 'Torque', 'Wheel Size', 'bt', 'km', 'ownerNo', 'oem',
       'model', 'modelYear', 'centralVariantId', 'variantName', 'price',
       'priceActual', 'priceSaving'],
      dtype='object')

In [None]:
# Complete mapping dictionary
rename_dict = {
    'Insurance Validity': 'Insurance_Validity_Period',
    'Fuel Type': 'Fuel_Type',
    'Seats': 'Number_of_Seats',
    'RTO': 'Regional_Transport_Office',
    'Transmission': 'Transmission_Type',
    'top_features': 'Top_Features',
    'Comfort & Convenience': 'Comfort_and_Convenience',
    'Interior': 'Interior_Features',
    'Exterior': 'Exterior_Features',
    'Safety': 'Safety_Features',
    'Entertainment & Communication': 'Entertainment_and_Communication',
    'Mileage': 'Mileage_(km/l)',
    'Engine': 'Engine_Capacity',
    'Max Power': 'Maximum_Power',
    'Torque': 'Torque',
    'Wheel Size': 'Wheel_Size',
    'bt': 'Battery_Type',
    'km': 'Kilometers_Driven',
    'ownerNo': 'Number_of_Owners',
    'oem': 'Original_Equipment_Manufacturer',
    'model': 'Car_Model',
    'modelYear': 'Model_Year',
    'centralVariantId': 'Central_Variant_ID',
    'variantName': 'Variant_Name',
    'price': 'Listed_Price',
    'priceActual': 'Actual_Price',
    'priceSaving': 'Price_Saving_Amount'
}

In [None]:
cars_df.rename(columns=rename_dict, inplace=True)

In [None]:
cars_df.head()

Unnamed: 0,Insurance_Validity_Period,Fuel_Type,Number_of_Seats,Regional_Transport_Office,Transmission_Type,Top_Features,Comfort_and_Convenience,Interior_Features,Exterior_Features,Safety_Features,Entertainment_and_Communication,Mileage_(km/l),Engine_Capacity,Maximum_Power,Torque,Wheel_Size,Battery_Type,Kilometers_Driven,Number_of_Owners,Original_Equipment_Manufacturer,Car_Model,Model_Year,Central_Variant_ID,Variant_Name,Listed_Price,Actual_Price,Price_Saving_Amount
0,Third Party insurance,Petrol,5 Seats,KA51,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Digital Odometer, El...","[Adjustable Head Lights, Manually Adjustable E...","[Centeral Locking, Child Safety Locks, Day Nig...",,23.1 kmpl,998 CC,67.04bhp,90Nm,,Hatchback,120000,3,Maruti,Maruti Celerio,2015,3979,VXI,₹ 4 Lakh,,
1,Comprehensive,Petrol,5 Seats,KA05,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",17 kmpl,1497 CC,121.31bhp,150Nm,16.0,SUV,32706,2,Ford,Ford Ecosport,2018,6087,1.5 Petrol Titanium BSIV,₹ 8.11 Lakh,,
2,Comprehensive,Petrol,5 Seats,KA03,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",23.84 kmpl,1199 CC,84bhp,114Nm,14.0,Hatchback,11949,1,Tata,Tata Tiago,2018,2983,1.2 Revotron XZ,₹ 5.85 Lakh,,
3,Comprehensive,Petrol,5 Seats,KA53,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Cd Player, Radio, Speakers Front, Speakers Re...",19.1 kmpl,1197 CC,81.86bhp,113.75Nm,14.0,Sedan,17794,1,Hyundai,Hyundai Xcent,2014,1867,1.2 Kappa S Option,₹ 4.62 Lakh,,
4,Third Party insurance,Diesel,5 Seats,KA04,Manual,"[Power Steering, Power Windows Front, Air Cond...","[Power Steering, Power Windows Front, Power Wi...","[Air Conditioner, Heater, Adjustable Steering,...","[Adjustable Head Lights, Fog Lights Front, Pow...","[Anti Lock Braking System, Centeral Locking, P...","[Radio, Audio System Remote Control, Speakers ...",23.65 kmpl,1248 CC,88.5bhp,200Nm,16.0,SUV,60000,1,Maruti,Maruti SX4 S Cross,2015,4277,DDiS 200 Zeta,₹ 7.90 Lakh,,


In [None]:
cars_df.to_excel('../cleaned_data/bangalore_cars_cleaned.xlsx', index= False)