# Data Preprocessing
 This file preprocesses the raw data extracted from Facebook Marketplace. It includes data cleaning, column analysis, value extraction, null value analysis, and related steps. 

### Importing necessary libraries

In [248]:
import pandas as pd 
import numpy as np
import warnings
warnings.filterwarnings("ignore")

### Reading the csv file

In [249]:
df = pd.read_csv("data/raw_data_facebook_scraper.csv")
df

Unnamed: 0,id,primary_listing_photo,if_gk_just_listed_tag_on_search_feed,listing_price,strikethrough_price,comparable_price,comparable_price_type,location,is_hidden,is_live,...,legal_disclosure_impressum_url,commerce_badges_info,listing_address,seller_phone_number,vehicle_website_link,dealership_name,seller,energy_efficiency_class_eu,listing_photos,pre_recorded_videos
0,842325545077316,{'image': {'uri': 'https://scontent.fsyd4-2.fn...,,"{'formatted_amount': 'AU$11,300', 'amount_with...",,,,"{'reverse_geocode': {'city': 'Brisbane', 'stat...",False,True,...,,,,,,,,,,
1,1282138740755042,{'image': {'uri': 'https://scontent.fsyd4-2.fn...,,"{'formatted_amount': 'AU$5,000', 'amount_with_...","{'formatted_amount': 'AU$7,000', 'amount': '70...",,,"{'reverse_geocode': {'city': 'Brisbane', 'stat...",False,True,...,,,,,,,,,,
2,4343086952677152,{'image': {'uri': 'https://scontent.fsyd4-1.fn...,,"{'formatted_amount': 'AU$13,500', 'amount_with...",,,,"{'reverse_geocode': {'city': 'Brisbane', 'stat...",False,True,...,,,,,,,,,,
3,856599473632809,{'image': {'uri': 'https://scontent.fsyd4-1.fn...,,"{'formatted_amount': 'AU$89,999', 'amount_with...",,,,"{'reverse_geocode': {'city': 'Brisbane', 'stat...",False,True,...,,,,,,,,,,
4,1929741430912801,{'image': {'uri': 'https://scontent.fsyd4-1.fn...,,"{'formatted_amount': 'AU$18,800', 'amount_with...",,,,"{'reverse_geocode': {'city': 'Brisbane', 'stat...",False,True,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11950,1626734915162026,{'image': {'uri': 'https://scontent-syd2-1.xx....,,"{'amount_with_offset': '590000', 'currency': '...",,,,"{'latitude': -34.928283691406, 'longitude': 13...",False,True,...,,"{'source_summary': None, 'badges': []}",,,,,,,[{'accessibility_caption': 'May be an image of...,[]
11951,2186106862127606,{'image': {'uri': 'https://scontent.fbne6-1.fn...,,"{'amount_with_offset': '2495000', 'currency': ...",,,,"{'latitude': -37.768249511719, 'longitude': 14...",False,True,...,,{'source_summary': 'Highly rated on Marketplac...,,,,,,,[{'accessibility_caption': 'May be an image of...,[]
11952,1436459311236585,{'image': {'uri': 'https://scontent.fbne5-1.fn...,,"{'amount_with_offset': '899000', 'currency': '...",,,,"{'latitude': -37.817687988281, 'longitude': 14...",False,True,...,,"{'source_summary': None, 'badges': []}",,,,,,,[{'accessibility_caption': 'No photo descripti...,[]
11953,1084441146768423,{'image': {'uri': 'https://scontent.fbne5-1.fn...,,"{'amount_with_offset': '1000', 'currency': 'AU...",,,,"{'latitude': -38.075866699219, 'longitude': 14...",False,True,...,,"{'source_summary': None, 'badges': []}",,,,,,,[{'accessibility_caption': 'No photo descripti...,[]


## 1) Removing Null Columns

- To improve data quality, we dropped the columns that had **100% missing values** (all `NaN`).  
- These columns do not contribute any useful information to the model and only increase dataset size and complexity.


In [250]:
#checking cols that have all null values in it
all_null_cols_mask = df.isnull().all()
cols_with_all_nulls = df.columns[all_null_cols_mask].tolist()
print(cols_with_all_nulls)
df.drop(cols_with_all_nulls,axis=1,inplace=True)

['if_gk_just_listed_tag_on_search_feed', 'comparable_price', 'comparable_price_type', 'min_listing_price', 'max_listing_price', 'origin_group', 'listing_video', 'parent_listing', 'marketplace_listing_seller', 'product_feedback', 'story', 'payment_time_period', 'marketplace_lead_gen_form', 'seller_message_thread', 'default_variant_listing', 'reportable_seller_type', 'active_order', 'rebuy_order_receipt', 'most_recent_active_order_as_buyer', 'shipping_profile', 'vacation_mode', 'incentive_campaign_for_free_shipping', 'first_message_suggested_value', 'sweepstake_content', 'marketplace_bump_info', 'fair_market_value_data', 'vehicle_identification_number', 'vehicle_registration_plate_information', 'vehicle_carfax_report', 'legal_disclosure_impressum_url', 'vehicle_website_link', 'seller', 'energy_efficiency_class_eu']


In [251]:
null_counts = df.isnull().sum()
percentage = 0.5
columns_with_many_nulls = null_counts[null_counts > len(df)*percentage].index.tolist()
print(columns_with_many_nulls)

['strikethrough_price', 'inventory_count', 'legal_reporting_cta_type', 'legal_reporting_uri', 'seller_cms', 'vehicle_condition', 'vehicle_is_paid_off', 'vehicle_number_of_owners', 'vehicle_title_status', 'vehicle_trim_display_name', 'listing_address', 'seller_phone_number', 'dealership_name']


## Dropped Columns 

### 1. Low Variance (Near-Constant Features)
These columns contain almost the same value across the dataset (the most frequent value appears in more than **11,000 out of 11,955 rows**), providing little to no predictive power.

- `condition`



### 2. URLs, Media, and Link-Only Columns
These columns only store URLs or media references and do not provide numerical or categorical information useful for price prediction.

- `listing_photos`  
- `listing_url`  
- `input_url`  
- `share_uri`  



### 3. Duplicate or Redundant Information
These columns duplicate information already captured in other features, making them unnecessary.

- `custom_title` (covered by `listing_title`)  
- `location` (duplicate of `location_text`)  
- `custom_sub_titles_with_rendering_flags`  



### 4. System-Generated or Unique Identifiers
These columns contain IDs or system-level metadata that are unique per listing and do not contribute to learning price patterns.

- `reportable_ent_id`  
- `primary_mp_ent`  
- `logging_id`  
- `location_vanity_or_id`  



### 5. Not Relevant for Price Prediction
These columns describe visibility settings, listing status, or platform-specific metadata that do not influence vehicle price.

- `viewer_is_buyer`  
- `commerce_badges_info`  
- `cross_post_info`  
- `hidden_from_friends` 
- `is_hidden`
- `is_live`
- `is_pending`
- `is_sold`,
- `is_viewer_seller`
- `marketplace_listing_category_id`
- `delivery_types`
- `attribute_data`
- `origin_target`
- `shipping_offered`
- `pre_recorded_videos`
- `vehicle_seller_type`



### 6. Mostly Empty or Uninformative Features
These columns contain mostly empty values (e.g., empty lists) or lack meaningful structure.

- `vehicle_features`  
- `product_item`  
- `creation_time` 
- `listing_inventory_type`
- `messaging_enabled`
- `listing_is_rejected`
- `is_checkout_enabled`
- `is_draft`
- `can_seller_edit`
- `can_share`
- `can_seller_change_availability`
- `is_on_marketplace`
- `messagingEnabled`
- `has_children`
- `c2c_shipping_eligible`
- `is_shipping_offered`
- `order_summaries`
- `should_hide_pdp_shipping_content`
- `is_seller_business_onboarded`
- `is_buy_now_enabled`
- `is_purchase_protected`
- `can_buyer_make_checkout_offer`
- `is_cart_enabled`
- `all_message_threads`
- `should_show_txn_survey_on_mas`
- `sweepstake_enabled`
- `sweepstake_status`
- `fi_enhanced_appeal_data`
- `listing_seller_notices`
- `is_email_communication_enabled`



### 7. High Missing Values
These columns contain **more than 6,000 missing values** (over 50% of the dataset), making them unreliable for modeling.

- `vehicle_number_of_owners`  
- `strikethrough_price`  
- `inventory_count`  
- `legal_reporting_cta_type`  
- `legal_reporting_uri`  
- `seller_cms`  
- `vehicle_condition`  
- `vehicle_is_paid_off`  
- `vehicle_title_status`  
- `vehicle_trim_display_name`  
- `listing_address`  
- `seller_phone_number`  
- `dealership_name`  

In [252]:
cols_to_remove = ['strikethrough_price', 'inventory_count', 'legal_reporting_cta_type', 
        'legal_reporting_uri', 'seller_cms', 'vehicle_condition', 'vehicle_is_paid_off', 
        'vehicle_number_of_owners', 'vehicle_title_status', 'vehicle_trim_display_name', 
        'listing_address', 'seller_phone_number', 'dealership_name', 'id', 'primary_listing_photo',
        'is_hidden', 'is_live', 'is_pending', 'is_sold','is_viewer_seller', 'marketplace_listing_category_id',
       'delivery_types',"listing_photos","commerce_badges_info","custom_title","listingUrl","inputUrl",
        "if_viewer_is_buyer","vehicle_features","location","location_vanity_or_id","product_item",
        "cross_post_info","hidden_from_friends","reportable_ent_id",
        "primary_mp_ent","logging_id","share_uri",
        "custom_sub_titles_with_rendering_flags","vehicle_number_of_owners","condition", 'creation_time', 
        'listing_inventory_type', 'messaging_enabled', 'listing_is_rejected',
       'is_checkout_enabled', 'is_draft', 'can_seller_edit', 'can_share',
       'can_seller_change_availability', 'is_on_marketplace',
       'messagingEnabled', 'has_children', 'c2c_shipping_eligible',
       'is_shipping_offered', 'order_summaries',
       'should_hide_pdp_shipping_content', 'is_seller_business_onboarded',
       'is_buy_now_enabled', 'is_purchase_protected',
       'can_buyer_make_checkout_offer', 'is_cart_enabled',
       'all_message_threads', 'should_show_txn_survey_on_mas',
       'sweepstake_enabled', 'sweepstake_status', 'fi_enhanced_appeal_data',
       'listing_seller_notices', 'is_email_communication_enabled','attribute_data',
       'origin_target', 'shipping_offered', 'pre_recorded_videos','vehicle_seller_type']

df.drop(cols_to_remove,axis=1,inplace=True)

In [253]:
df.columns

Index(['listing_price', 'marketplace_listing_title', 'redacted_description',
       'location_text', 'formatted_price', 'vehicle_exterior_color',
       'vehicle_fuel_type', 'vehicle_interior_color',
       'vehicle_make_display_name', 'vehicle_model_display_name',
       'vehicle_odometer_data', 'vehicle_specifications',
       'vehicle_transmission_type'],
      dtype='object')

In [254]:
df.tail(7)

Unnamed: 0,listing_price,marketplace_listing_title,redacted_description,location_text,formatted_price,vehicle_exterior_color,vehicle_fuel_type,vehicle_interior_color,vehicle_make_display_name,vehicle_model_display_name,vehicle_odometer_data,vehicle_specifications,vehicle_transmission_type
11948,"{'amount_with_offset': '2900000', 'currency': ...",2020 Volvo XC60,{'text': 'Volvo XC60 T5 Momentum – MY2020 – 98...,"{'text': 'Melbourne, VIC'}","{'text': 'AU$29,000'}",blue,GASOLINE,black,Volvo,XC60,"{'unit': 'KILOMETERS', 'value': 99500}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11949,"{'amount_with_offset': '2799900', 'currency': ...",2019 Ford RANGER WILDTRAK PX MKlll AUTO,"{'text': '$27,999 Ex Gov Charges \n\n2019 Ford...","{'text': 'Adelaide, SA'}","{'text': 'AU$27,999'}",silver,DIESEL,black,Ford,ranger wildtrak px mklll auto,"{'unit': 'KILOMETERS', 'value': 247000}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11950,"{'amount_with_offset': '590000', 'currency': '...",2002 Mercedes-Benz Sedan,{'text': 'Mercedes Benz kompressor classic sed...,"{'text': 'Adelaide, SA'}","{'text': 'AU$5,900'}",white,GASOLINE,off_white,Mercedes-Benz,sedan,"{'unit': 'KILOMETERS', 'value': 181000}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11951,"{'amount_with_offset': '2495000', 'currency': ...",2021 Nissan Qashqai,{'text': '2021 Nissan Qashqui Ti For Sale\nOne...,"{'text': 'Melbourne, VIC'}","{'text': 'AU$24,950'}",grey,GASOLINE,black,Nissan,Qashqai,"{'unit': 'KILOMETERS', 'value': 67502}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11952,"{'amount_with_offset': '899000', 'currency': '...",2013 Hyundai 2013 Hyundai ix35 LM3 Elite Wagon...,"{'text': ""Will not respond to is this availabl...","{'text': 'Melbourne, VIC'}","{'text': 'AU$8,990'}",grey,PETROL,black,Hyundai,2013 hyundai ix35 lm3 elite wagon 5dr spts aut...,"{'unit': 'KILOMETERS', 'value': 232045}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11953,"{'amount_with_offset': '1000', 'currency': 'AU...",2024 Mercedes-Benz GLA250,{'text': 'ALL CAR PARTS FOR SALE \nMelbourne C...,"{'text': 'Melbourne, VIC'}",{'text': 'AU$10'},black,PETROL,black,Mercedes-Benz,gla-class,"{'unit': 'KILOMETERS', 'value': 14000}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11954,"{'amount_with_offset': '950000', 'currency': '...",2015 Mitsubishi Lancer,"{'text': 'Clean, reliable, and fuel-efficient....","{'text': 'Melbourne, VIC'}","{'text': 'AU$9,500'}",white,GASOLINE,black,Mitsubishi,Lancer,"{'unit': 'KILOMETERS', 'value': 252433}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC


#### Dropping Duplicate Price Column

We dropped `formatted_price` because it contained some incorrect values (especially in the offset) and it was also a duplicate of `listing_price`.  
Since both columns represent the same information, we kept **`listing_price`** as the main price column.

In [255]:
print(df["listing_price"].isnull().sum())
df['listing_price'].value_counts

0


<bound method IndexOpsMixin.value_counts of 0        {'formatted_amount': 'AU$11,300', 'amount_with...
1        {'formatted_amount': 'AU$5,000', 'amount_with_...
2        {'formatted_amount': 'AU$13,500', 'amount_with...
3        {'formatted_amount': 'AU$89,999', 'amount_with...
4        {'formatted_amount': 'AU$18,800', 'amount_with...
                               ...                        
11950    {'amount_with_offset': '590000', 'currency': '...
11951    {'amount_with_offset': '2495000', 'currency': ...
11952    {'amount_with_offset': '899000', 'currency': '...
11953    {'amount_with_offset': '1000', 'currency': 'AU...
11954    {'amount_with_offset': '950000', 'currency': '...
Name: listing_price, Length: 11955, dtype: object>

In [256]:
df["formatted_price"].isnull().sum()

114

In [257]:
df.drop("listing_price",axis=1,inplace=True)

In [258]:
df.tail()

Unnamed: 0,marketplace_listing_title,redacted_description,location_text,formatted_price,vehicle_exterior_color,vehicle_fuel_type,vehicle_interior_color,vehicle_make_display_name,vehicle_model_display_name,vehicle_odometer_data,vehicle_specifications,vehicle_transmission_type
11950,2002 Mercedes-Benz Sedan,{'text': 'Mercedes Benz kompressor classic sed...,"{'text': 'Adelaide, SA'}","{'text': 'AU$5,900'}",white,GASOLINE,off_white,Mercedes-Benz,sedan,"{'unit': 'KILOMETERS', 'value': 181000}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11951,2021 Nissan Qashqai,{'text': '2021 Nissan Qashqui Ti For Sale\nOne...,"{'text': 'Melbourne, VIC'}","{'text': 'AU$24,950'}",grey,GASOLINE,black,Nissan,Qashqai,"{'unit': 'KILOMETERS', 'value': 67502}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11952,2013 Hyundai 2013 Hyundai ix35 LM3 Elite Wagon...,"{'text': ""Will not respond to is this availabl...","{'text': 'Melbourne, VIC'}","{'text': 'AU$8,990'}",grey,PETROL,black,Hyundai,2013 hyundai ix35 lm3 elite wagon 5dr spts aut...,"{'unit': 'KILOMETERS', 'value': 232045}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11953,2024 Mercedes-Benz GLA250,{'text': 'ALL CAR PARTS FOR SALE \nMelbourne C...,"{'text': 'Melbourne, VIC'}",{'text': 'AU$10'},black,PETROL,black,Mercedes-Benz,gla-class,"{'unit': 'KILOMETERS', 'value': 14000}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC
11954,2015 Mitsubishi Lancer,"{'text': 'Clean, reliable, and fuel-efficient....","{'text': 'Melbourne, VIC'}","{'text': 'AU$9,500'}",white,GASOLINE,black,Mitsubishi,Lancer,"{'unit': 'KILOMETERS', 'value': 252433}","{'co2_emissions': None, 'engine_size': None, '...",AUTOMATIC


## Dictionary Column Extraction

In [259]:
df["dist"] = df["vehicle_odometer_data"].astype(str).str[10:20]
df["dist"].value_counts()

dist
KILOMETERS    11820
                114
one, 'valu       17
MILES', 'v        4
Name: count, dtype: int64

In [260]:
mask = df["dist"] == "KILOMETERS"

df.loc[mask, "kms"] = (
    df.loc[mask, "vehicle_odometer_data"]
      .astype(str)
      .str.extract(r"'value'\s*:\s*(\d+)", expand=False)
      .astype("Int64")
)
df[["vehicle_odometer_data","kms"]].tail(15)

Unnamed: 0,vehicle_odometer_data,kms
11940,"{'unit': 'KILOMETERS', 'value': 245930}",245930
11941,"{'unit': 'KILOMETERS', 'value': 75003}",75003
11942,"{'unit': 'KILOMETERS', 'value': 60000}",60000
11943,"{'unit': 'KILOMETERS', 'value': 116348}",116348
11944,"{'unit': 'KILOMETERS', 'value': 75000}",75000
11945,"{'unit': 'KILOMETERS', 'value': 96000}",96000
11946,"{'unit': 'KILOMETERS', 'value': 245000}",245000
11947,"{'unit': 'KILOMETERS', 'value': 166000}",166000
11948,"{'unit': 'KILOMETERS', 'value': 99500}",99500
11949,"{'unit': 'KILOMETERS', 'value': 247000}",247000


In [261]:
col = ["vehicle_odometer_data","dist"]
df.drop(col,axis=1,inplace=True)

### Extracting Distance in KM from `vehicle_odometer_data`

- The `vehicle_odometer_data` column contained dictionary-like text values such as:

`{'unit': 'KILOMETERS', 'value': 181000}`

### Step 1: Extracting the Unit
We created a new column `dist` to identify the unit (mainly `KILOMETERS` and a few `MILES`) and checked the distribution using `value_counts()`.

### Step 2: Extracting the Odometer Value
For rows where the unit was **KILOMETERS**, we extracted the numeric `value` using regex and stored it in a new column `distance_in_km`.  
We used the `Int64` datatype to safely handle missing values (`NaN`).

### Note on MILES and Noisy Values
A very small number of rows contained the unit as **MILES**, and the remaining non-standard values were extremely few.  
Since their count was negligible compared to the dataset size, we ignored them to keep the data clean and consistent.

### Final Step: Dropping Temporary Columns
After extracting the distance feature, we dropped the columns:
- `vehicle_odometer_data`
- `dist`

In [262]:
df["price_currency"] = df["formatted_price"].astype(str).str[10:13]
df["price_currency"].value_counts()

price_currency
AU$    11782
         114
FRE       19
A$1       16
A$2       10
A$9        3
A$8        3
A$5        3
A$3        3
A$7        2
Name: count, dtype: int64

In [263]:
df["price_In_Dollars"] = (
    df["formatted_price"]
    .astype(str)
    .str.extract(r"(\d[\d,]*)", expand=False) 
    .str.replace(",", "", regex=False)
    .astype("Int64")
)
df[["price_currency","formatted_price"]][df["price_currency"]=="FRE"].head()

Unnamed: 0,price_currency,formatted_price
413,FRE,{'text': 'FREE'}
595,FRE,{'text': 'FREE'}
742,FRE,{'text': 'FREE'}
1399,FRE,{'text': 'FREE'}
1984,FRE,{'text': 'FREE'}


In [264]:
cols = ["formatted_price","price_currency"]
df.drop(cols,axis=1,inplace=True)

### Cleaning `formatted_price`

The `formatted_price` column had values like `{'text': 'AU$5,900'}`.  
We created `price_currency` to check the currency types and found `AU$`, some `A$1/A$2...` (same currency), and `FRE` (means **FREE**).

Since all `AU$`/`A$` represent Australian Dollars, we ignored currency and extracted only the numeric price value into `price_in_Dollars`.  
`FRE` entries were not used because they have no valid price.

Finally, we dropped the columns:
- `formatted_price`
- `price_currency`


In [265]:
df["location_text"] = df["location_text"].astype(str)
df["state"] = df["location_text"].str.extract(r",\s*([A-Z]{2,3})'", expand=False).str.strip()

In [266]:
df["state"].value_counts()

state
VIC    2727
QLD    2434
NSW    2305
WA     2217
SA     2151
Name: count, dtype: int64

## Extracting City and State from `location_text`

The `location_text` column contained values like `{'text': 'Melbourne, VIC`}`.  
We extracted one new feature from it:

- `state` → state code (e.g., VIC, NSW, QLD)

After extraction, we used `value_counts()` on `state` to check the distribution across different states.

In [267]:
cols = ["location_text","redacted_description","vehicle_specifications"]
df.drop(cols,axis=1,inplace=True)

## Dropping More Columns

- Dropped `location_text` after extracting  `state`, since it was no longer needed.

- Dropped `redacted_description` because it contains user-written descriptions with inconsistent detail (some rows have long descriptions, while many contain very little information like *"good car"* or *"negotiable"*), making it unreliable for modeling.

- Dropped `vehicle_specifications` because most of its key-value pairs were `null`, and only a small number of rows had useful values like horsepower/engine size, so it was not effective for prediction.

- Dropped `price currency` since is the same currency (Australian dollars) for the whole dataset.


In [268]:
df.tail(15)

Unnamed: 0,marketplace_listing_title,vehicle_exterior_color,vehicle_fuel_type,vehicle_interior_color,vehicle_make_display_name,vehicle_model_display_name,vehicle_transmission_type,kms,price_In_Dollars,state
11940,1998 Nissan Pulsar,blue,GASOLINE,grey,Nissan,Pulsar,AUTOMATIC,245930,10,QLD
11941,2013 HSV Maloo GEN-F MY14 R8 Utility Extended ...,red,PETROL,black,HSV,maloo gen-f my14 r8 utility extended cab 2dr m...,MANUAL,75003,80517,SA
11942,2019 Mercedes-Benz cla-class,grey,GASOLINE,black,Mercedes-Benz,,AUTOMATIC,60000,45000,VIC
11943,2011 Toyota estima - 20th anniversary edition,white,PETROL,,Toyota,estima - 20th anniversary edition,AUTOMATIC,116348,15500,VIC
11944,2022 BMW X3,black,GASOLINE,off_white,BMW,X3,AUTOMATIC,75000,56500,VIC
11945,2019 Hyundai Tucson,white,PETROL,charcoal,Hyundai,Tucson,AUTOMATIC,96000,21700,VIC
11946,2008 Nissan Patrol,white,DIESEL,grey,Nissan,Patrol,MANUAL,245000,14000,VIC
11947,2015 Volkswagen Polo GTI RWC+REGO,black,PETROL,,Volkswagen,polo gti rwc+rego,AUTOMATIC,166000,14500,VIC
11948,2020 Volvo XC60,blue,GASOLINE,black,Volvo,XC60,AUTOMATIC,99500,29000,VIC
11949,2019 Ford RANGER WILDTRAK PX MKlll AUTO,silver,DIESEL,black,Ford,ranger wildtrak px mklll auto,AUTOMATIC,247000,27999,SA


In [269]:
df.to_csv("cleaned_car_data.csv", index=False)