# Used car prices - Autotrader

## Data Cleaning

#### Data source: https://www.autotrader.com.au/
#### Scope: Perth WA, 25km radius from postcode 6000

In [369]:
# Import libraries
import numpy as np
import pandas as pd
from pathlib import Path
import numpy as np
import requests
import time
import json

# Import the API key
from config import geoapify_key

# Turn off warning messages
import warnings
warnings.filterwarnings("ignore")

##### Load the data

In [323]:
# Load the raw listings CSV (path is relative to the working directory) into a DataFrame
source_csv = Path('output_from_1_to_51.csv')
df = pd.read_csv(source_csv)

# Preview the first rows
df.head()

Unnamed: 0,Year model,Car Spec,Kilometres,Seller type,Price,Transmission,Body type,Drive type,Engine,Fuel type,...,Make,Model,Variant,Series,Warranty when new (months),Warranty when new (kms),Service interval (months),Service interval (kms),Country of origin,Vehicle segment
0,2007,2007 Audi TT 8J Coupe 2dr S tronic 6sp 2.0T,"104,108km",Dealer: Used,"$17,990",Sports Automatic Dual Clutch,-,-,-,,...,Audi,TT,8J Coupe 2dr S tronic 6sp 2.0T,,,,,,,
1,2019,2019 Hyundai I30 Active PD2 MY19,"74,070km",Dealer: Used,"$20,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,"4 cyl, 2 L",Unleaded Petrol,...,Hyundai,I30,Active,PD2 MY19,60.0,999000.0,12.0,15000.0,,
2,2019,2019 Hyundai I30 Active PD2 MY19,"66,975km",Dealer: Used,"$21,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,"4 cyl, 2 L",Unleaded Petrol,...,Hyundai,I30,Active,PD2 MY19,60.0,999000.0,12.0,15000.0,,
3,2019,2019 Hyundai I30 Active PD2 MY19,"68,845km",Dealer: Used,"$21,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,"4 cyl, 2 L",Unleaded Petrol,...,Hyundai,I30,Active,PD2 MY19,60.0,999000.0,12.0,15000.0,,
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,"188,978km",Dealer: Used,"$24,750",Automatic,"Ute Tray, 4 Doors, 5 Seats",4x4,"4 cyl, 2.4 L",Diesel,...,Mitsubishi,Triton,GLX (4X4),MQ MY18,60.0,100000.0,12.0,15000.0,,


##### Clean the data

In [324]:
# Inspect the raw frame: row count, dtype of each column and non-null count per column
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1194 entries, 0 to 1193
Data columns (total 57 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   Year model                   1194 non-null   int64  
 1   Car Spec                     1191 non-null   object 
 2   Kilometres                   1191 non-null   object 
 3   Seller type                  1191 non-null   object 
 4   Price                        1191 non-null   object 
 5   Transmission                 1191 non-null   object 
 6   Body type                    1191 non-null   object 
 7   Drive type                   1191 non-null   object 
 8   Engine                       1191 non-null   object 
 9   Fuel type                    1090 non-null   object 
 10  Fuel consumption             1191 non-null   object 
 11  Colour ext / int             1191 non-null   object 
 12  Registration                 1191 non-null   object 
 13  Rego expiry       

In [325]:
# Columns that are not needed downstream
columns_to_drop = [
    'Seller type', 'Engine', 'Registration', 'Rego expiry', 'Stock No',
    'ANCAP Safety rating', 'Green overall rating',
    'Front tyre size', 'Front rim size', 'Rear tyre size', 'Rear rim size',
    'Injection / Carburation',
    'Front suspension', 'Rear suspension', 'Front brakes', 'Rear brakes',
    'CO2 level (g/km)', 'Green house rating',
    'Overall HxWxL', 'Ground clearance unladen', 'Wheelbase', 'Turning circle',
    'Rear track', 'Front track', 'Gross trailer weight braked',
    'Series',
    'Warranty when new (months)', 'Warranty when new (kms)',
    'Service interval (months)', 'Service interval (kms)',
    'Country of origin', 'Vehicle segment',
]

# Build the working frame from the raw one, leaving `df` untouched
df_clean = df.drop(columns=columns_to_drop)

df_clean.head()


Unnamed: 0,Year model,Car Spec,Kilometres,Price,Transmission,Body type,Drive type,Fuel type,Fuel consumption,Colour ext / int,...,CC,Number of cylinders,Fuel tank capacity,Valve gear type,Maximum torque,Maximum power (kW),Kerb weight,Make,Model,Variant
0,2007,2007 Audi TT 8J Coupe 2dr S tronic 6sp 2.0T,"104,108km","$17,990",Sports Automatic Dual Clutch,-,-,,-,Blue / -,...,,,,,-,-,-,Audi,TT,8J Coupe 2dr S tronic 6sp 2.0T
1,2019,2019 Hyundai I30 Active PD2 MY19,"74,070km","$20,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
2,2019,2019 Hyundai I30 Active PD2 MY19,"66,975km","$21,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
3,2019,2019 Hyundai I30 Active PD2 MY19,"68,845km","$21,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,"188,978km","$24,750",Automatic,"Ute Tray, 4 Doors, 5 Seats",4x4,Diesel,7.60 L / 100 km,White / Black,...,2440.0,4.0,75.0,16.0,430 @ 2500 RPM,133 @ 3500 RPM,1930 kg,Mitsubishi,Triton,GLX (4X4)


In [326]:
# Keep a single listing per VIN, then discard every row that still has a missing value
df_clean = df_clean.drop_duplicates(subset="VIN").dropna()
df_clean.head()

Unnamed: 0,Year model,Car Spec,Kilometres,Price,Transmission,Body type,Drive type,Fuel type,Fuel consumption,Colour ext / int,...,CC,Number of cylinders,Fuel tank capacity,Valve gear type,Maximum torque,Maximum power (kW),Kerb weight,Make,Model,Variant
1,2019,2019 Hyundai I30 Active PD2 MY19,"74,070km","$20,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
2,2019,2019 Hyundai I30 Active PD2 MY19,"66,975km","$21,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
3,2019,2019 Hyundai I30 Active PD2 MY19,"68,845km","$21,998",Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,"188,978km","$24,750",Automatic,"Ute Tray, 4 Doors, 5 Seats",4x4,Diesel,7.60 L / 100 km,White / Black,...,2440.0,4.0,75.0,16.0,430 @ 2500 RPM,133 @ 3500 RPM,1930 kg,Mitsubishi,Triton,GLX (4X4)
5,2016,2016 Mitsubishi Triton GLX (4X4) MQ MY16,"141,855km","$26,900",Automatic,"Ute Tray, 4 Doors, 5 Seats",4x4,Diesel,7.60 L / 100 km,White / Black,...,2440.0,4.0,75.0,16.0,430 @ 2500 RPM,133 @ 3500 RPM,1930 kg,Mitsubishi,Triton,GLX (4X4)


In [327]:
# Re-check row count, dtypes and non-null counts after dropping duplicates and nulls
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 507 entries, 1 to 1193
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Year model           507 non-null    int64  
 1   Car Spec             507 non-null    object 
 2   Kilometres           507 non-null    object 
 3   Price                507 non-null    object 
 4   Transmission         507 non-null    object 
 5   Body type            507 non-null    object 
 6   Drive type           507 non-null    object 
 7   Fuel type            507 non-null    object 
 8   Fuel consumption     507 non-null    object 
 9   Colour ext / int     507 non-null    object 
 10  VIN                  507 non-null    object 
 11  Dealer               507 non-null    object 
 12  Address              507 non-null    object 
 13  Seating capacity     507 non-null    float64
 14  Doors                507 non-null    float64
 15  CC                   507 non-null    fl

In [328]:
# Convert Kilometres (e.g. "74,070km") and Price (e.g. "$20,998") from formatted text to integers.
# regex=False forces literal matching: "$" is a regex end-of-string anchor, and the
# default value of `regex` in Series.str.replace differs between pandas versions.
df_clean['Kilometres'] = (
    df_clean['Kilometres']
    .str.replace(',', '', regex=False)
    .str.replace('km', '', regex=False)
    .astype('int64')
)
df_clean['Price'] = (
    df_clean['Price']
    .str.replace('$', '', regex=False)
    .str.replace(',', '', regex=False)
    .astype('int64')
)
df_clean.head()

Unnamed: 0,Year model,Car Spec,Kilometres,Price,Transmission,Body type,Drive type,Fuel type,Fuel consumption,Colour ext / int,...,CC,Number of cylinders,Fuel tank capacity,Valve gear type,Maximum torque,Maximum power (kW),Kerb weight,Make,Model,Variant
1,2019,2019 Hyundai I30 Active PD2 MY19,74070,20998,Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
2,2019,2019 Hyundai I30 Active PD2 MY19,66975,21998,Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
3,2019,2019 Hyundai I30 Active PD2 MY19,68845,21998,Automatic,"HATCH, 4 Doors, 5 Seats",Front Wheel Drive,Unleaded Petrol,7.40 L / 100 km,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,188978,24750,Automatic,"Ute Tray, 4 Doors, 5 Seats",4x4,Diesel,7.60 L / 100 km,White / Black,...,2440.0,4.0,75.0,16.0,430 @ 2500 RPM,133 @ 3500 RPM,1930 kg,Mitsubishi,Triton,GLX (4X4)
5,2016,2016 Mitsubishi Triton GLX (4X4) MQ MY16,141855,26900,Automatic,"Ute Tray, 4 Doors, 5 Seats",4x4,Diesel,7.60 L / 100 km,White / Black,...,2440.0,4.0,75.0,16.0,430 @ 2500 RPM,133 @ 3500 RPM,1930 kg,Mitsubishi,Triton,GLX (4X4)


In [329]:
# Check datatypes: Kilometres and Price should now be reported as int64
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 507 entries, 1 to 1193
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Year model           507 non-null    int64  
 1   Car Spec             507 non-null    object 
 2   Kilometres           507 non-null    int64  
 3   Price                507 non-null    int64  
 4   Transmission         507 non-null    object 
 5   Body type            507 non-null    object 
 6   Drive type           507 non-null    object 
 7   Fuel type            507 non-null    object 
 8   Fuel consumption     507 non-null    object 
 9   Colour ext / int     507 non-null    object 
 10  VIN                  507 non-null    object 
 11  Dealer               507 non-null    object 
 12  Address              507 non-null    object 
 13  Seating capacity     507 non-null    float64
 14  Doors                507 non-null    float64
 15  CC                   507 non-null    fl

In [330]:
# Collapse transmission categories that occur fewer than 60 times into "Other"
transmission = df_clean['Transmission'].value_counts()
transmission_replace = transmission.index[transmission < 60].tolist()

# One vectorised pass: every value listed as rare is overwritten with "Other"
is_rare_transmission = df_clean['Transmission'].isin(transmission_replace)
df_clean['Transmission'] = df_clean['Transmission'].mask(is_rare_transmission, "Other")

df_clean['Transmission'].value_counts()

Automatic              259
Other                   97
Manual                  90
Continuous Variable     61
Name: Transmission, dtype: int64

In [331]:
# Keep only the body style (the text before the first comma, which drops the
# door and seat counts) and normalise the capitalisation
df_clean['Body type'] = (
    df_clean['Body type']
    .str.split(',').str[0]
    .str.strip()
    .str.lower()
    .str.capitalize()
)
body_type = df_clean['Body type'].value_counts()

# Collapse body types that occur fewer than 10 times into "Other"
body_type_replace = body_type.index[body_type < 10].tolist()
is_rare_body = df_clean['Body type'].isin(body_type_replace)
df_clean['Body type'] = df_clean['Body type'].mask(is_rare_body, "Other")

df_clean['Body type'].value_counts()

Suv         177
Hatch       127
Sedan        79
Ute tray     72
Wagon        30
Other        12
Coupe        10
Name: Body type, dtype: int64

In [332]:
# Merge the two spellings of the same drive type: 'Four Wheel Drive' becomes '4x4'
drive_type_aliases = {'Four Wheel Drive': '4x4'}
df_clean['Drive type'] = df_clean['Drive type'].replace(drive_type_aliases)
df_clean['Drive type'].value_counts()

Front Wheel Drive    247
4x4                  101
All Wheel Drive       82
Rear Wheel Drive      77
Name: Drive type, dtype: int64

In [333]:
# Extract the leading litres figure from text such as "7.40 L / 100 km".
# The decimal part is a non-capturing group so the pattern has exactly one capture
# group, and expand=False returns a Series. With two capture groups str.extract
# returns a two-column DataFrame, which cannot be reliably assigned to one column.
# Values that do not start with a number followed by whitespace become NaN.
df_clean['Fuel consumption'] = (
    df_clean['Fuel consumption']
    .str.extract(r'^(\d+(?:\.\d+)?)\s', expand=False)
    .astype('float64')
)
df_clean = df_clean.rename(columns={'Fuel consumption': 'Fuel consumption per 100km'})
df_clean.head()

Unnamed: 0,Year model,Car Spec,Kilometres,Price,Transmission,Body type,Drive type,Fuel type,Fuel consumption per 100km,Colour ext / int,...,CC,Number of cylinders,Fuel tank capacity,Valve gear type,Maximum torque,Maximum power (kW),Kerb weight,Make,Model,Variant
1,2019,2019 Hyundai I30 Active PD2 MY19,74070,20998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
2,2019,2019 Hyundai I30 Active PD2 MY19,66975,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
3,2019,2019 Hyundai I30 Active PD2 MY19,68845,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver / -,...,1999.0,4.0,50.0,16.0,203 @ 4700 RPM,120 @ 6200 RPM,1276 kg,Hyundai,I30,Active
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,188978,24750,Automatic,Ute tray,4x4,Diesel,7.6,White / Black,...,2440.0,4.0,75.0,16.0,430 @ 2500 RPM,133 @ 3500 RPM,1930 kg,Mitsubishi,Triton,GLX (4X4)
5,2016,2016 Mitsubishi Triton GLX (4X4) MQ MY16,141855,26900,Automatic,Ute tray,4x4,Diesel,7.6,White / Black,...,2440.0,4.0,75.0,16.0,430 @ 2500 RPM,133 @ 3500 RPM,1930 kg,Mitsubishi,Triton,GLX (4X4)


In [334]:
# Reduce the "exterior / interior" colour column to the exterior colour only:
# rename the column first, then keep the first whitespace-separated token
df_clean = df_clean.rename(columns={'Colour ext / int': 'Colour ext'})
df_clean['Colour ext'] = df_clean['Colour ext'].str.split().str[0]
colour_ext = df_clean['Colour ext'].value_counts()

# Collapse colours that occur fewer than 10 times into "Other"
colour_ext_replace = colour_ext.index[colour_ext < 10].tolist()
is_rare_colour = df_clean['Colour ext'].isin(colour_ext_replace)
df_clean['Colour ext'] = df_clean['Colour ext'].mask(is_rare_colour, "Other")

df_clean['Colour ext'].value_counts()

White     196
Silver     75
Grey       64
Blue       51
Black      47
Red        37
Other      37
Name: Colour ext, dtype: int64

In [335]:
# List the column names remaining in the working frame
df_clean.columns

Index(['Year model', 'Car Spec', 'Kilometres', 'Price', 'Transmission',
       'Body type', 'Drive type', 'Fuel type', 'Fuel consumption per 100km',
       'Colour ext', 'VIN', 'Dealer', 'Address', 'Seating capacity', 'Doors',
       'CC', 'Number of cylinders', 'Fuel tank capacity', 'Valve gear type',
       'Maximum torque', 'Maximum power (kW)', 'Kerb weight', 'Make', 'Model',
       'Variant'],
      dtype='object')

In [336]:
# Whole-number specification columns were read as float; cast them to int64
columns_to_convert = [
    'Seating capacity',
    'Doors',
    'CC',
    'Number of cylinders',
    'Fuel tank capacity',
    'Valve gear type',
]
df_clean = df_clean.astype({column: 'int64' for column in columns_to_convert})
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 507 entries, 1 to 1193
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Year model                  507 non-null    int64  
 1   Car Spec                    507 non-null    object 
 2   Kilometres                  507 non-null    int64  
 3   Price                       507 non-null    int64  
 4   Transmission                507 non-null    object 
 5   Body type                   507 non-null    object 
 6   Drive type                  507 non-null    object 
 7   Fuel type                   507 non-null    object 
 8   Fuel consumption per 100km  507 non-null    float64
 9   Colour ext                  507 non-null    object 
 10  VIN                         507 non-null    object 
 11  Dealer                      507 non-null    object 
 12  Address                     507 non-null    object 
 13  Seating capacity            507 no

In [337]:
# Convert Kerb weight from text such as "1276 kg" to an integer: keep the first
# whitespace-separated token and cast it.
# astype returns a new Series, so the result must be assigned back to the column;
# calling it without assignment leaves the column as strings.
df_clean['Kerb weight'] = df_clean['Kerb weight'].str.split().str[0].astype('int64')
df_clean.head()

Unnamed: 0,Year model,Car Spec,Kilometres,Price,Transmission,Body type,Drive type,Fuel type,Fuel consumption per 100km,Colour ext,...,CC,Number of cylinders,Fuel tank capacity,Valve gear type,Maximum torque,Maximum power (kW),Kerb weight,Make,Model,Variant
1,2019,2019 Hyundai I30 Active PD2 MY19,74070,20998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,1999,4,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active
2,2019,2019 Hyundai I30 Active PD2 MY19,66975,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,1999,4,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active
3,2019,2019 Hyundai I30 Active PD2 MY19,68845,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,1999,4,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,188978,24750,Automatic,Ute tray,4x4,Diesel,7.6,White,...,2440,4,75,16,430 @ 2500 RPM,133 @ 3500 RPM,1930,Mitsubishi,Triton,GLX (4X4)
5,2016,2016 Mitsubishi Triton GLX (4X4) MQ MY16,141855,26900,Automatic,Ute tray,4x4,Diesel,7.6,White,...,2440,4,75,16,430 @ 2500 RPM,133 @ 3500 RPM,1930,Mitsubishi,Triton,GLX (4X4)


In [338]:
# Final filter: discard every row that holds a 0 in any column
has_zero = df_clean.eq(0).any(axis=1)
df_clean = df_clean.loc[~has_zero]
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 503 entries, 1 to 1193
Data columns (total 25 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Year model                  503 non-null    int64  
 1   Car Spec                    503 non-null    object 
 2   Kilometres                  503 non-null    int64  
 3   Price                       503 non-null    int64  
 4   Transmission                503 non-null    object 
 5   Body type                   503 non-null    object 
 6   Drive type                  503 non-null    object 
 7   Fuel type                   503 non-null    object 
 8   Fuel consumption per 100km  503 non-null    float64
 9   Colour ext                  503 non-null    object 
 10  VIN                         503 non-null    object 
 11  Dealer                      503 non-null    object 
 12  Address                     503 non-null    object 
 13  Seating capacity            503 no

In [340]:
# Create empty placeholder columns that the Geoapify lookup fills with latitude and longitude
for coordinate_column in ("lat", "lon"):
    df_clean[coordinate_column] = ""
df_clean.head()

Unnamed: 0,Year model,Car Spec,Kilometres,Price,Transmission,Body type,Drive type,Fuel type,Fuel consumption per 100km,Colour ext,...,Fuel tank capacity,Valve gear type,Maximum torque,Maximum power (kW),Kerb weight,Make,Model,Variant,lat,lon
1,2019,2019 Hyundai I30 Active PD2 MY19,74070,20998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active,,
2,2019,2019 Hyundai I30 Active PD2 MY19,66975,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active,,
3,2019,2019 Hyundai I30 Active PD2 MY19,68845,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active,,
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,188978,24750,Automatic,Ute tray,4x4,Diesel,7.6,White,...,75,16,430 @ 2500 RPM,133 @ 3500 RPM,1930,Mitsubishi,Triton,GLX (4X4),,
5,2016,2016 Mitsubishi Triton GLX (4X4) MQ MY16,141855,26900,Automatic,Ute tray,4x4,Diesel,7.6,White,...,75,16,430 @ 2500 RPM,133 @ 3500 RPM,1930,Mitsubishi,Triton,GLX (4X4),,


In [344]:
# Geoapify geocoding search endpoint and the query parameters shared by every request
base_url = "https://api.geoapify.com/v1/geocode/search?"
params = dict(apiKey=geoapify_key, format="json")

In [345]:
# Print a message to follow up the lat and lon search
print("Starting lat and lon search:")
print("----------------------------")

# Many listings share the same address, so each distinct address is geocoded
# once and the result is reused for every other row with that address.
geocode_cache = {}

# Loop through df_clean and look up coordinates for each listing's address
for i, row in df_clean.iterrows():
    address = f'{row["Address"]}'

    if address not in geocode_cache:
        # Build the query per request instead of mutating the shared `params` dict;
        # the timeout stops a stalled connection from hanging the notebook.
        response = requests.get(base_url, params={**params, "text": address}, timeout=30)

        # Fail loudly on HTTP errors (bad key, quota exceeded, ...) rather than
        # trying to parse an error payload as a geocoding result
        response.raise_for_status()

        # Convert response to JSON and take the first match, if there is one
        results = response.json().get("results", [])
        if results:
            geocode_cache[address] = (results[0]["lat"], results[0]["lon"])
        else:
            # No match: store NaN so the later float conversion still works
            geocode_cache[address] = (np.nan, np.nan)

    lat, lon = geocode_cache[address]

    # Add lat and lon to df_clean
    df_clean.loc[i, "lat"] = lat
    df_clean.loc[i, "lon"] = lon

    # Log the search results
    if pd.isna(lat):
        print(f"No coordinates found for {row['Address']}.")
    else:
        print(f"Coordinates for {row['Address']} fetched. Lat: {lat}, Lon: {lon}.")

# Display sample data to confirm that the coordinates appear
df_clean.head()


Starting lat and lon search:
----------------------------
Coordinates for 1324 Albany Highway, Cannington, WA fetched. Lat: -32.0166894, Lon: 115.9322155.
Coordinates for 1324 Albany Highway, Cannington, WA fetched. Lat: -32.0166894, Lon: 115.9322155.
Coordinates for 1324 Albany Highway, Cannington, WA fetched. Lat: -32.0166894, Lon: 115.9322155.
Coordinates for 76 Division Street, Welshpool, WA fetched. Lat: -31.987188, Lon: 115.935923.
Coordinates for 76 Division Street, Welshpool, WA fetched. Lat: -31.987188, Lon: 115.935923.
Coordinates for 170 Welshpool Road, Welshpool, WA fetched. Lat: -31.995227, Lon: 115.938215.
Coordinates for 76 Division Street, Welshpool, WA fetched. Lat: -31.987188, Lon: 115.935923.
Coordinates for 464 Canning Highway, Como, WA fetched. Lat: -32.00604, Lon: 115.8597.
Coordinates for 76 Division Street, Welshpool, WA fetched. Lat: -31.987188, Lon: 115.935923.
Coordinates for 1093 Albany Highway, Bentley, WA fetched. Lat: -31.999547, Lon: 115.916029.
Coordina

Unnamed: 0,Year model,Car Spec,Kilometres,Price,Transmission,Body type,Drive type,Fuel type,Fuel consumption per 100km,Colour ext,...,Fuel tank capacity,Valve gear type,Maximum torque,Maximum power (kW),Kerb weight,Make,Model,Variant,lat,lon
1,2019,2019 Hyundai I30 Active PD2 MY19,74070,20998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active,-32.0167,115.932
2,2019,2019 Hyundai I30 Active PD2 MY19,66975,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active,-32.0167,115.932
3,2019,2019 Hyundai I30 Active PD2 MY19,68845,21998,Automatic,Hatch,Front Wheel Drive,Unleaded Petrol,7.4,Silver,...,50,16,203 @ 4700 RPM,120 @ 6200 RPM,1276,Hyundai,I30,Active,-32.0167,115.932
4,2017,2017 Mitsubishi Triton GLX (4X4) MQ MY18,188978,24750,Automatic,Ute tray,4x4,Diesel,7.6,White,...,75,16,430 @ 2500 RPM,133 @ 3500 RPM,1930,Mitsubishi,Triton,GLX (4X4),-31.9872,115.936
5,2016,2016 Mitsubishi Triton GLX (4X4) MQ MY16,141855,26900,Automatic,Ute tray,4x4,Diesel,7.6,White,...,75,16,430 @ 2500 RPM,133 @ 3500 RPM,1930,Mitsubishi,Triton,GLX (4X4),-31.9872,115.936


In [354]:
# Cast the coordinate columns to float, one column at a time
for coordinate_column in ('lat', 'lon'):
    df_clean[coordinate_column] = df_clean[coordinate_column].astype('float')
df_clean.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 503 entries, 1 to 1193
Data columns (total 27 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Year model                  503 non-null    int64  
 1   Car Spec                    503 non-null    object 
 2   Kilometres                  503 non-null    int64  
 3   Price                       503 non-null    int64  
 4   Transmission                503 non-null    object 
 5   Body type                   503 non-null    object 
 6   Drive type                  503 non-null    object 
 7   Fuel type                   503 non-null    object 
 8   Fuel consumption per 100km  503 non-null    float64
 9   Colour ext                  503 non-null    object 
 10  VIN                         503 non-null    object 
 11  Dealer                      503 non-null    object 
 12  Address                     503 non-null    object 
 13  Seating capacity            503 no

In [365]:
# Write the cleaned dataset to CSV, leaving out the DataFrame index
df_clean.to_csv(path_or_buf='used_cars_dataset.csv', index=False)