### Import and make soup

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

# Search-results page on www.carsales.com.au: Volkswagen Passat (B8 series) listings in the
# Melbourne region, sorted by price, first page of 12 results (offset=0, limit=12).
url = 'https://www.carsales.com.au/cars/dealer/private/demo/volkswagen/passat/b8-series/victoria-state/melbourne-region/?sortby=~Price&offset=0&setype=sort&limit=12'


# Fail fast: the timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops an HTTP error page from being parsed as if it held listings
# (which would otherwise just produce empty result lists further down).
response = requests.get(url, timeout=30)
response.raise_for_status()
html = response.text
soup = BeautifulSoup(html,'html.parser')

# Element collections consumed by the cells below.
price = soup.find_all('div',{'class':'pad-top-5 price-column ad-price'})   # price panels (also hold the listing link)
feature_section = soup.find_all('div',{'class':'vehicle-features'})        # odometer / engine feature lists
year = soup.find_all('span',{'class':'has-tooltip'})                       # spans the build year is read from

### Get price list / drive_away[] / excl_gov_charge[]

In [2]:
# Government charges (stamp duty) in VIC are approximated as a flat multiplier on the
# price excluding those charges:
# https://www.carsguide.com.au/car-advice/stamp-duty-for-cars-explained-31021
STAMP_DUTY_THRESHOLD = 66331       # AUD, price excluding government charges
LOW_RATE_MULTIPLIER = 1.042        # applied below the threshold
HIGH_RATE_MULTIPLIER = 1.052       # applied at or above the threshold
# Drive-away equivalent of the threshold at the high rate (66331 * 1.052).
DRIVE_AWAY_THRESHOLD = 69780.212


def _parse_price(text):
    """Turn a price label such as '$29,990*' into an integer by dropping '$', ',' and '*'.

    Raises ValueError if anything non-numeric is left after stripping those characters.
    """
    return int(text.replace('$','').replace(',','').replace('*',''))


drive_away = []        # price including government charges, one entry per price panel
excl_gov_charge = []   # price excluding government charges, one entry per price panel

for item in price:
    price_desc = item.find('div',{'class':'price-desc'}).text
    # The headline price is read the same way in both cases; price_desc says which kind it is.
    listed_price = _parse_price(item.find_all('div',{'class':'price'})[0].text)

    if price_desc == 'Excl. Govt. Charges':
        # Headline price excludes charges: estimate the drive-away price from it.
        excl_gov_charge_price = listed_price
        if excl_gov_charge_price < STAMP_DUTY_THRESHOLD:
            drive_away_price = LOW_RATE_MULTIPLIER * excl_gov_charge_price
        else:
            drive_away_price = HIGH_RATE_MULTIPLIER * excl_gov_charge_price
    else:
        # Headline price is drive-away: prefer the alternative price shown in the panel,
        # and fall back to reversing the estimate when it is missing or unparseable.
        drive_away_price = listed_price
        try:
            excl_gov_charge_price = _parse_price(
                item.find_all('div',{'class':'price-alt'})[0].find('strong').text
            )
        except (IndexError, AttributeError, ValueError):
            # IndexError: no 'price-alt' div; AttributeError: no <strong> inside it;
            # ValueError: its text is not a number. Anything else should surface.
            if drive_away_price < DRIVE_AWAY_THRESHOLD:
                excl_gov_charge_price = drive_away_price/LOW_RATE_MULTIPLIER
            else:
                excl_gov_charge_price = drive_away_price/HIGH_RATE_MULTIPLIER

    drive_away.append(drive_away_price)
    excl_gov_charge.append(excl_gov_charge_price)

### Get link list / links[]

In [4]:
# Build one absolute URL per price panel from the href of its first <a>.
# The scheme is part of the base so the results are complete URLs that can be opened
# or requested directly; without it they are scheme-less strings, not absolute URLs.
# NOTE(review): assumes each href is site-relative (starts with '/') — confirm against the page.
base_link = 'https://www.carsales.com.au'
links = [base_link + item.find('a').get('href') for item in price]


### Get features / odometers[] / engines[]

In [5]:
# Collect the odometer reading (as an int) and the engine description (as text)
# from every feature section found on the page.
odometers, engines = [], []
for section in feature_section:
    cells = section.find_all('div',{'class':'feature-text'})

    # First feature entry is the odometer label: drop spaces, the 'km' unit and
    # thousands separators before converting.
    km_label = cells[0].text
    for unwanted in (' ', 'km', ','):
        km_label = km_label.replace(unwanted, '')
    reading = int(km_label)

    # Fourth feature entry is taken verbatim as the engine description.
    engine_label = cells[3].text

    odometers.append(reading)
    engines.append(engine_label)

### Get build year / build_year[]

In [6]:
# Read the build year out of every 'has-tooltip' span: strip the letters 'M' and 'Y',
# spaces and newlines from the span text, then store what remains as a float.
year = soup.find_all('span',{'class':'has-tooltip'})
build_year = []
for span in year:
    label = span.text
    for unwanted in ('M', 'Y', ' ', '\n'):
        label = label.replace(unwanted, '')
    build_year.append(float(label))

### Make DataFrame

In [7]:
# Assemble the per-listing lists gathered above into one table; the dict order
# fixes the column order of the resulting frame.
page_columns = {
    'Drive_away_price(AUD)': drive_away,
    'Excl_gov_charge(AUD)': excl_gov_charge,
    'Odometer(km)': odometers,
    'Engine': engines,
    'Build_year': build_year,
    'link': links,
}
page_info = pd.DataFrame(page_columns)
print(page_info)

    Drive_away_price(AUD)  Excl_gov_charge(AUD)  Odometer(km)  \
0                26990.00          25902.111324         32000   
1                29788.00          27500.000000         48203   
2                27500.00          26391.554702         46975   
3                29165.58          27990.000000         47968   
4                29165.58          27990.000000         28678   
5                29500.00          28310.940499         34000   
6                30990.00          29613.000000          3398   
7                29990.00          28781.190019        123392   
8                31990.00          30700.575816         69493   
9                33488.00          32096.000000         35098   
10               33000.00          31669.865643         49588   
11               33880.00          32514.395393          2100   

                    Engine  Build_year  \
0   4cyl 1.8L Turbo Petrol        16.0   
1   4cyl 2.0L Turbo Diesel        16.0   
2   4cyl 1.8L Turbo Petrol  