In [25]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import time

# --- Scrape the used-car listing pages of autoboom.co.il into `df` -----------------
#
# Result: `cars_info` (dict of column lists) and `df` (one row per listing card).
# Any value that cannot be found on a card is stored as the string "N/A".
#
# NOTE(review): the spec columns are filled purely by position. Rows whose fuel is
# 'Electro' come out shifted one column to the left (horsepower ends up in
# `car_engine`, and so on), presumably because those cards have no engine-volume
# entry. That is left untouched here because the later `df_electro` cells of this
# notebook correct it.

NOT_AVAILABLE = "N/A"
FIRST_PAGE, LAST_PAGE = 1, 129   # listing pages to visit, inclusive
PAGE_LOAD_TIMEOUT_S = 10         # max wait for <body> to appear
LAZY_LOAD_PAUSE_S = 2            # grace period after scrolling to the bottom

# Absolute path to a single node of the rendered page. It is not relative to a
# card, so it can only ever yield one value per page.
# NOTE(review): replace with a selector relative to the card once the markup of the
# location element is confirmed.
LOCATION_XPATH = '//*[@id="app"]/div[2]/div[1]/main/div[4]/div/div[1]/span/a/div[2]/div/ul[4]/li[2]'


def _text_at(items, index):
    """Return the stripped text of ``items[index]``, or "N/A" if there is no such entry."""
    try:
        return items[index].get_text(strip=True)
    except IndexError:
        return NOT_AVAILABLE


def _page_location(driver):
    """Return the text of the node at ``LOCATION_XPATH``, or "N/A" if it is not on the page."""
    try:
        # find_element never returns a falsy value; it raises when nothing matches.
        return driver.find_element(By.XPATH, LOCATION_XPATH).text
    except NoSuchElementException:
        return NOT_AVAILABLE


def _parse_card(box, car_location):
    """Turn one ``offer_card__content`` element into a record keyed like ``cars_info``.

    Args:
        box: BeautifulSoup tag of a single listing card.
        car_location: location text to store for this card.

    Returns:
        dict mapping every ``cars_info`` column name to a string value.
    """
    title_div = box.find('div', class_='offer_card__title')
    car_title = title_div.get_text(strip=True) if title_div is not None else NOT_AVAILABLE

    # Searching for a single CSS class also matches tags that carry additional
    # classes, so this one lookup covers the "-highlighted" price variant as well.
    price_div = box.find('div', class_='offer_card__price')
    price_span = price_div.find('span', class_='offer_card__price_value') if price_div is not None else None
    if price_span is not None:
        car_price_highlighted = price_span.get_text(strip=True).replace('\xa0', ' ')
    else:
        car_price_highlighted = NOT_AVAILABLE

    car_meta = box.find_all('ul', class_='offer_card__meta')
    # First list: mileage and ownership; second list: technical specs (by position).
    usage_items = car_meta[0].find_all('li') if len(car_meta) > 0 else []
    spec_items = car_meta[1].find_all('li') if len(car_meta) > 1 else []

    try:
        car_language = (
            car_meta[2].find_all('li')[2]
            .find('span', class_='offer_card__lang')
            .get_text(strip=True)
        )
    except (IndexError, AttributeError):
        # Third list, its third entry, or the language span is missing.
        car_language = NOT_AVAILABLE

    return {
        'car_title': car_title,
        'car_price_highlighted': car_price_highlighted,
        'car_km': _text_at(usage_items, 0),
        'car_hand': _text_at(usage_items, 1),
        'car_fuel': _text_at(spec_items, 0),
        'car_engine': _text_at(spec_items, 1),
        'car_hp': _text_at(spec_items, 2),
        'car_transmission': _text_at(spec_items, 3),
        'car_drive': _text_at(spec_items, 4),
        'car_color': _text_at(spec_items, 5),
        'car_body': _text_at(spec_items, 6),
        'car_location': car_location,
        'car_language': car_language,
    }


# Configure a headless Chrome session (no visible browser window).
options = webdriver.ChromeOptions()
# The `options.headless` property was deprecated in Selenium 4.8 and later removed,
# so the command-line switch is used instead (requires Chrome 109 or newer).
options.add_argument("--headless=new")
driver = webdriver.Chrome(options=options)

cars_info = {'car_title': [], 'car_price_highlighted': [], 'car_km': [], 'car_hand': [], 'car_fuel': [], 'car_engine': [], 'car_hp': [], 'car_transmission': [], 'car_drive': [], 'car_color': [], 'car_body': [], 'car_location': [], 'car_language': []}

try:
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        url = f"https://autoboom.co.il/en/used/cars?page={page}"
        driver.get(url)
        try:
            WebDriverWait(driver, PAGE_LOAD_TIMEOUT_S).until(
                EC.presence_of_element_located((By.TAG_NAME, "body"))
            )
        except TimeoutException:
            print("Timed out waiting for page to load: ", url)
            continue

        # Scroll to the bottom so lazy-loaded cards get a chance to render.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(LAZY_LOAD_PAUSE_S)

        soup = BeautifulSoup(driver.page_source, "html.parser")
        car_boxes = soup.find_all('div', class_='offer_card__content')
        if not car_boxes:
            continue

        # The XPath is identical for every card on the page, so query the browser
        # once per page instead of once per card.
        car_location = _page_location(driver)

        for box in car_boxes:
            record = _parse_card(box, car_location)
            for field, value in record.items():
                cars_info[field].append(value)
finally:
    # Always release the browser process, even if the crawl is interrupted or fails.
    driver.quit()

df = pd.DataFrame(cars_info)


In [32]:
# Show the listings DataFrame (pandas abbreviates the middle rows of a long frame).
df

Unnamed: 0,car_title,car_price_highlighted,car_km,car_hand,car_fuel,car_engine,car_hp,car_transmission,car_drive,car_color,car_body,car_location,car_language
0,"Tesla Model 3, 2022",₪ 169 999,47 000 km,2nd hand,Electro,283 hp,Automatic,RWD,White,Sedan,,,
1,"Peugeot 2008, 2018",₪ 55 900,89 000 km,1st hand,Gasoline,1.2 l,110 hp,Automatic,FWD,White,Estate 5-door,,
2,"Toyota Corolla, 2007",₪ 17 000,260 000 km,3rd hand,Gasoline,1.6 l,110 hp,Automatic,FWD,Silver,Sedan,,
3,"MG EHS, 2021",₪ 128 000,27 000 km,2nd hand,Plug-in Hybrid,1.5 l,258 hp,Automatic,FWD,White,SUV 5-doors,,
4,"SEAT Ibiza, 2023",₪ 92 500,24 000 km,1st hand,Gasoline,1.0 l,110 hp,Robotic,FWD,Silver,Mini 5-doors,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,"Toyota C-HR, 2020",₪ 112 900,40 086 km,1st hand,Hybrid,1.8 l,122 hp,Automatic,FWD,Light Grey,SUV 5-doors,,
5138,"Suzuki SX4, 2017",₪ 58 700,137 700 km,2nd hand,Gasoline,1.6 l,118 hp,Variable,FWD,Black,SUV 5-doors,,
5139,"Opel Corsa, 2017",₪ 37 300,120 000 km,2nd hand,Gasoline,1.4 l,90 hp,Automatic,FWD,White,Mini 5-doors,,
5140,"Kia Rio, 2014",,253 585 km,3rd hand,Gasoline,1.4 l,109 hp,Automatic,FWD,White,Sedan,,


In [34]:
# Build `df_electro`: a corrected copy of the rows whose fuel is 'Electro'.
#
# In the raw data these rows are shifted one column to the left (horsepower sits in
# `car_engine`, transmission in `car_hp`, ... colour in `car_drive`, body in
# `car_color`), presumably because electric listings have no engine-volume entry.
# Every value is moved one column to the right and `car_engine` is set to 'None'.
#
# The shift is applied only to rows that still show the symptom (a horsepower value
# in `car_engine`). This keeps the cell idempotent: re-running it after the fix has
# been written back into `df` no longer shifts already-corrected rows a second time.
SHIFTED_COLUMNS = ['car_engine', 'car_hp', 'car_transmission', 'car_drive', 'car_color', 'car_body']

df_electro = df[df['car_fuel'] == 'Electro'].copy()

needs_shift = df_electro['car_engine'].astype(str).str.endswith('hp')
if needs_shift.any():
    # .to_numpy() drops the column labels so values are assigned by position
    # (engine -> hp, hp -> transmission, ...) instead of being re-aligned by name.
    df_electro.loc[needs_shift, SHIFTED_COLUMNS[1:]] = (
        df_electro.loc[needs_shift, SHIFTED_COLUMNS[:-1]].to_numpy()
    )
    df_electro.loc[needs_shift, 'car_engine'] = 'None'

df_electro


Unnamed: 0,car_title,car_price_highlighted,car_km,car_hand,car_fuel,car_engine,car_hp,car_transmission,car_drive,car_color,car_body,car_location,car_language
0,"Tesla Model 3, 2022",₪ 169 999,47 000 km,2nd hand,Electro,,283 hp,Automatic,RWD,White,Sedan,,
165,"GAC Motor GE3, 2021",₪ 86 000,108 000 km,2nd hand,Electro,,177 hp,Automatic,FWD,White,SUV 5-doors,,
313,"JAC e-S2, 2022",₪ 86 000,50 000 km,1st hand,Electro,,115 hp,Automatic,FWD,White,SUV 5-doors,,
316,"Volvo XC40, 2022",₪ 240 000,30 000 km,1st hand,Electro,,408 hp,Automatic,4x4,Grey,SUV 5-doors,,
374,"MINI Hatch, 2022",₪ 175 000,46 000 km,1st hand,Electro,,184 hp,Automatic,FWD,Black,Mini 3-doors,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5090,"MG ZS, 2020",₪ 98 500,30 022 km,1st hand,Electro,,143 hp,Automatic,FWD,Light blue,SUV 5-doors,,
5110,"MG ZS, 2021",₪ 99 600,72 628 km,1st hand,Electro,,143 hp,Automatic,FWD,Blue,SUV 5-doors,,
5111,"MG ZS, 2021",₪ 99 900,40 116 km,1st hand,Electro,,143 hp,Automatic,FWD,Red,SUV 5-doors,,
5114,"Peugeot 2008, 2022",₪ 109 900,64 000 km,1st hand,Electro,,136 hp,Automatic,FWD,White,SUV 5-doors,,


In [35]:
# Row labels of the electric listings; df_electro was filtered out of df, so the
# same labels address the same rows in both frames.
electro_indexes = df_electro.index

# Copy each corrected column from df_electro back into the matching rows of df.
corrected_columns = (
    'car_body',
    'car_color',
    'car_drive',
    'car_transmission',
    'car_hp',
    'car_engine',
)
for column_name in corrected_columns:
    df.loc[electro_indexes, column_name] = df_electro[column_name]




In [36]:
# Display df again to inspect it after the electric rows were written back.
df

Unnamed: 0,car_title,car_price_highlighted,car_km,car_hand,car_fuel,car_engine,car_hp,car_transmission,car_drive,car_color,car_body,car_location,car_language
0,"Tesla Model 3, 2022",₪ 169 999,47 000 km,2nd hand,Electro,,283 hp,Automatic,RWD,White,Sedan,,
1,"Peugeot 2008, 2018",₪ 55 900,89 000 km,1st hand,Gasoline,1.2 l,110 hp,Automatic,FWD,White,Estate 5-door,,
2,"Toyota Corolla, 2007",₪ 17 000,260 000 km,3rd hand,Gasoline,1.6 l,110 hp,Automatic,FWD,Silver,Sedan,,
3,"MG EHS, 2021",₪ 128 000,27 000 km,2nd hand,Plug-in Hybrid,1.5 l,258 hp,Automatic,FWD,White,SUV 5-doors,,
4,"SEAT Ibiza, 2023",₪ 92 500,24 000 km,1st hand,Gasoline,1.0 l,110 hp,Robotic,FWD,Silver,Mini 5-doors,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5137,"Toyota C-HR, 2020",₪ 112 900,40 086 km,1st hand,Hybrid,1.8 l,122 hp,Automatic,FWD,Light Grey,SUV 5-doors,,
5138,"Suzuki SX4, 2017",₪ 58 700,137 700 km,2nd hand,Gasoline,1.6 l,118 hp,Variable,FWD,Black,SUV 5-doors,,
5139,"Opel Corsa, 2017",₪ 37 300,120 000 km,2nd hand,Gasoline,1.4 l,90 hp,Automatic,FWD,White,Mini 5-doors,,
5140,"Kia Rio, 2014",,253 585 km,3rd hand,Gasoline,1.4 l,109 hp,Automatic,FWD,White,Sedan,,


In [38]:
# List the distinct values present in the drive-type column.
df.loc[:, 'car_drive'].unique()


array(['RWD', 'FWD', '4x4'], dtype=object)

In [37]:
# Write the listings to a CSV file in the working directory, without the index column.
csv_path = 'autoboom_cars_info.csv'
df.to_csv(csv_path, index=False)