### **Imports and Logging Setup**

In [3]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd
import logging

from pathlib import Path

# logging's file handler does not create missing parent directories, so make
# sure ../logs exists before basicConfig opens the log file there.
Path('../logs').mkdir(parents=True, exist_ok=True)

# Send INFO-and-above records to the scraping log as
# "<timestamp> - <level> - <message>".
logging.basicConfig(
    filename='../logs/scraping.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

### **WebDriver Initialization**

In [None]:
# Time limits (in seconds) applied to the browser session created below.
PAGE_AND_SCRIPT_TIMEOUT = 120
EXPLICIT_WAIT_TIMEOUT = 150

# Start a Chrome session; the same limit is used for page loads and scripts.
driver = webdriver.Chrome()
driver.set_page_load_timeout(PAGE_AND_SCRIPT_TIMEOUT)
driver.set_script_timeout(PAGE_AND_SCRIPT_TIMEOUT)

# Shared explicit-wait helper bound to the session above.
wait = WebDriverWait(driver, timeout=EXPLICIT_WAIT_TIMEOUT)

### **Data Scraping Process**

In [None]:
# Search-result URL template per brand; "{}" is filled with the page number.
brands = {
    "Mercedes": "https://turbo.az/autos?page={}&q%5Bmake%5D%5B%5D=4&q%5Byear_from%5D=2019",
    "Hyundai": "https://turbo.az/autos?page={}&q%5Bmake%5D%5B%5D=1&q%5Byear_from%5D=2019",
    "Kia": "https://turbo.az/autos?page={}&q%5Bmake%5D%5B%5D=8&q%5Byear_from%5D=2019",
    "BMW": "https://turbo.az/autos?page={}&q%5Bmake%5D%5B%5D=3&q%5Byear_from%5D=2019"
}

MAX_PAGES = 22           # result pages visited per brand
MAX_RETRIES = 3          # attempts for each page load / listing fetch / extraction
PAGE_SETTLE_SECONDS = 5  # pause after every page load before looking for listings

# Output column -> locator of the element on a listing page that holds its value.
# The dict order is also the column order of the resulting DataFrames.
FIELD_LOCATORS = {
    "Price": (By.XPATH, "//div[@class='product-price__i product-price__i--bold']"),
    "Make": (By.CSS_SELECTOR, "label[for='ad_make_id'] + span"),
    "Model": (By.CSS_SELECTOR, "label[for='ad_model'] + span"),
    "Year": (By.CSS_SELECTOR, "label[for='ad_reg_year'] + span"),
    "Color": (By.CSS_SELECTOR, "label[for='ad_color'] + span"),
    "Engine": (By.CSS_SELECTOR, "label[for='ad_engine_volume'] + span"),
    "Kilometer": (By.CSS_SELECTOR, "label[for='ad_mileage'] + span"),
    "Transmission": (By.CSS_SELECTOR, "label[for='ad_transmission'] + span"),
    "New": (By.CSS_SELECTOR, "label[for='ad_new'] + span"),
}


def _with_retries(action, failure_message):
    """Call ``action()`` up to MAX_RETRIES times and return its result.

    Every failed attempt is logged as an error; the exception raised by the
    final attempt is re-raised to the caller.
    """
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            return action()
        except Exception as error:
            logging.error(f"{failure_message}. Retrying ({attempt}/{MAX_RETRIES})... Error: {error}")
            if attempt == MAX_RETRIES:
                raise


def _load_page(page_url):
    """Navigate the shared driver to ``page_url`` and pause briefly."""
    driver.get(page_url)
    time.sleep(PAGE_SETTLE_SECONDS)


def _find_listings():
    """Return the visible listing link elements on the current results page."""
    return wait.until(EC.visibility_of_all_elements_located((By.CLASS_NAME, "products-i__link")))


def _read_listing_fields():
    """Read every field in FIELD_LOCATORS from the current page.

    Returns a complete ``{column: text}`` dict, or raises if any field cannot
    be located. Nothing is stored until all fields were read, so a failed or
    retried attempt can never leave the output columns with unequal lengths.
    """
    return {
        field: wait.until(EC.presence_of_element_located(locator)).text.strip()
        for field, locator in FIELD_LOCATORS.items()
    }


def _open_and_read_listing(listing):
    """Click ``listing``, switch to the tab it opens, and return its fields."""
    tabs_before = driver.window_handles
    wait.until(EC.element_to_be_clickable(listing))
    listing.click()
    # Wait for the new tab to actually exist before switching to it.
    wait.until(EC.new_window_is_opened(tabs_before))
    new_tab = next(h for h in driver.window_handles if h not in tabs_before)
    driver.switch_to.window(new_tab)
    return _with_retries(_read_listing_fields, "Failed to extract data")


def _close_extra_tabs(results_tab):
    """Close every tab except ``results_tab`` and switch back to it.

    The results tab itself is never closed, even when a listing failed before
    its own tab was opened.
    """
    for handle in driver.window_handles:
        if handle != results_tab:
            driver.switch_to.window(handle)
            driver.close()
    driver.switch_to.window(results_tab)


all_data = {}

try:
    for brand, url in brands.items():
        print(f"Scraping data for {brand}...")
        data = {field: [] for field in FIELD_LOCATORS}

        try:
            for page in range(1, MAX_PAGES + 1):
                page_url = url.format(page)
                _with_retries(lambda: _load_page(page_url), f"Failed to load page {page}")
                listings = _with_retries(_find_listings, f"Failed to fetch listings on page {page}")
                results_tab = driver.current_window_handle

                for listing in listings:
                    record = None
                    try:
                        record = _open_and_read_listing(listing)
                    except Exception as error:
                        # A single bad listing is skipped, not fatal for the brand.
                        logging.error(f"Error extracting data from a listing: {error}")
                    finally:
                        # A failure here (e.g. a dead session) escalates to the
                        # brand-level handler below.
                        _close_extra_tabs(results_tab)

                    if record is not None:
                        for field, value in record.items():
                            data[field].append(value)

        except Exception as error:
            logging.critical(f"Critical error occurred while scraping {brand}: {error}")
            print(f"Scraping for {brand} stopped early; keeping {len(data['Price'])} rows collected so far.")
        else:
            print(f"Data for {brand} scraped successfully.")
        finally:
            # Keep whatever was collected, even after a critical error.
            all_data[brand] = pd.DataFrame(data)
finally:
    # Always release the browser, including on KeyboardInterrupt.
    driver.quit()

### **Saving Scraped Data to CSV**

In [None]:
from pathlib import Path

# to_csv does not create missing directories; create the output folder up
# front so a finished scrape is not lost to a missing ../data/raw.
raw_dir = Path("../data/raw")
raw_dir.mkdir(parents=True, exist_ok=True)

# Write one CSV per brand, named after the lower-cased brand, without the index.
for brand, df in all_data.items():
    df.to_csv(raw_dir / f"{brand.lower()}.csv", index=False)
    print(f"Data for {brand} saved to data/raw/{brand.lower()}.csv")