In [12]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pandas as pd
import os
import time

# Filesystem path of the chromedriver binary; create_driver() passes it to Service().
# It is blank in this notebook, so fill it in before running.
# NOTE(review): how Service() treats an empty path depends on the installed
# Selenium version — confirm, or set an explicit path here.
CHROMEDRIVER_PATH = ""

def create_driver():
    """Start a Chrome WebDriver session configured for the scraper.

    The chromedriver binary is taken from the module-level
    ``CHROMEDRIVER_PATH`` and the driver's log is sent to ``os.devnull``.

    Returns:
        webdriver.Chrome: a running browser session. The caller is
        responsible for calling ``quit()`` on it.
    """
    chrome_flags = (
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        # Chrome expects "width,height"; the "1920x1080" form is not parsed
        # and the switch is ignored.
        '--window-size=1920,1080',
        '--start-maximized',
        '--enable-logging',
        "--profile-directory=Default",
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/122.0.0.0 Safari/537.36",
    )

    options = Options()
    for flag in chrome_flags:
        options.add_argument(flag)

    service = Service(CHROMEDRIVER_PATH, log_path=os.devnull)
    return webdriver.Chrome(service=service, options=options)


def login(driver, url, phone_number=None, password=None):
    """Open ``url`` and submit the phone/password login form.

    Credentials are never hardcoded: each one is taken from the matching
    argument or, when that is ``None``, from the ``TAAGER_PHONE`` /
    ``TAAGER_PASSWORD`` environment variables. Reading the password from the
    environment also avoids string-literal escaping problems (e.g. backslashes).

    Args:
        driver: Selenium WebDriver used to load the page.
        url: Page to open; the login form is expected to be shown there.
        phone_number: Phone number to type; defaults to ``$TAAGER_PHONE``.
        password: Password to type; defaults to ``$TAAGER_PASSWORD``.

    Returns:
        WebDriverWait: the 20-second wait bound to ``driver``, for reuse by
        later scraping steps.

    Raises:
        ValueError: if either credential is missing or empty.
    """
    # Resolve credentials before touching the browser so a missing value
    # fails fast instead of submitting an empty form.
    if phone_number is None:
        phone_number = os.environ.get("TAAGER_PHONE")
    if password is None:
        password = os.environ.get("TAAGER_PASSWORD")
    if not phone_number or not password:
        raise ValueError(
            "Missing login credentials: pass phone_number/password or set the "
            "TAAGER_PHONE and TAAGER_PASSWORD environment variables."
        )

    driver.get(url)
    wait = WebDriverWait(driver, 20)

    # Fill in phone number
    phone_input = wait.until(EC.presence_of_element_located((By.NAME, 'phoneNumber')))
    phone_input.clear()
    phone_input.send_keys(phone_number)

    # Fill in password
    password_input = wait.until(EC.presence_of_element_located((By.ID, 'password')))
    password_input.clear()
    password_input.send_keys(password)

    # Click login once the button is actually visible and enabled; mere
    # presence in the DOM does not guarantee the click is accepted.
    login_btn = wait.until(EC.element_to_be_clickable((By.ID, 'phone-login-submit-btn')))
    login_btn.click()

    return wait


def _card_value(card, selector, attribute=None):
    """Return the text (or ``attribute``) of ``selector`` inside ``card``, '' on failure."""
    try:
        element = card.find_element(By.CSS_SELECTOR, selector)
        return element.get_attribute(attribute) if attribute else element.text
    except Exception:
        # Best effort: a card lacking this field still yields a row.
        return ''


def get_data(driver, wait, max_pages=19, output_path="taager_products.xlsx"):
    """Scrape product cards page by page and save them to an Excel workbook.

    For every ``app-product-card`` element on the current page the title,
    price, profit and image ``src`` are collected (missing fields become
    ``''``). The function then clicks the pagination link whose text is the
    next page number and repeats until no such link is found, an error
    occurs, or ``max_pages`` pages have been scraped.

    Args:
        driver: Selenium WebDriver already showing the first product page.
        wait: WebDriverWait bound to ``driver``.
        max_pages: Maximum number of pages to scrape. The default of 19
            covers the same pages (1-19) as the previous hard-coded bound.
        output_path: Destination ``.xlsx`` file for the scraped table.

    Returns:
        pandas.DataFrame: one row per product card with the columns
        ``Title``, ``Price``, ``Profit`` and ``Images``.
    """
    columns = ['Title', 'Price', 'Profit', 'Images']
    records = []
    page = 1

    while page <= max_pages:
        try:
            wait.until(EC.presence_of_all_elements_located((By.TAG_NAME, 'app-product-card')))
            for card in driver.find_elements(By.TAG_NAME, 'app-product-card'):
                records.append({
                    'Title': _card_value(card, 'a.product-title'),
                    'Price': _card_value(card, 'div.sale-product-price'),
                    'Profit': _card_value(card, 'div.sale-product-profit'),
                    'Images': _card_value(card, 'img', attribute='src'),
                })
        except Exception as e:
            print("Error scraping products:", e)
            break

        page += 1
        if page > max_pages:
            # Page limit reached: do not navigate to a page that will not be scraped.
            break
        try:
            link = driver.find_element(By.XPATH, f'//a[@class="page-link" and text()="{page}"]')
            # JS click avoids failures when the link is outside the viewport.
            driver.execute_script("arguments[0].click();", link)
            time.sleep(2)  # small delay to let the page load
            print(f"Navigated to Page {page}")
        except Exception:
            print("No more pages")
            break

    # Passing `columns` keeps the four headers even when nothing was scraped.
    df = pd.DataFrame(records, columns=columns)
    # Save to Excel; the xlsx format keeps non-ASCII (e.g. Arabic) text intact.
    df.to_excel(output_path, index=False)
    print(f"Data saved to {output_path}")

    return df


if __name__ == "__main__":
    # Entry point: log in, scrape every product page, always close the browser.
    driver = create_driver()
    try:
        wait = login(driver, "https://taager.com/ae/products/category/5b14ad4d-8cad-4886-b252-6ffa636388a9")
        time.sleep(5)  # wait for redirect after login

        # Keep the scraped table at notebook scope: later cells read `data`.
        data = get_data(driver, wait)
        print("Done")
    finally:
        # Runs even if login or scraping raised, so no Chrome process is left behind.
        driver.quit()


Navigated to Page 2
Navigated to Page 3
Navigated to Page 4
Navigated to Page 5
Navigated to Page 6
Navigated to Page 7
Navigated to Page 8
Navigated to Page 9
Navigated to Page 10
Navigated to Page 11
Navigated to Page 12
Navigated to Page 13
Navigated to Page 14
Navigated to Page 15
No more pages
Data saved to taager_products.xlsx
Done


In [9]:
import requests
from pathlib import Path

def download_images(image_urls, folder="product_images"):
    """Download each URL into ``folder`` as ``product_<n>.png``.

    ``<n>`` is the 1-based position of the URL in ``image_urls``, so file
    names stay aligned with the input order even when some entries are
    skipped or fail. Files are always named ``.png`` regardless of the
    actual image format served.

    Args:
        image_urls: Iterable of image URLs. Entries that are not non-blank
            strings (``''``, ``None``, NaN, ...) are skipped.
        folder: Destination directory; created (with parents) if missing.

    Returns:
        list[pathlib.Path]: paths of the files that were written.
    """
    target_dir = Path(folder)
    target_dir.mkdir(parents=True, exist_ok=True)
    saved_paths = []

    for idx, url in enumerate(image_urls):
        # Products without an image carry an empty value; don't issue a request for them.
        if not isinstance(url, str) or not url.strip():
            print(f"Skipped item {idx + 1}: no image URL")
            continue
        try:
            # With stream=True the connection is only released once the body is
            # consumed or the response is closed; the context manager guarantees
            # the latter, including on non-200 answers.
            with requests.get(url, stream=True, timeout=10) as response:
                if response.status_code != 200:
                    print(f"Failed to download {url}")
                    continue
                file_path = target_dir / f"product_{idx+1}.png"
                with open(file_path, 'wb') as f:
                    for chunk in response.iter_content(1024):
                        f.write(chunk)
            print(f"Saved: {file_path}")
            saved_paths.append(file_path)
        except Exception as e:
            # Best effort: one bad URL must not abort the remaining downloads.
            print(f"Error downloading {url}: {e}")

    return saved_paths


In [None]:
# Expects `data` to hold the DataFrame returned by get_data(); that frame
# stores the image links in its 'Images' column.
download_images(data['Images'].tolist())
data.to_excel("taager_products.xlsx", index=False)
