In [1]:
import requests
from bs4 import BeautifulSoup
import os
import time
import random
import re
from tqdm import tqdm

# Configuration
# HTTP headers sent with every request this script makes (listing pages and
# image downloads alike).
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Referer": "https://divar.ir/",
    "Accept-Language": "en-US,en;q=0.5"
}

# Target models mapped to their search queries.
# Keys double as the per-model folder name and the file-name prefix of every
# saved image; values are substituted into BASE_URL as the `q` parameter
# (spaces are already written as %20).
target_models = {
    "پژو206": "پژو%20206",
    "پژو207": "پژو%20207",
    "سمند": "سمند",
    "دنا": "دنا",
    "تارا": "تارا",
    "رانا": "رانا",
}

# Search URL template; scrape_model() appends "&page=<n>" for pagination.
BASE_URL = "https://divar.ir/s/iran/car?q={query}"
# Number of listing pages requested per model (pages 1..NUM_PAGES_TO_SCRAPE).
NUM_PAGES_TO_SCRAPE = 10
# Upper bound on how many of the collected URLs are downloaded per model.
MAX_IMAGES_PER_MODEL = 200
# (min, max) bounds in seconds for the random pause taken after each listing
# page request and after each image download.
DOWNLOAD_DELAY = (1, 3)

def create_folders():
    """Make sure the output directory tree exists and return its root.

    The root directory is ``Cars``; one sub-directory is created inside it
    for every key of ``target_models``. Directories that already exist are
    left untouched.

    Returns:
        The name of the root output directory.
    """
    root = "Cars"
    # Root first, then one folder per model, in target_models order.
    wanted = [root, *(os.path.join(root, name) for name in target_models)]
    for path in wanted:
        os.makedirs(path, exist_ok=True)
    return root

def get_image_urls(page_url):
    """Extract listing-photo URLs from a Divar search results page.

    The page is fetched with the module-level ``headers``; every
    ``img.kt-image-block__image`` element is inspected and its ``data-src``
    (or, failing that, ``src``) attribute is kept when it points at the
    Divar photo CDN.

    Args:
        page_url: Full URL of the results page to fetch.

    Returns:
        A list of unique image URLs in the order they appear in the page.
        An empty list is returned if the request or parsing fails; the
        error is printed rather than raised.
    """
    photo_prefix = 'https://s100.divarcdn.com/static/photo'
    try:
        response = requests.get(page_url, headers=headers, timeout=15)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        candidates = (
            img.get('data-src') or img.get('src')
            for img in soup.select('img.kt-image-block__image')
        )
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is reproducible across runs (a set would scramble it, which
        # matters because callers truncate and number the list by index).
        return list(dict.fromkeys(
            url for url in candidates
            if url and url.startswith(photo_prefix)
        ))

    # Deliberately broad: one bad page must not abort the whole scraping run.
    except Exception as e:
        print(f"Error getting image URLs from {page_url}: {e}")
        return []

def _fetch_image_bytes(url):
    """Return the response body for *url*, or None if the status is not 200."""
    response = requests.get(url, headers=headers, timeout=15)
    if response.status_code == 200:
        return response.content
    return None


def scrape_model(model_name, query, base_folder):
    """Scrape and download images for a specific car model.

    Listing pages 1..NUM_PAGES_TO_SCRAPE are fetched for *query*, the image
    URLs found on them are merged (duplicates across pages are dropped), and
    at most MAX_IMAGES_PER_MODEL of them are downloaded into
    ``<base_folder>/<model_name>``. For each image the higher-quality
    variant is tried first; if the server does not answer 200 the original
    URL is used instead and the file name gets a ``_thumb`` suffix.

    Args:
        model_name: Folder name and file-name prefix for this model.
        query: Search query inserted into BASE_URL.
        base_folder: Root output directory containing the model folder.

    Returns:
        The number of images successfully written to disk (0 when no image
        URLs were found).
    """
    print(f"\nStarting scraping for {model_name}...")
    all_image_urls = []

    # Scrape multiple pages
    for page_num in tqdm(range(1, NUM_PAGES_TO_SCRAPE + 1), desc=f"Scraping {model_name} pages"):
        page_url = BASE_URL.format(query=query) + f"&page={page_num}"
        all_image_urls.extend(get_image_urls(page_url))
        time.sleep(random.uniform(*DOWNLOAD_DELAY))

    # The same listing can show up on several pages; keep only the first
    # occurrence of each URL so an image is never downloaded twice and the
    # reported count reflects distinct images.
    all_image_urls = list(dict.fromkeys(all_image_urls))

    if not all_image_urls:
        print(f"No images found for {model_name}!")
        return 0

    print(f"Found {len(all_image_urls)} images for {model_name}")

    # Download images
    success_count = 0
    model_folder = os.path.join(base_folder, model_name)

    for i, url in enumerate(tqdm(all_image_urls[:MAX_IMAGES_PER_MODEL], desc=f"Downloading {model_name}")):
        try:
            file_ext = '.jpg' if 'jpg' in url else '.webp'

            # Try to get higher quality version
            suffix = ""
            content = _fetch_image_bytes(url.replace('webp_thumbnail', 'post'))
            if content is None:
                # Fallback to original URL
                suffix = "_thumb"
                content = _fetch_image_bytes(url)

            if content is None:
                print(f"\nSkipping image {i+1} for {model_name}: server did not return the image")
            else:
                file_name = f"{model_name}_{i+1}{suffix}{file_ext}"
                with open(os.path.join(model_folder, file_name), 'wb') as f:
                    f.write(content)
                success_count += 1

        # Deliberately broad: a single failed image must not stop the run.
        except Exception as e:
            print(f"\nError downloading image {i+1} for {model_name}: {str(e)}")

        # Pause after every attempt, including failed ones, so errors do not
        # turn into a burst of back-to-back requests.
        time.sleep(random.uniform(*DOWNLOAD_DELAY))

    return success_count

def generate_summary(base_folder):
    """Print a per-model count of the image files present on disk.

    For every key of ``target_models`` whose folder exists under
    *base_folder*, the ``.jpg`` and ``.webp`` files in it are counted and
    reported. Models without a folder are skipped.

    Args:
        base_folder: Root output directory containing the model folders.
    """
    image_suffixes = ('.jpg', '.webp')
    print("\n📊 Download Summary:")
    for model in target_models:
        folder = os.path.join(base_folder, model)
        if not os.path.exists(folder):
            continue
        count = sum(1 for entry in os.listdir(folder) if entry.endswith(image_suffixes))
        print(f"- {model}: {count} images")

def main():
    """Run the full scrape: prepare folders, process each model, report.

    Models are handled one after another in ``target_models`` order; the
    per-model download counts are added up and printed after the summary.
    """
    print("🚀 Starting Divar car images scraper...")
    base_folder = create_folders()

    # The generator is consumed sequentially, so models are still scraped
    # one at a time in dictionary order.
    total_downloaded = sum(
        scrape_model(model_name, query, base_folder)
        for model_name, query in target_models.items()
    )

    generate_summary(base_folder)
    print(f"\n✅ All operations completed! Total images downloaded: {total_downloaded}")


if __name__ == "__main__":
    main()

🚀 Starting Divar car images scraper...

Starting scraping for پژو206...


Scraping پژو206 pages: 100%|██████████| 10/10 [00:23<00:00,  2.36s/it]


Found 72 images for پژو206


Downloading پژو206: 100%|██████████| 72/72 [02:43<00:00,  2.26s/it]



Starting scraping for پژو207...


Scraping پژو207 pages: 100%|██████████| 10/10 [00:25<00:00,  2.53s/it]


Found 72 images for پژو207


Downloading پژو207: 100%|██████████| 72/72 [02:43<00:00,  2.28s/it]



Starting scraping for سمند...


Scraping سمند pages: 100%|██████████| 10/10 [00:25<00:00,  2.54s/it]


Found 72 images for سمند


Downloading سمند: 100%|██████████| 72/72 [02:42<00:00,  2.25s/it]



Starting scraping for دنا...


Scraping دنا pages: 100%|██████████| 10/10 [00:29<00:00,  2.97s/it]


Found 72 images for دنا


Downloading دنا: 100%|██████████| 72/72 [02:41<00:00,  2.24s/it]



Starting scraping for تارا...


Scraping تارا pages: 100%|██████████| 10/10 [00:25<00:00,  2.59s/it]


Found 69 images for تارا


Downloading تارا: 100%|██████████| 69/69 [02:44<00:00,  2.38s/it]



Starting scraping for رانا...


Scraping رانا pages: 100%|██████████| 10/10 [00:21<00:00,  2.18s/it]


Found 72 images for رانا


Downloading رانا: 100%|██████████| 72/72 [02:40<00:00,  2.23s/it]


📊 Download Summary:
- پژو206: 72 images
- پژو207: 72 images
- سمند: 72 images
- دنا: 72 images
- تارا: 69 images
- رانا: 72 images

✅ All operations completed! Total images downloaded: 429



