In [None]:
import asyncio
from playwright.async_api import async_playwright
import nest_asyncio
from datetime import datetime, timedelta
import json

# Patch asyncio so an event loop that is already running can be re-entered.
# NOTE(review): presumably required because this cell runs inside a notebook
# kernel that already owns a running loop — confirm it is still needed.
nest_asyncio.apply()

# Origin/destination pairs, written as the location slugs that appear in the
# kiwi.com results URL (Hebrew locale).
_DEFAULT_ROUTES = (
    ("נמל-התעופה-שארל-דה-גול-פריז-צרפת", "לונדון-הממלכה-המאוחדת"),
    ("נמל-התעופה-שארל-דה-גול-פריז-צרפת", "רומא-איטליה"),
    ("לונדון-הממלכה-המאוחדת", "נמל-התעופה-שארל-דה-גול-פריז-צרפת"),
    ("לונדון-הממלכה-המאוחדת", "רומא-איטליה"),
    ("רומא-איטליה", "לונדון-הממלכה-המאוחדת"),
    ("רומא-איטליה", "נמל-התעופה-שארל-דה-גול-פריז-צרפת"),
)

# Placeholder stored when an expected element is missing from a result card.
_UNKNOWN = "Unknown"

# (output key, data-test attribute) for each baggage counter on a result card.
_BAGGAGE_FIELDS = (
    ("personal_item", "BaggageBreakdownPersonalItem"),
    ("cabin_bag", "BaggageBreakdownCabinBag"),
    ("checked_bag", "BaggageBreakdownCheckedBag"),
)


def _build_results_url(origin, destination, departure_date, return_date):
    """Return the kiwi.com round-trip results URL for the given slugs and dates."""
    return (
        f"https://www.kiwi.com/il/search/results/{origin}/{destination}/"
        f"{departure_date.strftime('%Y-%m-%d')}/{return_date.strftime('%Y-%m-%d')}?return=true"
    )


def _sector_selector(sector_index):
    """Return the CSS selector of the sector wrapper at 1-based *sector_index*."""
    return f'div[data-test="ResultCardSectorWrapper"]:nth-child({sector_index})'


async def _attribute_or_unknown(locator, attribute):
    """Return *attribute* of the first element matched by *locator*, or "Unknown"."""
    # `.first` avoids a strict-mode violation when the selector matches
    # several elements; without it one ambiguous match drops the whole card.
    target = locator.first
    if await target.count() > 0:
        return await target.get_attribute(attribute)
    return _UNKNOWN


async def _inner_text_or_unknown(locator):
    """Return the inner text of the first element matched by *locator*, or "Unknown"."""
    target = locator.first
    if await target.count() > 0:
        return await target.inner_text()
    return _UNKNOWN


async def _scrape_sector(card, sector_index):
    """Collect times, airports, carriers and stop badge for one sector of a card.

    *sector_index* is 1 for the first sector wrapper in the card and 2 for the
    second (stored by the caller as the onward and return legs respectively).
    """
    sector = _sector_selector(sector_index)
    timestamps = f'{sector} div[data-test="TripTimestamp"]'
    stations = card.locator(f'{sector} div[data-test="stationName"]')
    logos = card.locator(f'{sector} div[data-test="ResultCardCarrierLogo"] img')

    return {
        'departure_time': await _attribute_or_unknown(
            card.locator(f'{timestamps}:first-child time'), 'datetime'
        ),
        'arrival_time': await _attribute_or_unknown(
            card.locator(f'{timestamps}:last-child time'), 'datetime'
        ),
        # The first station name is the departure airport, the second the arrival.
        'departure_airport': await _inner_text_or_unknown(stations.nth(0)),
        'arrival_airport': await _inner_text_or_unknown(stations.nth(1)),
        # One logo per carrier; the carrier name is read from the image alt text.
        'airlines': [
            await logos.nth(j).get_attribute('alt')
            for j in range(await logos.count())
        ],
        'flight_type': await _inner_text_or_unknown(
            card.locator(f'{sector} div[data-test^="StopCountBadge"]')
        ),
    }


async def _scrape_baggage(card, card_number):
    """Return the three baggage counters of a card as a dict of strings.

    A counter that is absent from the card is reported as "0"; if reading the
    baggage section raises, all three counters are reported as "Unknown".
    """
    breakdown = card.locator('div[data-test="BaggageBreakdown"]')
    baggage = {}
    try:
        for key, data_test in _BAGGAGE_FIELDS:
            texts = await breakdown.locator(f'[data-test="{data_test}"]').all_text_contents()
            baggage[key] = texts[0] if texts else "0"
    except Exception as e:
        print(f"Error collecting baggage information for flight {card_number}: {e}")
        return {key: _UNKNOWN for key, _ in _BAGGAGE_FIELDS}
    return baggage


async def _scrape_card(card, card_number, route_label):
    """Build the output record for one result card."""
    onward = await _scrape_sector(card, 1)
    inbound = await _scrape_sector(card, 2)
    baggage = await _scrape_baggage(card, card_number)

    # Key order is kept stable because it is the field order of the JSON output.
    return {
        'route': route_label,
        'departure_time': onward['departure_time'],
        'arrival_time': onward['arrival_time'],
        'return_departure_time': inbound['departure_time'],
        'return_arrival_time': inbound['arrival_time'],
        'onward_departure_airport': onward['departure_airport'],
        'onward_arrival_airport': onward['arrival_airport'],
        'return_departure_airport': inbound['departure_airport'],
        'return_arrival_airport': inbound['arrival_airport'],
        'onward_airlines': onward['airlines'],
        'return_airlines': inbound['airlines'],
        'onward_flight_type': onward['flight_type'],
        'return_flight_type': inbound['flight_type'],
        'personal_item': baggage['personal_item'],
        'cabin_bag': baggage['cabin_bag'],
        'checked_bag': baggage['checked_bag'],
        # Wall-clock time at which this card was read.
        'snapshot_time': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }


async def scrape_multiple_routes(
    routes=None,
    max_ttt: int = 30,
    max_los: int = 5,
    max_results_per_route: int = 100,
    output_path: str = 'flights_data.json',
    headless: bool = False,
):
    """Scrape kiwi.com round-trip result cards and dump them to a JSON file.

    For every route, every departure offset ``ttt`` in ``1..max_ttt`` (days
    after today) and every trip length ``los`` in ``1..max_los`` (days between
    departure and return), the results page is loaded in Chromium and up to
    ``max_results_per_route`` result cards are converted to dicts.

    Args:
        routes: Iterable of ``(origin, destination)`` URL slugs. ``None`` uses
            the built-in Paris/London/Rome pairs.
        max_ttt: Largest departure offset, in days from today.
        max_los: Largest gap, in days, between departure and return.
        max_results_per_route: Cap on cards read from a single results page.
        output_path: Destination of the JSON dump (UTF-8, non-ASCII kept as is).
        headless: Whether Chromium is launched without a visible window.

    Returns:
        None. The collected records are written to ``output_path`` once all
        pages have been processed; nothing is written if the run is aborted
        by an exception that escapes the per-page handling.

    Failures while loading a page or reading a card are printed and skipped so
    that one bad page does not stop the run.
    """
    if routes is None:
        routes = _DEFAULT_ROUTES

    all_flights_data = []

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        try:
            context = await browser.new_context()
            page = await context.new_page()

            today = datetime.now()

            for origin, destination in routes:
                for ttt in range(1, max_ttt + 1):
                    for los in range(1, max_los + 1):
                        route_label = f"{origin} -> {destination} (TTT={ttt}, LOS={los})"
                        try:
                            departure_date = today + timedelta(days=ttt)
                            return_date = departure_date + timedelta(days=los)
                            results_url = _build_results_url(
                                origin, destination, departure_date, return_date
                            )

                            print(f"Scraping route: {route_label}")

                            await page.goto(results_url, timeout=60000)
                            await page.wait_for_load_state("networkidle", timeout=60000)

                            flight_cards = page.locator('div[data-test="ResultCardWrapper"]')
                            count = await flight_cards.count()
                            print(f"Total flight cards found: {count}")

                            for i in range(min(count, max_results_per_route)):
                                try:
                                    flight_data = await _scrape_card(
                                        flight_cards.nth(i), i + 1, route_label
                                    )
                                except Exception as e:
                                    print(f"Error collecting flight card {i + 1}: {e}")
                                    continue

                                all_flights_data.append(flight_data)

                                # Echo every 5th record as a lightweight progress log.
                                if len(all_flights_data) % 5 == 0:
                                    print(f"Flight #{len(all_flights_data)} data: {flight_data}")

                        except Exception as e:
                            print(f"Error occurred for route {route_label}: {e}")
        finally:
            # Close the browser even when the run is cancelled or fails part-way.
            await browser.close()

    print(f"\nTotal flights across all routes: {len(all_flights_data)}")
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(all_flights_data, f, ensure_ascii=False, indent=4)

# Run the scraper. A bare top-level `await` is only accepted by hosts that
# evaluate the cell inside a running event loop (e.g. IPython/Jupyter); in a
# plain Python script this line is a SyntaxError.
await scrape_multiple_routes()


Scraping route: נמל-התעופה-שארל-דה-גול-פריז-צרפת -> לונדון-הממלכה-המאוחדת (TTT=1, LOS=1)
Total flight cards found: 3
Scraping route: נמל-התעופה-שארל-דה-גול-פריז-צרפת -> לונדון-הממלכה-המאוחדת (TTT=1, LOS=2)
Total flight cards found: 3
Flight #5 data: {'route': 'נמל-התעופה-שארל-דה-גול-פריז-צרפת -> לונדון-הממלכה-המאוחדת (TTT=1, LOS=2)', 'departure_time': '2025-01-27T07:20:00.000+02:00', 'arrival_time': '2025-01-27T07:30:00.000+02:00', 'return_departure_time': '2025-01-29T18:40:00.000+02:00', 'return_arrival_time': '2025-01-29T21:00:00.000+02:00', 'onward_departure_airport': 'CDG', 'onward_arrival_airport': 'LGW', 'return_departure_airport': 'LTN', 'return_arrival_airport': 'CDG', 'onward_airlines': ['easyJet'], 'return_airlines': ['easyJet'], 'onward_flight_type': 'ישירה', 'return_flight_type': 'ישירה', 'personal_item': '1', 'cabin_bag': '0', 'checked_bag': '0', 'snapshot_time': '2025-01-26 00:42:17'}
Scraping route: נמל-התעופה-שארל-דה-גול-פריז-צרפת -> לונדון-הממלכה-המאוחדת (TTT=1, LOS=3)