## Libraries

In [None]:
import os
import json
import gradio as gr
import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from openai import OpenAI
from IPython.display import display, Markdown

In [None]:
headers = {
 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
}

def site():
    url = 'https://www.skyscanner.it/transport/flights/mila/ika/250728/?adultsv2=1&cabinclass=economy&childrenv2=&inboundaltsenabled=false&outboundaltsenabled=false&preferdirects=false&rtn=0'
    response = requests.get(url, headers=headers)
    soup = BeautifulSoup(response.content, 'html.parser')
    return soup

In [62]:
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

# Setup
options = uc.ChromeOptions()
options.add_argument('--no-sandbox')
options.add_argument('--disable-blink-features=AutomationControlled')
# options.add_argument('--headless')  # Turn on if needed
driver = uc.Chrome(options=options)

driver.get("https://flightio.com/")
time.sleep(5)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
time.sleep(3)

wait = WebDriverWait(driver, 15)

# Final result list
flights = []

try:
    route_containers = wait.until(EC.presence_of_all_elements_located(
        (By.CSS_SELECTOR, "div.flex.h-14.w-full.items-center.justify-between.rounded-lg.bg-\\[\\#F7F8FB\\].p-3")
    ))

    price_elements = driver.find_elements(By.CSS_SELECTOR, "span.text-slate-900.text-base.font-bold")
    price_list = [p.text.strip() for p in price_elements if p.text.strip()]

    for i, route in enumerate(route_containers):
        try:
            cities = route.find_elements(By.CSS_SELECTOR, "span.text-sm")
            if len(cities) < 2:
                continue

            to_city = cities[0].text.strip()
            from_city = cities[1].text.strip()

            date = route.find_element(By.CSS_SELECTOR, "span.m\\:text-body-xs\\(p\\).d\\:text-xs").text.strip()

            price = price_list[i] if i < len(price_list) else "N/A"

            flights.append({
                "from": from_city,
                "to": to_city,
                "date": date,
                "price": price
            })

        except Exception as e:
            print("Error in route:", e)

    driver.quit()

    # Output result
    for flight in flights:
        print(flight)

except Exception as outer:
    print("Page structure issue:", outer)
    driver.quit()

{'from': 'تهران', 'to': 'استانبول', 'date': 'جمعه 5 اردیبهشت', 'price': '4,627,000'}
{'from': 'تهران', 'to': 'دبی', 'date': 'سه\u200cشنبه 2 اردیبهشت', 'price': '8,190,000'}
{'from': 'تهران', 'to': 'مشهد', 'date': 'دوشنبه 8 اردیبهشت', 'price': '2,415,000'}


In [63]:
# Step 1: Replace \u200c with space in 'date' field
for flight in flights:
    flight['date'] = flight['date'].replace('\u200c', ' ').strip()

# Step 2: Remove duplicates
flights = [dict(t) for t in {tuple(flight.items()) for flight in flights}]

# Optional: Pretty print
for f in flights:
    print(f)

{'from': 'تهران', 'to': 'مشهد', 'date': 'دوشنبه 8 اردیبهشت', 'price': '2,415,000'}
{'from': 'تهران', 'to': 'استانبول', 'date': 'جمعه 5 اردیبهشت', 'price': '4,627,000'}
{'from': 'تهران', 'to': 'دبی', 'date': 'سه شنبه 2 اردیبهشت', 'price': '8,190,000'}


In [None]:
flights