In [None]:
import requests
import pandas as pd
from datetime import datetime
from tqdm import tqdm

API_KEY = 'YOUR_API_KEY'  # Skyscanner API 키를 여기에 입력

cities = ["LON-sky", "ROM-sky", "PAR-sky", "MAD-sky", "BCN-sky", "NCE-sky"]  # Skyscanner 도시 코드

def get_flight_price(departure_date, return_date, from_city, to_city):
    url = f"https://partners.api.skyscanner.net/apiservices/browseroutes/v1.0/KR/KRW/en-US/{from_city}/{to_city}/{departure_date}/{return_date}"
    headers = {
        "api-key": API_KEY
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        data = response.json()
        prices = [quote['MinPrice'] for quote in data['Quotes'] if quote['Direct']]
        if prices:
            return min(prices), url
    return None, None

def save_to_excel(data, filename="flight_prices.xlsx"):
    df = pd.DataFrame(data, columns=["Departure City", "Return City", "Price", "Link"])
    df.to_excel(filename, index=False)

def main():
    departure_date = "2025-04-20"
    return_date = "2025-05-08"

    results = []

    for from_city in tqdm(cities, desc="Cities", unit="city"):
        for to_city in cities:
            price, link = get_flight_price(departure_date, return_date, from_city, to_city)
            if price:
                results.append([from_city, to_city, price, link])
            else:
                print(f"No data for {from_city} to {to_city}")

    save_to_excel(results)
    print(f"Saved {len(results)} results to flight_prices.xlsx")

if __name__ == "__main__":
    main()



In [6]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time
from tqdm import tqdm

cities = ["London", "Rome", "Paris", "Madrid", "Barcelona", "Nice"]

def scrape_flight_price(departure_date, return_date, from_city, to_city):
    url = f"https://www.skyscanner.com/transport/flights/{from_city}/{to_city}/{departure_date}/{return_date}/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # 디버깅: HTML 저장
    with open(f"{from_city}_to_{to_city}.html", "w", encoding='utf-8') as file:
        file.write(response.text)

    # 가격과 링크를 추출
    flights = soup.find_all("div", class_="ticket-info")
    prices_links = []
    for flight in flights:
        price_tag = flight.find("div", class_="price-tag")
        if price_tag and "direct" in flight.get("data-flight-tags", ""):
            price = int(price_tag.text.replace(",", "").replace("$", ""))
            link_tag = flight.find("a", class_="ticket-action")
            link = "https://www.skyscanner.com" + link_tag.get("href") if link_tag else None
            prices_links.append((price, link))

    if prices_links:
        return min(prices_links, key=lambda x: x[0])  # 가격이 가장 낮은 것을 선택
    else:
        return None, None

def save_to_excel(data, filename="flight_prices.xlsx"):
    df = pd.DataFrame(data, columns=["Departure City", "Return City", "Price", "Link"])
    df.to_excel(filename, index=False)

def main():
    departure_date = "2025-04-20"
    return_date = "2025-05-08"

    results = []

    for from_city in tqdm(cities, desc="Cities", unit="city"):
        for to_city in cities:
            price, link = scrape_flight_price(departure_date, return_date, from_city, to_city)
            if price:
                results.append([from_city, to_city, price, link])
            else:
                print(f"No data for {from_city} to {to_city}")
            time.sleep(10)  # 요청 사이에 10초 대기

    save_to_excel(results)
    print(f"Saved {len(results)} results to {filename}")

if __name__ == "__main__":
    main()


Cities:   0%|          | 0/6 [00:00<?, ?city/s]

No data for London to London
No data for London to Rome
No data for London to Paris
No data for London to Madrid
No data for London to Barcelona
No data for London to Nice


Cities:  17%|█▋        | 1/6 [01:00<05:03, 60.77s/city]

No data for Rome to London
No data for Rome to Rome
No data for Rome to Paris
No data for Rome to Madrid
No data for Rome to Barcelona
No data for Rome to Nice


Cities:  33%|███▎      | 2/6 [02:01<04:02, 60.72s/city]

No data for Paris to London
No data for Paris to Rome
No data for Paris to Paris
No data for Paris to Madrid
No data for Paris to Barcelona


Cities:  33%|███▎      | 2/6 [02:52<05:44, 86.03s/city]


KeyboardInterrupt: 

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime, timedelta
import time
from tqdm import tqdm


In [2]:
cities = ["London", "Rome", "Paris", "Madrid", "Barcelona", "Nice"]


In [3]:
def scrape_flight_price(departure_date, return_date, from_city, to_city):
    url = f"https://www.skyscanner.com/transport/flights/{from_city}/{to_city}/{departure_date}/{return_date}/"
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # 가격과 링크를 추출
    flights = soup.find_all("div", class_="ticket-info")
    prices_links = []
    for flight in flights:
        price_tag = flight.find("div", class_="price-tag")
        if price_tag and "direct" in flight.get("data-flight-tags", ""):
            price = int(price_tag.text.replace(",", "").replace("$", ""))
            link_tag = flight.find("a", class_="ticket-action")
            link = "https://www.skyscanner.com" + link_tag.get("href") if link_tag else None
            prices_links.append((price, link))

    if prices_links:
        return min(prices_links, key=lambda x: x[0])  # 가격이 가장 낮은 것을 선택
    else:
        return None, None


In [4]:
def save_to_excel(data, filename="flight_prices.xlsx"):
    df = pd.DataFrame(data, columns=["Departure City", "Return City", "Price", "Link"])
    df.to_excel(filename, index=False)

In [5]:
def main():
    departure_date = "2025-04-20"
    return_date = "2025-05-08"

    results = []

    for from_city in tqdm(cities, desc="Cities", unit="city"):
        for to_city in cities:
            price, link = scrape_flight_price(departure_date, return_date, from_city, to_city)
            if price:
                results.append([from_city, to_city, price, link])
            time.sleep(1.5)  # 요청 사이에 2초 대기

    save_to_excel(results)

if __name__ == "__main__":
    main()

Cities:   0%|          | 0/6 [00:00<?, ?city/s]

Cities: 100%|██████████| 6/6 [00:58<00:00,  9.71s/city]
