In [138]:
import csv
import re
from datetime import datetime
from urllib.parse import quote

import pandas as pd
import requests
from bs4 import BeautifulSoup
from selenium import webdriver



In [154]:
# Trip configuration: change these 3 values to generate flight links to each
# team's city from a different departure airport and/or for different dates.
departloc = "ORD"  # departure airport code, inserted after "from" in the search URL
depart_time = "2025-01-04"  # outbound date, inserted verbatim into the search URL
return_time = "2025-01-06"  # return date, inserted verbatim into the search URL

def generate_flight_links(departloc, depart_time, return_time, team_airports, seats=2):
    """Build one Google Flights search URL per entry of ``team_airports``.

    Args:
        departloc: Departure airport (or place) to search from.
        depart_time: Outbound date text placed after "on" in the query.
        return_time: Return date text placed after "through" in the query.
        team_airports: Mapping of team code -> arrival airport code.
        seats: Number of seats requested in the query (defaults to 2, the
            value that used to be hard-coded).

    Returns:
        dict mapping every key of ``team_airports`` to its search URL, in the
        same iteration order as ``team_airports``.
    """
    base_url = "https://www.google.com/travel/flights?q="
    flight_links = {}

    for team, arriveloc in team_airports.items():
        # The trailing space is kept on purpose so the encoded URL stays
        # identical to the previously hand-encoded one (it ended in "%20").
        query = (
            f"Flights to {arriveloc} from {departloc} "
            f"on {depart_time} through {return_time} {seats} seats "
        )
        # Percent-encode the whole query so values containing spaces or
        # reserved characters (e.g. "New York", "&") cannot break the URL.
        flight_links[team] = base_url + quote(query, safe="")

    return flight_links

# Team code -> airport code used as the flight destination for that team's city.
# Each team appears exactly once; Denver was previously missing even though the
# division lists in this notebook include 'DEN'.
# NOTE(review): Washington is keyed 'WAS' here but spelled 'WSH' in the division
# lists elsewhere in this notebook - confirm which code downstream data expects.
team_airports = {
    "MIN": "MSP",  # Minnesota Vikings - Minneapolis-St. Paul International
    "CHI": "ORD",  # Chicago Bears - O'Hare International
    "GB": "GRB",   # Green Bay Packers - Green Bay–Austin Straubel International
    "DET": "DTW",  # Detroit Lions - Detroit Metropolitan
    "IND": "IND",  # Indianapolis Colts - Indianapolis International
    "JAX": "JAX",  # Jacksonville Jaguars - Jacksonville International
    "TEN": "BNA",  # Tennessee Titans - Nashville International
    "NE": "BOS",   # New England Patriots - Logan International
    "BUF": "BUF",  # Buffalo Bills - Buffalo Niagara International
    "MIA": "MIA",  # Miami Dolphins - Miami International
    "NYJ": "JFK",  # New York Jets - John F. Kennedy International
    "BAL": "BWI",  # Baltimore Ravens - Baltimore/Washington International
    "CIN": "CVG",  # Cincinnati Bengals - Cincinnati/NKY International
    "CLE": "CLE",  # Cleveland Browns - Cleveland Hopkins International
    "PIT": "PIT",  # Pittsburgh Steelers - Pittsburgh International
    "HOU": "IAH",  # Houston Texans - George Bush Intercontinental
    "KC": "MCI",   # Kansas City Chiefs - Kansas City International
    "DEN": "DEN",  # Denver Broncos - Denver International
    "LV": "LAS",   # Las Vegas Raiders - Harry Reid International (formerly McCarran)
    "LAC": "LAX",  # Los Angeles Chargers - Los Angeles International
    "LAR": "LAX",  # Los Angeles Rams - Los Angeles International
    "SEA": "SEA",  # Seattle Seahawks - Seattle-Tacoma International
    "ARI": "PHX",  # Arizona Cardinals - Phoenix Sky Harbor International
    "SF": "SFO",   # San Francisco 49ers - San Francisco International
    "ATL": "ATL",  # Atlanta Falcons - Hartsfield-Jackson Atlanta International
    "CAR": "CLT",  # Carolina Panthers - Charlotte Douglas International
    "NO": "MSY",   # New Orleans Saints - Louis Armstrong New Orleans International
    "TB": "TPA",   # Tampa Bay Buccaneers - Tampa International
    "NYG": "JFK",  # New York Giants - John F. Kennedy International
    "DAL": "DFW",  # Dallas Cowboys - Dallas/Fort Worth International
    "WAS": "DCA",  # Washington Commanders - Ronald Reagan Washington National
    "PHI": "PHL",  # Philadelphia Eagles - Philadelphia International
}

# flight_links['MIN'] is the search link FROM the chosen departloc TO the airport mapped to 'MIN'
flight_links = generate_flight_links(departloc, depart_time, return_time, team_airports)


In [160]:
# Team codes grouped by division (four per division).
# NOTE(review): Washington is spelled 'WSH' here, while team_airports in this
# notebook keys it as 'WAS' - confirm which spelling downstream code expects.
nfc_north_codes = ['MIN', 'CHI', 'DET', 'GB']
nfc_east_codes = ['PHI', 'WSH', 'DAL', 'NYG']
nfc_south_codes = ['TB', 'ATL', 'NO', 'CAR']
nfc_west_codes = ['LAR', 'SEA', 'ARI', 'SF']
afc_west_codes = ['KC', 'LAC', 'DEN', 'LV']
afc_east_codes = ['BUF', 'MIA', 'NYJ', 'NE']
afc_south_codes = ['HOU', 'IND', 'JAX', 'TEN']
afc_north_codes = ['PIT', 'BAL', 'CIN', 'CLE']

# Flatten the divisions into one list, keeping the division order shown below.
all_codes = [
    code
    for division in (
        nfc_north_codes,
        nfc_east_codes,
        nfc_south_codes,
        nfc_west_codes,
        afc_west_codes,
        afc_east_codes,
        afc_south_codes,
        afc_north_codes,
    )
    for code in division
]

# prints the flight search link from departloc to SEA
print(flight_links['SEA'])

https://www.google.com/travel/flights?q=Flights%20to%20SEA%20from%20ORD%20on%202025-01-04%20through%202025-01-06%202%20seats%20


In [157]:
def time_parser(time):
    """Normalize a scraped 12-hour clock time to ``"HH:MM AM/PM"``.

    The scraped text may contain whitespace between the digits and AM/PM (the
    page uses a narrow no-break space) and may end with a day-offset marker
    such as ``+1``. The marker, when present, is appended after a space,
    e.g. ``"12:38 AM +1"``.

    Args:
        time: Raw time text, or the ``"N/A"`` placeholder used when the
            element was not found on the page.

    Returns:
        The normalized time string; ``"N/A"`` is returned unchanged so callers
        can filter on it instead of hitting a parse error.

    Raises:
        ValueError: If the text is neither ``"N/A"`` nor a parseable time.
    """
    if time == "N/A":
        return time

    # Drop every whitespace character (including U+202F) so "%I:%M%p" matches.
    compact = re.sub(r"\s+", "", time)

    # A trailing signed number is a day offset ("+1", "+2", ...), not part of
    # the clock time; split it off before parsing.
    offset_match = re.search(r"[+-]\d+$", compact)
    day_offset = ""
    if offset_match:
        day_offset = offset_match.group()
        compact = compact[:offset_match.start()]

    formatted_time = datetime.strptime(compact, "%I:%M%p").strftime("%I:%M %p")
    return f"{formatted_time} {day_offset}".strip()

def scrape_flight_details(url, team_name, timeout=10):
    """Fetch a flight-search results page and extract one record per listing.

    Args:
        url: Search URL to request.
        team_name: Label stored under "Team" in every returned record.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of dicts with the keys "Team", "Airline",
        "Departure Time (Outbound)", "Arrival Time (Outbound)" and "Price"
        (an int built from the digits of the price text). Exact duplicates and
        listings with a missing or unparseable field are left out. An empty
        list is returned when the request fails or the status is not 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # A timeout keeps one unresponsive request from hanging the whole run, and
    # network errors are reported the same way as a bad status code.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Failed to retrieve the webpage. Error: {exc}")
        return []
    if response.status_code != 200:
        print(f"Failed to retrieve the webpage. Status code: {response.status_code}")
        return []

    soup = BeautifulSoup(response.text, "html.parser")

    flight_details = []

    # NOTE(review): the CSS class names below are copied from the page markup;
    # presumably they are generated and may change - verify them if nothing is
    # returned for a page that visibly lists flights.
    for flight in soup.select(".Rk10dc"):
        # recursive=False limits the search to direct <li> children.
        for flight_item in flight.find_all("li", recursive=False):
            airline_element = flight_item.select_one(".h1fkLb")
            departure_time_outbound_element = flight_item.select_one(".wtdjmc.YMlIz.ogfYpf.tPgKwe")
            arrival_time_outbound_element = flight_item.select_one(".XWcVob.YMlIz.ogfYpf.tPgKwe")
            price_element = flight_item.select_one(".YMlIz.FpEdX")

            # Skip incomplete listings individually instead of letting one of
            # them abort the rest of the section.
            if (
                airline_element is None
                or departure_time_outbound_element is None
                or arrival_time_outbound_element is None
                or price_element is None
            ):
                continue

            price_text = price_element.get_text(strip=True)
            try:
                departure_time_outbound = time_parser(
                    departure_time_outbound_element.get_text(strip=True)
                )
                arrival_time_outbound = time_parser(
                    arrival_time_outbound_element.get_text(strip=True)
                )
                # int("") raises ValueError when the price text has no digits.
                flight_price = int("".join(filter(str.isdigit, price_text)))
            except ValueError:
                continue

            new_flight = {
                "Team": team_name,
                "Airline": airline_element.get_text(strip=True),
                "Departure Time (Outbound)": departure_time_outbound,
                "Arrival Time (Outbound)": arrival_time_outbound,
                "Price": flight_price
            }

            if new_flight not in flight_details and "N/A" not in new_flight.values():
                flight_details.append(new_flight)

    return flight_details


In [158]:
# DEMO: scrape one hand-written search URL (MSN to BWI) and print every record found.

url = "https://www.google.com/travel/flights?q=Flights%20to%20BWI%20from%20MSN%20on%202025-01-04%20through%202025-01-06%202%20seats%20"
team_name = "GB"  # label written into the "Team" field of each record
flights = scrape_flight_details(url, team_name)

for flight in flights:
    print(flight)


{'Team': 'GB', 'Airline': 'UnitedOperated by Republic Airways DBA United Express', 'Departure Time (Outbound)': '05:23 PM', 'Arrival Time (Outbound)': '11:09 PM', 'Price': 635}
{'Team': 'GB', 'Airline': 'Delta', 'Departure Time (Outbound)': '07:00 AM', 'Arrival Time (Outbound)': '11:44 AM', 'Price': 826}
{'Team': 'GB', 'Airline': 'DeltaOperated by Endeavor Air DBA Delta Connection', 'Departure Time (Outbound)': '05:06 PM', 'Arrival Time (Outbound)': '11:55 PM', 'Price': 826}
{'Team': 'GB', 'Airline': 'American', 'Departure Time (Outbound)': '06:44 AM', 'Arrival Time (Outbound)': '12:27 PM', 'Price': 937}
{'Team': 'GB', 'Airline': 'DeltaOperated by Endeavor Air DBA Delta Connection', 'Departure Time (Outbound)': '05:59 PM', 'Arrival Time (Outbound)': '12:38 AM +1', 'Price': 966}
{'Team': 'GB', 'Airline': 'DeltaOperated by Endeavor Air DBA Delta Connection', 'Departure Time (Outbound)': '10:40 AM', 'Arrival Time (Outbound)': '04:52 PM', 'Price': 1026}
{'Team': 'GB', 'Airline': 'UnitedOpe

In [159]:
def save_to_flight_csv(flight_details, filename='ORDflightsWK18.csv'):
    """Append flight records to a CSV file.

    Args:
        flight_details: Iterable of dicts keyed by the column names below.
        filename: Path of the CSV file to append to.

    The header row is written only when the write position reported right
    after opening is 0, so repeated calls on the same file add rows without
    repeating the header.
    """
    columns = [
        "Team",
        "Airline",
        "Departure Time (Outbound)",
        "Arrival Time (Outbound)",
        "Price",
    ]

    with open(filename, mode='a', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.DictWriter(csv_file, fieldnames=columns)

        nothing_written_yet = csv_file.tell() == 0
        if nothing_written_yet:
            csv_writer.writeheader()

        csv_writer.writerows(flight_details)

# Scrape each team's search page and append whatever was found to the CSV;
# teams with no results are only reported.
for team_code, flight_url in flight_links.items():
    flights = scrape_flight_details(flight_url, team_code)

    if not flights:
        print(f"No flights found for {team_code}")
        continue

    save_to_flight_csv(flights)

No flights found for CHI
No flights found for SEA
