# This Notebook Gathers Data from the FAA Website

In [69]:
import requests
from pathlib import Path
import os
import zipfile

In [70]:
# FAA page that is scraped below for the registry download links.
url = (
    "https://www.faa.gov/licenses_certificates/aircraft_certification/"
    "aircraft_registry/releasable_aircraft_download"
)

In [71]:
# Fetch the download page. The timeout keeps the cell from hanging forever on
# a stalled connection, and raise_for_status() stops the notebook on a 4xx/5xx
# reply instead of silently parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
html_content = response.text

In [72]:
from bs4 import BeautifulSoup

# Parse the fetched HTML so that its anchor tags can be searched.
soup = BeautifulSoup(markup=html_content, features="html.parser")

In [73]:
# Collect the target URL of every anchor whose text mentions the registration
# database. href=True skips anchors that have no href attribute; without it a
# None entry could land in the list and break code that treats each entry as
# a URL string.
download_links = [
    anchor["href"]
    for anchor in soup.find_all("a", href=True)
    if "Aircraft Registration Database" in anchor.text
]

In [74]:
# Display the collected URLs so the scrape result can be checked by eye.
download_links

['https://registry.faa.gov/database/ReleasableAircraft.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2013.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2014.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2015.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2016.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2017.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2018.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2019.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2020.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2021.zip',
 'https://registry.faa.gov/database/yearly/ReleasableAircraft.2022.zip']

In [75]:
# Directory that receives the downloaded zip archives; it is created together
# with any missing parents, and an already existing directory is left alone.
download_folder = Path("../data/raw/")
download_folder.mkdir(parents=True, exist_ok=True)

In [76]:
# Browser-like User-agent sent with every archive request.
# NOTE(review): presumably the server rejects the default requests agent - confirm.
headers = {"User-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"}

for link in download_links:
    # The archive keeps the name of the last URL path segment.
    file_name = link.split("/")[-1]
    file_path = os.path.join(download_folder, file_name)
    print(f"Downloading {link} to {file_path}...")

    # The response is used as a context manager so the streamed connection is
    # released even when writing fails; the timeout bounds how long a stalled
    # server can block the cell. Redirects are still not followed.
    with requests.get(
        link, stream=True, allow_redirects=False, headers=headers, timeout=60
    ) as response:
        # Stop on 4xx/5xx instead of saving an error page under a .zip name.
        response.raise_for_status()
        with open(file_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024):
                file.write(chunk)

Downloading https://registry.faa.gov/database/ReleasableAircraft.zip to ..\data\raw\ReleasableAircraft.zip...
Downloading https://registry.faa.gov/database/yearly/ReleasableAircraft.2013.zip to ..\data\raw\ReleasableAircraft.2013.zip...
Downloading https://registry.faa.gov/database/yearly/ReleasableAircraft.2014.zip to ..\data\raw\ReleasableAircraft.2014.zip...
Downloading https://registry.faa.gov/database/yearly/ReleasableAircraft.2015.zip to ..\data\raw\ReleasableAircraft.2015.zip...
Downloading https://registry.faa.gov/database/yearly/ReleasableAircraft.2016.zip to ..\data\raw\ReleasableAircraft.2016.zip...
Downloading https://registry.faa.gov/database/yearly/ReleasableAircraft.2017.zip to ..\data\raw\ReleasableAircraft.2017.zip...
Downloading https://registry.faa.gov/database/yearly/ReleasableAircraft.2018.zip to ..\data\raw\ReleasableAircraft.2018.zip...
Downloading https://registry.faa.gov/database/yearly/ReleasableAircraft.2019.zip to ..\data\raw\ReleasableAircraft.2019.zip...
D

In [82]:
# Specify the path to the extraction folder
extraction_folder = "../data/extracted"

# Create the extraction folder if it doesn't exist. exist_ok=True replaces the
# separate existence check, so there is no gap between checking and creating,
# and a plain file sitting at this path raises a clear error right away.
os.makedirs(extraction_folder, exist_ok=True)

# Extract the files
for link in download_links:
    # Construct the path to the zip file (same file name the download step used)
    file_name = link.split("/")[-1]
    print(file_name)
    zip_file_path = os.path.join(download_folder, file_name)
    print(zip_file_path)

    # Construct the path to the extraction directory: the archive name without
    # its final ".zip" extension
    year_folder = os.path.join(extraction_folder, file_name.rsplit(".", 1)[0])

    # Create the per-archive folder if it doesn't exist
    os.makedirs(year_folder, exist_ok=True)

    # Extract the zip file
    with zipfile.ZipFile(zip_file_path, "r") as zip_ref:
        zip_ref.extractall(year_folder)

    print(f"Extracted files for year {year_folder}")

print("Extraction complete!")

ReleasableAircraft.zip
..\data\raw\ReleasableAircraft.zip
Extracted files for year ../data/extracted\ReleasableAircraft
ReleasableAircraft.2013.zip
..\data\raw\ReleasableAircraft.2013.zip
Extracted files for year ../data/extracted\ReleasableAircraft.2013
ReleasableAircraft.2014.zip
..\data\raw\ReleasableAircraft.2014.zip
Extracted files for year ../data/extracted\ReleasableAircraft.2014
ReleasableAircraft.2015.zip
..\data\raw\ReleasableAircraft.2015.zip
Extracted files for year ../data/extracted\ReleasableAircraft.2015
ReleasableAircraft.2016.zip
..\data\raw\ReleasableAircraft.2016.zip
Extracted files for year ../data/extracted\ReleasableAircraft.2016
ReleasableAircraft.2017.zip
..\data\raw\ReleasableAircraft.2017.zip
Extracted files for year ../data/extracted\ReleasableAircraft.2017
ReleasableAircraft.2018.zip
..\data\raw\ReleasableAircraft.2018.zip
Extracted files for year ../data/extracted\ReleasableAircraft.2018
ReleasableAircraft.2019.zip
..\data\raw\ReleasableAircraft.2019.zip
Ex