In [1]:
from bs4 import BeautifulSoup
import requests
import re
from tqdm import tqdm

In [2]:
# URL of the page that holds the airstrip list.
# NOTE(review): start_path is not referenced by any of the cells visible here — confirm it is still needed.
start_path = "https://idahoaviation.com/idaho-airstrips-members/idaho-airstrip-network-members/"
# Getting the airstrip list requires a logged-in user, so it is not fetched by this notebook.
# Instead, the information was copied from the page source in the browser, and hence it is hardcoded.

In [3]:
# Read the saved copy of the airstrip list page and collect the airstrip links.
# The encoding is pinned because airstrip names contain non-ASCII characters and the
# platform default is not UTF-8 everywhere.
# NOTE(review): assumes airstrips.html was saved as UTF-8 — confirm.
with open("airstrips.html", encoding="utf-8") as f:
    airstrips_html = f.read()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser happens
# to be installed (and warns about it), so results can differ between machines.
airstrps_soup = BeautifulSoup(airstrips_html, "html.parser")
airstrip_links = airstrps_soup.find_all('a', class_="airstrip_list_item")

In [4]:
# Map the text of every airstrip link to the URL it points to.
airstrips = dict((link.getText(), link.get('href')) for link in airstrip_links)
airstrips

{'45 Ranch (NA4)': 'https://idahoaviation.com/airstrips/45-ranch/',
 'Aberdeen (U36)': 'https://idahoaviation.com/airstrips/aberdeen/',
 'Alpine (46U)': 'https://idahoaviation.com/airstrips/alpine/',
 'American Falls (U01)': 'https://idahoaviation.com/airstrips/american-falls/',
 'Antelope Valley (U92)': 'https://idahoaviation.com/airstrips/antelope-valley/',
 'Arco (KAOC)': 'https://idahoaviation.com/airstrips/arco/',
 'Athol ID (ID05)': 'https://idahoaviation.com/airstrips/athol-id/',
 'Atlanta (55H)': 'https://idahoaviation.com/airstrips/atlanta/',
 'Bancroft (U51)': 'https://idahoaviation.com/airstrips/bancroft/',
 'Bear Lake Co./Paris (1U7)': 'https://idahoaviation.com/airstrips/paris-bear-lake-co/',
 'Bear Trap (1U0)': 'https://idahoaviation.com/airstrips/bear-trap/',
 'Bernard (U54)': 'https://idahoaviation.com/airstrips/bernard/',
 'Big Bar (1DA)': 'https://idahoaviation.com/airstrips/big-bar/',
 'Big Creek (U60)': 'https://idahoaviation.com/airstrips/big-creek/',
 'Big Souther

In [31]:
_METERS_PER_FOOT = 0.3048  # exact, by definition of the international foot
_REQUEST_TIMEOUT_SECONDS = 30  # without a timeout requests.get can block forever


def _flatten_text(node):
    """Return the node's stripped text with newlines and tabs replaced by spaces."""
    return node.get_text().strip().replace('\n', ' ').replace('\t', ' ')


def _normalise_key(label):
    """Turn a label such as 'Some Label' into the dict key 'some_label'."""
    return label.lower().strip().replace(' ', '_')


def _elevation_to_meters(raw_elevation):
    """Convert an elevation given as text in feet to meters; 0.0 if no number is found."""
    if not raw_elevation:
        return 0.0
    # Tolerate thousands separators and trailing units such as "5,280 ft".
    number = re.search(r'-?\d+(?:\.\d+)?', raw_elevation.replace(',', ''))
    if number is None:
        return 0.0
    return float(number.group()) * _METERS_PER_FOOT


def parse_airstrip(airstrip_link):
    """Download one airstrip page and extract its data into a dict.

    Args:
        airstrip_link: URL of the airstrip page.

    Returns:
        A dict containing, in this order:
          * one entry per "Label: value" line of the ``basics_wrap`` block
            (keys lower-cased, spaces replaced by underscores),
          * ``elevation_m``: the ``elevation`` entry converted to meters
            (NOTE(review): assumes the site reports elevation in feet — confirm),
          * ``description``, ``managers`` (list of str), ``classification``,
          * ``features``: ``{'amenities': [...], 'accommodation': [...]}``, only
            when a ``features_wrap`` block exists,
          * ``weather``: list of ``{'station', 'url'}`` dicts, only when a
            ``wx_wrap`` block exists,
          * ``runway``: dict of runway fields, plus ``runway_diagrams`` when
            download links exist; ``{}`` if the runway block could not be parsed,
          * ``link``: the URL that was parsed.

    Raises:
        requests.RequestException: the request failed, timed out, or returned
            an HTTP error status.
        AttributeError: a required block (``entry-content``, ``basics_wrap``,
            ``description`` or ``classification_wrap``) is missing from the page.
    """
    response = requests.get(airstrip_link, timeout=_REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()  # do not try to parse an error page
    # Name the parser explicitly so the result does not depend on which optional
    # parsers happen to be installed.
    soup = BeautifulSoup(response.content, 'html.parser')
    entry = soup.find('div', class_="entry-content")

    details = {}
    for line in entry.find('div', class_='basics_wrap').get_text().strip().split('\n'):
        # partition keeps everything after the first colon, so values that
        # themselves contain ':' are not truncated.
        label, colon, value = line.partition(':')
        if not colon:
            continue  # blank or free-text line: nothing to record
        details[_normalise_key(label)] = value.strip()

    details['elevation_m'] = _elevation_to_meters(details.get('elevation'))
    details['description'] = soup.find('div', class_='description').get_text().strip()
    details['managers'] = [
        _flatten_text(table) for table in soup.find_all('table', class_='manager-tables')
    ]
    details['classification'] = _flatten_text(soup.find('div', class_='classification_wrap'))

    try:
        features_wrap = entry.find_all('div', class_='features_wrap')
        if features_wrap:
            features = {
                'amenities': [_flatten_text(p) for p in features_wrap[0].find_all('p')]
            }
            # Stored before the accommodation parsing so that a failure below
            # does not throw away the amenities that were already extracted.
            details['features'] = features
            if len(features_wrap) > 1:
                emergency = features_wrap[1].find('div', class_='emergency-acomodation')
                if emergency is not None:
                    accommodation = []
                    for block in emergency.find_all('div', class_='sns_wrap'):
                        parts = (
                            block.get_text()
                            .replace('\n', '')
                            .replace('Member Name', 'Member Name: ')
                            .replace('[email\xa0protected]', '')
                            .split('\xa0')
                        )
                        member = parts[0].replace('Member Name: ', '').strip()
                        member_details = ', '.join(
                            part.strip() for part in parts[1:] if part.strip()
                        )
                        accommodation.append({'member': member, 'details': member_details})
                    features['accommodation'] = accommodation
    except Exception as e:
        print("ERROR parsing features", e)

    try:
        wx_wrap = entry.find_all('div', class_='wx_wrap')
        if wx_wrap:
            details['weather'] = [
                {'station': anchor.get_text().strip().lower(), 'url': anchor.get('href')}
                for wrap in wx_wrap
                for anchor in wrap.find_all('a')
            ]
    except Exception as e:
        print("ERROR parsing weather", e)

    runway_raw = []  # defined up front so the error handler can always print it
    try:
        # Runway fields are separated by runs of whitespace in the page text.
        runway_raw = _flatten_text(soup.find('div', class_='runway_wrap')).split('  ')
        runway = {}
        for item in runway_raw:
            label, _, value = item.partition(':')
            runway[_normalise_key(label)] = value
        runway.pop('', None)  # produced by empty fragments between separators
        diagram_links = entry.find_all('a', class_='runway_diagram')
        if diagram_links:
            runway['runway_diagrams'] = [
                {
                    diag.get_text().strip().lower().replace(' ', '_').replace('download_', ''):
                        diag.get('href')
                }
                for diag in diagram_links
            ]
        details['runway'] = runway
    except Exception as e:
        print("ERROR", e, runway_raw)
        # Keep the key present so code that reads details['runway'] does not fail.
        details['runway'] = {}

    details['link'] = airstrip_link
    return details

In [32]:
# Fetch and parse the page of every airstrip, collecting the
# results in a dict keyed by the airstrip's name.
parsed_airstrips = {}
for airstrip_name, airstrip_url in airstrips.items():
    print("parsing", airstrip_name)
    parsed_airstrips[airstrip_name] = parse_airstrip(airstrip_url)

parsing 45 Ranch (NA4)
parsing Aberdeen (U36)
parsing Alpine (46U)
parsing American Falls (U01)
parsing Antelope Valley (U92)
parsing Arco (KAOC)
parsing Athol ID (ID05)
parsing Atlanta (55H)
parsing Bancroft (U51)
parsing Bear Lake Co./Paris (1U7)
parsing Bear Trap (1U0)
parsing Bernard (U54)
parsing Big Bar (1DA)
parsing Big Creek (U60)
parsing Big Southern Butte (U46)
parsing Blackfoot (U02)
parsing Boise (KBOI)
parsing Bonners Ferry (65S)
parsing Brooks Seaplane Base (S76)
parsing Bruce Meadows (U63)
parsing Buhl (U03)
parsing Burley (KBYI)
parsing Cabin Creek (I08)
parsing Cache Creek (OR62)
parsing Caldwell (KEUL)
parsing Carey (U65)
parsing Cascade (U70)
parsing Cavanaugh Bay (66S)
parsing Cayuse Creek (C64)
parsing Challis (KLLJ)
parsing Chamberlain Basin (U79)
parsing Coeur D’ Alene-Pappy Boyington Field (KCOE)
parsing Cold Meadows (U81)
parsing Copper Basin (0U2)
parsing Cottonwood (S84)
parsing Cougar Ranch (D47)
parsing Council (U82)
parsing Cox’s Well (U48)
parsing Craigmo

In [7]:
# Write the scraped data to airstrips.json so it can be used later
import json

with open('airstrips.json', 'w') as json_file:
    json.dump(parsed_airstrips, fp=json_file)

In [17]:
# Download runway diagrams and standard operating procedures into images/
import os

os.makedirs('images', exist_ok=True)  # a fresh checkout may not have the folder yet

# Key found in a runway_diagrams entry -> suffix appended to the airstrip name
# to build the local file name.
download_suffixes = {
    'runway_diagram': 'AIRPORT',
    'standard_operating_procedure': 'OPERATING_PROCEDURE',
}

for key, airstrip in parsed_airstrips.items():
    # 'runway' is missing when its parsing failed, and 'runway_diagrams' is only
    # present when the page offered download links.
    diagrams = airstrip.get('runway', {}).get('runway_diagrams', [])
    # Some names contain '/' (e.g. 'Bear Lake Co./Paris (1U7)'), which would
    # otherwise be read as a sub-directory that does not exist.
    # NOTE(review): if the KML templates build file names from the raw key,
    # they need the same replacement — confirm.
    file_stem = key.replace('/', '_')
    for diagram in diagrams:
        for kind, suffix in download_suffixes.items():
            if kind not in diagram:
                continue
            url = diagram[kind]
            response = requests.get(url, allow_redirects=True, timeout=60)
            if not response.ok:
                # Do not save an error page under a document's file name.
                print("ERROR downloading", url, response.status_code)
                continue
            extension = url.split('.')[-1]
            # The context manager closes the file even if the write fails.
            with open('images/' + file_stem + suffix + '.' + extension, 'wb') as out_file:
                out_file.write(response.content)

In [37]:
# The KML template was copied from the Utah Backcountry Pilots ForeFlight pack and
# broken down into three Jinja2 templates:
#   - placemark_row_template.kml: the data row, rendered for each airstrip
#   - placemark_template.kml:     the placemark, rendered for each airstrip
#   - template.kml:               the overall template, rendered once for all the placemarks

from jinja2 import Environment, FileSystemLoader

environment = Environment(loader=FileSystemLoader("./templates"))
row_template, placemark_template, template = (
    environment.get_template(template_name)
    for template_name in (
        "placemark_row_template.kml",
        "placemark_template.kml",
        "template.kml",
    )
)

In [38]:

# Render one placemark per airstrip. The data row is rendered first and collapsed
# (every newline and the spaces following it are removed) before it is handed to
# the placemark template.
collapse_line_breaks = re.compile('\n[ ]*')
rendered_placemarks = []
for key, airstrip in tqdm(parsed_airstrips.items()):
    row_markup = row_template.render(key=key, **airstrip)
    rendered_row = collapse_line_breaks.sub('', row_markup)
    rendered_placemark = placemark_template.render(
        key=key, template_row=rendered_row, **airstrip
    )
    rendered_placemarks.append(rendered_placemark)


100%|██████████| 161/161 [00:00<00:00, 4674.27it/s]


In [39]:
# Render the complete KML document from the per-airstrip placemarks and save it.
rendered_template = template.render(formatted_airstrips=rendered_placemarks)
# Airstrip names contain non-ASCII characters (e.g. the ’ in "Cox’s Well"), so the
# encoding is pinned instead of depending on the platform default.
# NOTE(review): assumes template.kml declares (or defaults to) UTF-8 — confirm.
with open('IdahoAirstrips.kml', 'w', encoding='utf-8') as f:
    f.write(rendered_template)

In [40]:
!rm -rf IdahoAirstrips
!mkdir -p IdahoAirstrips/navdata
!cp IdahoAirstrips.kml IdahoAirstrips/navdata
!cp templates/manifest.json IdahoAirstrips/
!cp images/* IdahoAirstrips/navdata
!zip -r IdahoAirstrips.zip  IdahoAirstrips

updating: IdahoAirstrips/ (stored 0%)
updating: IdahoAirstrips/navdata/ (stored 0%)
updating: IdahoAirstrips/navdata/Stanley (2U7)OPERATING_PROCEDURE.pdf (deflated 7%)
updating: IdahoAirstrips/navdata/Orogrande (75C)AIRPORT.pdf (deflated 3%)
updating: IdahoAirstrips/navdata/New Meadows (1U4)AIRPORT.pdf (deflated 6%)
updating: IdahoAirstrips/navdata/Vines (NA8)AIRPORT.pdf (deflated 4%)
updating: IdahoAirstrips/navdata/Upper Loon Creek (U72)AIRPORT.pdf (deflated 5%)
updating: IdahoAirstrips/navdata/Cavanaugh Bay (66S)OPERATING_PROCEDURE.pdf (deflated 7%)
updating: IdahoAirstrips/navdata/Bruce Meadows (U63)AIRPORT.pdf (deflated 5%)
updating: IdahoAirstrips/navdata/Hollow Top (0U7)AIRPORT.pdf (deflated 8%)
updating: IdahoAirstrips/navdata/Smiley Creek (U87)OPERATING_PROCEDURE.pdf (deflated 3%)
updating: IdahoAirstrips/navdata/Lower Loon Creek (C53)AIRPORT.pdf (deflated 5%)
updating: IdahoAirstrips/navdata/Cabin Creek (I08)AIRPORT.pdf (deflated 4%)
updating: IdahoAirstrips/navd

In [23]:
# Scratch check of rounding down; `floor` lives in the math module and must be imported.
from math import floor

floor(1.5)

1