In [161]:
from bs4 import BeautifulSoup
import requests
import re
from tqdm import tqdm

In [66]:
start_path = "https://idahoaviation.com/idaho-airstrips-members/idaho-airstrip-network-members/"
# The airstrip list on this page is only available to a logged-in user, so it
# is not fetched from this URL by the notebook.
# Instead, the list was copied by hand from the page source in the browser,
# which is why it is hardcoded.
# NOTE(review): start_path is not referenced by any other visible cell.

In [75]:
# Load the manually saved listing page and collect every airstrip link on it.
# The encoding is given explicitly because airstrip names contain non-ASCII
# characters and the platform default encoding is not UTF-8 everywhere.
# NOTE(review): assumes airstrips.html was saved as UTF-8 -- confirm.
with open("airstrips.html", encoding="utf-8") as f:
    airstrips_html = f.read()
# Name the parser so the result does not depend on which optional parsers
# (lxml, html5lib) happen to be installed, and to avoid bs4's parser warning.
airstrps_soup = BeautifulSoup(airstrips_html, "html.parser")
airstrip_links = airstrps_soup.find_all('a', class_="airstrip_list_item")

In [121]:
# Map each link's text (the airstrip's display name) to the link's href.
airstrips = dict(
    (anchor.get_text(), anchor.get('href')) for anchor in airstrip_links
)
airstrips

{'45 Ranch (NA4)': 'https://idahoaviation.com/airstrips/45-ranch/',
 'Aberdeen (U36)': 'https://idahoaviation.com/airstrips/aberdeen/',
 'Alpine (46U)': 'https://idahoaviation.com/airstrips/alpine/',
 'American Falls (U01)': 'https://idahoaviation.com/airstrips/american-falls/',
 'Antelope Valley (U92)': 'https://idahoaviation.com/airstrips/antelope-valley/',
 'Arco (KAOC)': 'https://idahoaviation.com/airstrips/arco/',
 'Athol ID (ID05)': 'https://idahoaviation.com/airstrips/athol-id/',
 'Atlanta (55H)': 'https://idahoaviation.com/airstrips/atlanta/',
 'Bancroft (U51)': 'https://idahoaviation.com/airstrips/bancroft/',
 'Bear Lake Co./Paris (1U7)': 'https://idahoaviation.com/airstrips/paris-bear-lake-co/',
 'Bear Trap (1U0)': 'https://idahoaviation.com/airstrips/bear-trap/',
 'Bernard (U54)': 'https://idahoaviation.com/airstrips/bernard/',
 'Big Bar (1DA)': 'https://idahoaviation.com/airstrips/big-bar/',
 'Big Creek (U60)': 'https://idahoaviation.com/airstrips/big-creek/',
 'Big Souther

In [138]:
def _flatten_whitespace(text):
    """Strip ``text`` and replace every newline and tab with a single space."""
    return text.strip().replace('\n', ' ').replace('\t', ' ')


def _parse_labelled_values(chunks):
    """Turn ``"Label: value"`` strings into a ``{label: value}`` dict.

    Labels are lower-cased, stripped and have spaces replaced by underscores.
    Only the first colon separates label from value, so values that contain a
    colon themselves are kept whole. A chunk without a colon maps its label to
    an empty string; chunks whose label is empty (e.g. blank lines) are skipped.
    Values are stored exactly as found, including any leading space.
    """
    pairs = {}
    for chunk in chunks:
        label, _, value = chunk.partition(':')
        label = label.lower().strip().replace(' ', '_')
        if label:
            pairs[label] = value
    return pairs


def parse_airstrip(airstrip_link, key=None, timeout=30):
    """Download one airstrip detail page and extract its data into a dict.

    Args:
        airstrip_link: URL of the airstrip's detail page.
        key: Identifier stored under ``'key'`` in the result. Optional so that
            callers passing only the link keep working.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A dict holding one entry per "Label: value" line of the page's
        ``basics_wrap`` section, plus ``'description'``, ``'maangers'``,
        ``'classification'``, ``'runway'`` (a dict), ``'link'``, ``'key'`` and,
        when the page links to one, ``'runway_diagram'``.

    Raises:
        requests.RequestException: the download failed, timed out, or the
            server answered with an HTTP error status.
        ValueError: the page has no ``basics_wrap`` section.
    """
    response = requests.get(airstrip_link, timeout=timeout)
    # Fail on HTTP errors instead of trying to parse an error page.
    response.raise_for_status()
    # Name the parser so results do not depend on which optional parsers are installed.
    soup = BeautifulSoup(response.content, 'html.parser')

    entry = soup.find('div', class_="entry-content")
    basics = entry.find('div', class_='basics_wrap') if entry is not None else None
    if basics is None:
        raise ValueError("no basics_wrap section found on {}".format(airstrip_link))
    details = _parse_labelled_values(basics.getText().strip().split('\n'))

    description = soup.find('div', class_='description')
    details['description'] = description.getText().strip() if description is not None else ''

    # The misspelled key is kept on purpose: templates or saved JSON outside
    # this cell may already depend on it -- TODO confirm before renaming.
    details['maangers'] = [
        _flatten_whitespace(table.getText())
        for table in soup.find_all('table', class_='manager-tables')
    ]

    classification = soup.find('div', class_='classification_wrap')
    details['classification'] = (
        _flatten_whitespace(classification.getText()) if classification is not None else ''
    )

    runway_wrap = soup.find('div', class_='runway_wrap')
    if runway_wrap is None:
        details['runway'] = {}
    else:
        runway_diagram = runway_wrap.find('a', class_='runway_diagram')
        if runway_diagram:
            details['runway_diagram'] = runway_diagram.get('href')
        # After flattening, the individual "Label: value" entries are
        # separated by runs of two spaces.
        runway_raw = _flatten_whitespace(runway_wrap.getText()).split('  ')
        details['runway'] = _parse_labelled_values(runway_raw)

    details['link'] = airstrip_link
    details['key'] = key
    return details

In [139]:
# Validate the parser on one sample airstrip.
# parse_airstrip takes the airstrip's key as its second argument, so pass the
# display name along with the link.
sample_name = 'Friedman Memorial (KSUN)'
parse_airstrip(airstrips[sample_name], sample_name)

{'airport_identifier': ' KSUN',
 'elevation': ' 5320',
 'latitude': ' 43.5037806',
 'longitude': ' -114.2955583',
 'description': "Attended dawn to dusk.\xa0 100LL and Jet A fuel available after dusk.\xa0 When tower closed land RWY 31 and depart RWY 13 due to opposite direction traffic; use landing lights in traffic pattern.\xa0 Noise abatement procedures contact airport manager.\xa0 Not recommended for night use in marginal weather by unfamiliar pilots due to mountainous terrain.\xa0 Taxiway A open between taxiways A2 and A3 daylight hours only.\xa0 Taxiways A5 and B5 restricted to aircraft wingspans of 49' or less.\xa0 Landing fee for aircraft greater than 6000lbs.\xa0 Favor east side of canyon on approach to RWY 31.\xa0 West side of canyon when departing RWY 13.\xa0\nServices: Major A&P repairs, tiedowns, hangars, charters, sales, air taxi, towing and gliders, ground transportation.\xa0 Food and lodging in town.",
 'maangers': ['Airstrip Manager:  Chris Pomeroy       208-788-4956',


In [140]:
# Scrape the individual page of every airstrip and store the results in a
# dict keyed by the airstrip's display name.
parsed_airstrips = {}
for airstrip, link in airstrips.items():
    print("parsing", airstrip)
    # The display name is also passed as the record's key, which
    # parse_airstrip expects as its second argument.
    parsed_airstrips[airstrip] = parse_airstrip(link, airstrip)

parsing 45 Ranch (NA4)
parsing Aberdeen (U36)
parsing Alpine (46U)
parsing American Falls (U01)
parsing Antelope Valley (U92)
parsing Arco (KAOC)
parsing Athol ID (ID05)
parsing Atlanta (55H)
parsing Bancroft (U51)
parsing Bear Lake Co./Paris (1U7)
parsing Bear Trap (1U0)
parsing Bernard (U54)
parsing Big Bar (1DA)
parsing Big Creek (U60)
parsing Big Southern Butte (U46)
parsing Blackfoot (U02)
parsing Boise (KBOI)
parsing Bonners Ferry (65S)
parsing Brooks Seaplane Base (S76)
parsing Bruce Meadows (U63)
parsing Buhl (U03)
parsing Burley (KBYI)
parsing Cabin Creek (I08)
parsing Cache Creek (OR62)
parsing Caldwell (KEUL)
parsing Carey (U65)
parsing Cascade (U70)
parsing Cavanaugh Bay (66S)
parsing Cayuse Creek (C64)
parsing Challis (KLLJ)
parsing Chamberlain Basin (U79)
parsing Coeur D’ Alene-Pappy Boyington Field (KCOE)
parsing Cold Meadows (U81)
parsing Copper Basin (0U2)
parsing Cottonwood (S84)
parsing Cougar Ranch (D47)
parsing Council (U82)
parsing Cox’s Well (U48)
parsing Craigmo

In [142]:
# Save the parsed airstrips to disk as JSON for later use.
import json

with open('airstrips.json', 'w') as json_file:
    json_file.write(json.dumps(parsed_airstrips))

In [177]:
# The KML template was copied from the Utah Backcountry pilots Foreflight pack
# and broken down into 3 Jinja2 templates:
#   * placemark_row_template.kml - the data row, rendered for each airstrip
#   * placemark_template.kml     - the placemark, rendered for each airstrip
#   * template.kml               - the overall document, rendered once for all
#                                  the placemarks
# NOTE(review): autoescape is left at Jinja2's default (off); confirm that the
# templates escape characters such as '&' that are special in XML.

from jinja2 import Environment, FileSystemLoader

environment = Environment(loader=FileSystemLoader("./templates"))
row_template, placemark_template, template = (
    environment.get_template(template_name)
    for template_name in (
        "placemark_row_template.kml",
        "placemark_template.kml",
        "template.kml",
    )
)

In [165]:

# Render one placemark per airstrip: first the data row, then the placemark
# that embeds it.
rendered_placemarks = []
for key, airstrip in tqdm(parsed_airstrips.items()):
    # Each record may already carry its own 'key' entry, so passing key=...
    # next to **airstrip would raise "multiple values for keyword argument".
    # Merge into one context instead; the dict key of parsed_airstrips wins.
    context = dict(airstrip, key=key)
    # Collapse the rendered row onto one line by dropping every newline
    # together with the spaces that follow it.
    rendered_row = re.sub('\n[ ]*', '', row_template.render(**context))
    rendered_placemark = placemark_template.render(template_row=rendered_row, **context)
    rendered_placemarks.append(rendered_placemark)


100%|██████████| 161/161 [00:00<00:00, 17339.40it/s]


In [180]:
# Render the overall document from all placemarks and write it to disk.
rendered_template = template.render(formatted_airstrips=rendered_placemarks)
# The scraped text contains non-ASCII characters (typographic apostrophes,
# non-breaking spaces), so the encoding is fixed rather than left to the
# platform default.
# NOTE(review): assumes the KML template declares (or defaults to) UTF-8 -- confirm.
with open('airstrips.kml', 'w', encoding='utf-8') as f:
    f.write(rendered_template)

In [186]:
!rm -rf IdahoAirstrips
!mkdir -p IdahoAirstrips/navdata
!cp airstrips.kml IdahoAirstrips/navdata
!cp templates/manifest.json IdahoAirstrips/
!zip -r IdahoAirstrips.zip  IdahoAirstrips

  adding: IdahoAirstrips/ (stored 0%)
  adding: IdahoAirstrips/navdata/ (stored 0%)
  adding: IdahoAirstrips/navdata/airstrips.kml (deflated 98%)
  adding: IdahoAirstrips/manifest.json (deflated 42%)
