In [7]:
# from selenium import webdriver
from bs4 import BeautifulSoup, NavigableString
import pandas as pd
import numpy as np
import os
import folium
from folium.plugins import MarkerCluster
import pytz
import requests
import re

In [1]:
def query(flightN, date):
    """Build the FlightAware track-log URL for one flight on one date.

    The flight's live page is downloaded and the script tag that embeds the
    ``"trackLog":`` data is searched for the history link of the requested
    date.

    Parameters
    ----------
    flightN : str
        Flight identifier as used in FlightAware URLs (e.g. ``'KLM1573'``).
    date : int or str
        Date component of the history link (e.g. ``20210715``).

    Returns
    -------
    str
        Absolute URL of the flight's ``/tracklog`` page.

    Raises
    ------
    requests.HTTPError
        If the live page answers with an HTTP error status.
    IndexError
        If no script on the page embeds ``"trackLog":`` data.
    ValueError
        If the embedded data holds no history link for ``date``.
    """
    base_url = 'https://flightaware.com'
    live_path = '/live/flight/' + str(flightN)
    history_prefix = live_path + '/history/' + str(date)

    # A timeout keeps the notebook from hanging on a stalled connection, and
    # an explicit status check reports HTTP failures as such instead of as a
    # confusing parsing error further down.
    response = requests.get(base_url + live_path, timeout=30)
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers are installed (and no "guessed parser" warning is shown).
    soup = BeautifulSoup(response.content, 'html.parser')

    script_texts = [str(tag) for tag in soup.find_all('script')]
    track_scripts = [text for text in script_texts if '"trackLog":' in text]
    if not track_scripts:
        raise IndexError(
            'no script with embedded "trackLog" data found on '
            + base_url + live_path
        )

    # Take the link itself (everything up to the closing quote/delimiter)
    # rather than splitting the field on ':', which breaks as soon as the
    # field carries extra colons or trailing JSON punctuation.
    # re.escape keeps the flight number from being read as a regex.
    link = re.search(re.escape(history_prefix) + r'[^"\',\s<>\\]*',
                     track_scripts[0])
    if link is None:
        raise ValueError(
            'no history entry for ' + str(date) + ' found for flight '
            + str(flightN)
        )

    return base_url + link.group(0) + '/tracklog'

In [10]:
def traj(path, tz):
    """Read a FlightAware track-log table and draw it on a map.

    Parameters
    ----------
    path : str
        Location of the track-log page, passed straight to
        ``pandas.read_html`` (e.g. the URL returned by ``query``).
    tz : str
        Time-zone name the track times are converted to
        (e.g. ``'Europe/Amsterdam'``).

    Returns
    -------
    The map object returned by ``visualize``.

    Raises
    ------
    KeyError
        If the table has no US/Eastern time column.
    ValueError
        If the table contains no usable track points.
    """
    # Bug fix: read from the `path` argument. The previous version ignored it
    # and silently depended on a notebook-level global named `url`.
    table = pd.read_html(path)[0]  # first html table on the page

    # The times are localised as US/Eastern below, which observes both EDT
    # and EST, so accept either header.
    # NOTE(review): 'Time (EST)EST' is presumed to be the winter spelling of
    # the header - confirm against a winter track log.
    time_columns = [col for col in table.columns
                    if str(col) in ('Time (EDT)EDT', 'Time (EST)EST')]
    if not time_columns:
        raise KeyError("no 'Time (EDT)EDT' / 'Time (EST)EST' column in the "
                       "track-log table")

    # The first five rows are skipped, as in the original code.
    # NOTE(review): presumably these are header/summary rows - confirm.
    rows = table.iloc[5:]
    # astype(str) makes the text filters below safe for NaN / numeric cells.
    lat_raw = rows['LatitudeLat'].astype(str)
    lon_raw = rows['LongitudeLon'].astype(str)
    time_raw = rows[time_columns[0]].astype(str)

    # Keep only rows where latitude, longitude AND time all look valid, so the
    # three columns stay aligned row by row (filtering them independently can
    # leave them with different lengths).
    starts_with_number = r'^-?\d'
    keep = (lat_raw.str.contains(starts_with_number)
            & lon_raw.str.contains(starts_with_number)
            & time_raw.str.contains(r'M$'))
    if not keep.any():
        raise ValueError('no track points found in the track-log table')

    # A cell may carry residue from the html after the first number; keep the
    # leading signed decimal only (same value the original dot/minus splitting
    # produced).
    # NOTE(review): for some cells the kept decimals may themselves include
    # residue digits - verify against the rendered page if precision matters.
    leading_number = r'^(-?\d+(?:\.\d+)?)'
    lat = lat_raw[keep].str.extract(leading_number, expand=False).astype(float)
    lon = lon_raw[keep].str.extract(leading_number, expand=False).astype(float)

    # create dataframe with the cleaned data
    coords = pd.DataFrame({'lat': lat.to_numpy(), 'lon': lon.to_numpy()})

    # The second space-separated token is the clock time. Keep the AM/PM
    # marker that follows it: without it every PM time is parsed as a morning
    # time.
    clock_times = []
    for cell in time_raw[keep]:
        tokens = cell.split(' ')
        clock = tokens[1]
        if len(tokens) > 2 and tokens[2][:2] in ('AM', 'PM'):
            clock = clock + ' ' + tokens[2][:2]
        clock_times.append(clock)

    # NOTE(review): only clock times are parsed, so pandas attaches the
    # current date (not the flight date) to every timestamp.
    stamps = pd.to_datetime(clock_times)
    stamps = stamps.tz_localize(tz='US/Eastern')  # make time Eastern-aware
    stamps = stamps.tz_convert(tz=tz)  # convert to the requested time zone

    coords['time'] = stamps  # append time to lat/lon data

    m = visualize(coords)
    return m

In [11]:
def visualize(coords):
    """Plot every track point as a small red circle on a folium map.

    Parameters
    ----------
    coords : pandas.DataFrame
        Must provide the columns ``lat``, ``lon`` and ``time``.

    Returns
    -------
    folium.Map
        Map centred on the mean latitude/longitude of all points, with one
        circle per row whose popup and tooltip show the row's ``time``.
    """
    # centre the map on the average position of the track
    centre = coords[["lat", "lon"]].mean().to_list()
    m = folium.Map(location=centre, zoom_start=5)

    # Markers are added directly to the map; no MarkerCluster overlay is used.
    for lat, lon, stamp in zip(coords["lat"], coords["lon"], coords["time"]):
        point = folium.Circle(
            location=(lat, lon),
            popup=stamp,
            tooltip=stamp,
            radius=4,
            color='red',
        )
        point.add_to(m)

    # hand the map back so the calling cell can display it
    return m

In [12]:
# Flight to look up: FlightAware identifier and the date as it appears in the
# history link (looks like YYYYMMDD).
flightN = 'KLM1573'
date = 20210715

# Time zone the track times are converted to for display.
tz = 'Europe/Amsterdam'

# Resolve the track-log page for that flight and date, and show it.
url = query(flightN, date)
print(url)

https://flightaware.com/live/flight/KLM1573/history/20210715/0635Z/EHAM/LGAV/tracklog


In [13]:
# Build the track map; the bare name on the last line renders it as the
# cell's output.
m = traj(path=url, tz=tz)
m

 ***