# METAR data on nearest airport
- Airports database: https://ourairports.com/data/
- METAR database: https://aviationweather.gov/dataserver/example?datatype=metar
- API request example: https://aviationweather.gov/adds/dataserver_current/httpparam?dataSource=metars&requestType=retrieve&format=xml&startTime=2023-01-21T19:09:20Z&endTime=2023-01-21T21:09:20Z&stationString=PHTO

In [1]:
# pip install beautifulsoup4
# pip install lxml

In [30]:
import requests
import json
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd

In [31]:
### Load airports data
airports = pd.read_csv('airports.csv')
airports = airports[['ident', 'type', 'name', 'latitude_deg', 'longitude_deg','scheduled_service']]
airports = airports[airports.scheduled_service == 'yes'] # only airport equiped with weather station

In [32]:
def haversine(lon1, lat1, lon2, lat2):
    '''
    Calculate the great circle distance in nautical miles between 
    two points on the earth
    '''
    # Radius of earth in nm (6371 for miles)
    r = 3956 
    
    # degrees to radians 
    lon1, lat1, lon2, lat2 = map(np.radians, [lon1, lat1, lon2, lat2])

    # haversine formula 
    dlon, dlat = lon2 - lon1, lat2 - lat1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    return 2 * np.arcsin(np.sqrt(a)) * r

def get_nearest_airports(lat, lon, airport_df,limit=20):
    '''
    Find nearest airports from GPS coord (lat, lon) in airport_df database.
    Returns a dataframe with the best matches limited to the "limit" firsts
    '''
    best_match = airports.iloc[
        (abs(airports['latitude_deg']-lat) + abs(airports['longitude_deg']-lon)).argsort()
    ].head(limit)

    best_match['distance'] = best_match.apply(lambda x: haversine(lon, lat, x.longitude_deg, x.latitude_deg), axis=1)
    best_match = best_match.reset_index(drop=True)
    
    return best_match

In [33]:
get_nearest_airports(48.403088, 11.517864, airports,limit=5)

Unnamed: 0,ident,type,name,latitude_deg,longitude_deg,scheduled_service,distance
0,EDDM,large_airport,Munich Airport,48.353802,11.7861,yes,12.763399
1,LOWI,medium_airport,Innsbruck Airport,47.260201,11.344,yes,79.321257
2,EDDN,large_airport,Nuremberg Airport,49.498699,11.078056,yes,78.230713
3,EDJA,medium_airport,Memmingen Allgau Airport,47.9888,10.2395,yes,65.419862
4,LOWS,medium_airport,Salzburg Airport,47.793301,13.0043,yes,80.439008


In [52]:
def request_metars(start_date,end_date,oaci):
    '''
    Read xml file from https://aviationweather.gov/dataserver/example?datatype=metar API
    and extract METARS from oaci airport between start_date and end_date
    date format: "YYYY-MM-DD hh:mm:ss"
    returns a pandas dataframe with METAR msg + relevant info
    '''
    sd, sh = start_date.split()[0], start_date.split()[1]
    ed, eh = end_date.split()[0], end_date.split()[1]
    
    url = "https://aviationweather.gov/adds/dataserver_current/httpparam?"\
        + "dataSource=metars&requestType=retrieve&format=xml&"\
        +"startTime={}T{}Z&endTime={}T{}Z&stationString={}".format(sd,sh,ed,eh,oaci)
    
    response = requests.get(url)
    bs_data = BeautifulSoup(response.text, "xml")
    #print(bs_data.prettify()) # uncomment to see all xml file
    
    dict_METAR = {}
    for tag in ['observation_time','raw_text','temp_c','visibility_statute_mi','flight_category']:
        dict_METAR[tag] = []
        for link in bs_data.find_all(tag):
            dict_METAR[tag].append(link.contents[0]) 

    results = pd.DataFrame(dict_METAR)
    results['temp_c'] = results['temp_c'].astype(float)
    results['observation_time'] = results['observation_time'].apply(lambda x: x.replace('T',' ').replace('Z',''))
    results['visibility_statute_mi'] = results['visibility_statute_mi'].astype(float)
    
    return results.sort_values('observation_time')

In [53]:
request_metars('2023-01-21 19:09:20','2023-01-21 21:09:20','LFML')

Unnamed: 0,observation_time,raw_text,temp_c,visibility_statute_mi,flight_category
3,2023-01-21 19:30:00,LFML 211930Z AUTO 34007KT 310V020 CAVOK 03/M05...,3.0,6.21,VFR
2,2023-01-21 20:00:00,LFML 212000Z AUTO 36006KT 320V030 CAVOK 03/M05...,3.0,6.21,VFR
1,2023-01-21 20:30:00,LFML 212030Z AUTO 35006KT 310V030 CAVOK 02/M05...,2.0,6.21,VFR
0,2023-01-21 21:00:00,LFML 212100Z AUTO 36005KT CAVOK 02/M05 Q1015 T...,2.0,6.21,VFR


In [54]:
def estimate_weather(lat, lon, start_date, end_date, tries=20):
    
    nearest_apt = get_nearest_airports(lat, lon, airports,limit=tries)
    
    for idx in range(nearest_apt.shape[0]):
        
        nearest_apt.iloc[idx].ident
        df_metar = request_metars(start_date,end_date,nearest_apt.iloc[idx].ident)
        
        if df_metar.shape[0] > 0:
            df_metar['distance'] = nearest_apt.iloc[idx].distance
            return df_metar
        
    return 'no metar found'

In [55]:
lat, lon = 48.403088, 11.517864
start_date = '2023-01-21 19:09:20'
end_date = '2023-01-21 21:09:20'

estimate_weather(lat, lon, start_date, end_date, tries=20)

Unnamed: 0,observation_time,raw_text,temp_c,visibility_statute_mi,flight_category,distance
3,2023-01-21 19:20:00,EDDM 211920Z AUTO 33006KT 290V350 6000 -SN FEW...,-2.0,3.73,MVFR,12.763399
2,2023-01-21 19:50:00,EDDM 211950Z AUTO 33005KT 9000 -SN SCT015 BKN0...,-2.0,5.59,VFR,12.763399
1,2023-01-21 20:20:00,EDDM 212020Z AUTO 32004KT 9999 -SN FEW010 BKN0...,-2.0,6.21,MVFR,12.763399
0,2023-01-21 20:50:00,EDDM 212050Z AUTO 32006KT 9999 FEW010 BKN044 M...,-2.0,6.21,VFR,12.763399
