In [4]:
import os
import re
import time
from pathlib import Path
from datetime import datetime, timedelta

import pandas as pd
import numpy as np

from urllib.request import urlopen
from bs4 import BeautifulSoup

from metar import Metar
from metpy.units import units
from metpy.calc import wind_components

In [9]:
def fetch(url):
    try:
        html = urlopen(url).read()
        soup = BeautifulSoup(html, features='html.parser')
        for script in soup(["script", "style"]):
            script.extract()
        return soup
    except Exception as e:
        print(e)
        return None

def fetch_metar_by_icao_and_date(icao, start_date, end_date):
    url = f"https://www.ogimet.com/display_metars2.php?lang=en&lugar={icao}&tipo=SA&ord=DIR&nil=NO&fmt=txt"
    
    url += '&ano={0:%Y}&mes={0:%m}&day={0:%d}&hora={0:%H}&min=00'.format(start_date)
    url += '&anof={0:%Y}&mesf={0:%m}&dayf={0:%d}&horaf={0:%H}&minf=59'.format(end_date)
    
    print(f"Fetching... {url}", flush=True)
    soup = fetch(url)
    data = []
    if soup is None:
        return data 
    text = soup.get_text()
    if f"No hay METAR/SPECI de {icao} en el periodo solicitado" in text:
        return data 
    if "Query made" not in text:
        now = datetime.datetime.now()
        text = re.sub('\s\s+', ' ', text)
        print(f"Response: {text}", flush=True)
        print(now.strftime("%Y-%m-%d %H:%M:%S"), f"Retry... {year}, {month}", flush=True)
        time.sleep(3610)
        return None
    
    text = re.sub('\s\s+', ' ', text)
    matches = re.findall(r"\s(\d+)[\s]METAR\s(.*)=", text)
    for match in matches:
        if ',' not in match:
            data.append({ 'datetime': datetime.strptime(match[0], '%Y%m%d%H%M'), 'metar': match[1] })

    return data

In [6]:
def parse_wind_components(obs):
    u, v = wind_components(obs.wind_speed.value() * units('knots'), obs.wind_dir.value() * units.degree)
    
    return (u.magnitude * units('m/s'), v.magnitude * units('m/s'))

In [7]:
def get_variables(metar):
    try:
        obs = Metar.Metar(metar)

        temp = obs.temp.value(units='K')
        (uwind, vwind) = parse_wind_components(obs)
        press = obs.press.value(units='HPA')

        return [temp, uwind, vwind, press]
    except Exception as e:
        return None

def save_hdf(date, station_coords, variables):
    df = pd.DataFrame(data=[station_coords + variables.tolist()], columns=['lat', 'long', 'temp', 'uwind', 'vwind', 'press'])

    filename= 'metar_{0:%Y}{0:%m}{0:%d}_{0:%H}_00.h5'.format(date)
    df.to_hdf(filename, key='df')
    print(f"{filename} saved")

In [10]:
station_icao = 'SKBQ'
(station_lat, station_lon) = "10.883333,-74.783333".split(',')
station_coords = [float(station_lat), float(station_lon)]

gfs_interval_forecast = int(3)
start_date = datetime.strptime('2022-03-28 00', '%Y-%m-%d %H') - timedelta(hours=gfs_interval_forecast)
end_date = datetime.strptime('2022-03-28 06', '%Y-%m-%d %H')

rows = fetch_metar_by_icao_and_date(station_icao, start_date, end_date)

Fetching... https://www.ogimet.com/display_metars2.php?lang=en&lugar=SKBQ&tipo=SA&ord=DIR&nil=NO&fmt=txt&ano=2022&mes=03&day=27&hora=21&min=00&anof=2022&mesf=03&dayf=28&horaf=06&minf=59


In [11]:
vars_acum = []
for row in rows:
    date = row['datetime']
    variables = get_variables(row['metar'])

    if variables is None:
        continue

    vars_acum.append(variables)

    if date.hour % gfs_interval_forecast == 0:
        save_hdf(date, station_coords, np.median(vars_acum, axis=0))
        vars_acum = []

metar_20220327_21_00.h5 saved
metar_20220328_00_00.h5 saved
metar_20220328_03_00.h5 saved
metar_20220328_06_00.h5 saved


In [14]:
pd.read_hdf('metar_20220327_21_00.h5')

Unnamed: 0,lat,long,temp,uwind,vwind,press
0,10.883333,-74.783333,302.15,-3.420201,-9.396926,1007.0
