In [2]:
import os
import re
import time
from pathlib import Path
from datetime import datetime, timedelta

import pandas as pd

from urllib.request import urlopen
from bs4 import BeautifulSoup

from metar import Metar
from metpy.units import units
from metpy.calc import wind_components

In [3]:
def fetch(url, timeout=60):
    """Download a page and return it as parsed HTML without script/style tags.

    Parameters
    ----------
    url : str
        Address to download.
    timeout : float, optional
        Seconds to wait on the network before giving up, so a stalled server
        cannot block the notebook indefinitely.

    Returns
    -------
    bs4.BeautifulSoup or None
        The parsed document with every ``<script>`` and ``<style>`` element
        removed, or ``None`` when the download or parsing failed (the error
        is printed, not raised).
    """
    try:
        # The context manager closes the HTTP response even if read() fails.
        with urlopen(url, timeout=timeout) as response:
            html = response.read()
        soup = BeautifulSoup(html, features='html.parser')
        # Drop non-visible content so get_text() only yields the page text.
        for tag in soup(["script", "style"]):
            tag.extract()
        return soup
    except Exception as e:
        # Best effort: callers treat None as "nothing could be fetched".
        print(e)
        return None

def fetch_metar_by_icao_and_date(icao, start_date, quota_wait_seconds=3610):
    """Download the METAR reports OGIMET lists for a station from a start time.

    Parameters
    ----------
    icao : str
        Station identifier sent as the ``lugar`` query parameter.
    start_date : datetime.datetime
        Start of the requested period. Year, month, day and hour are sent;
        minutes are always ``00``.
    quota_wait_seconds : float, optional
        How long to sleep when the response is not a regular result page
        (presumably a request-quota message) before returning ``None``.

    Returns
    -------
    list of dict or None
        One ``{'datetime': datetime, 'metar': str}`` entry per report found.
        An empty list when the page could not be downloaded or the site
        reports no METAR for the period. ``None`` when the response lacked
        the "Query made" header; the function has then already slept for
        ``quota_wait_seconds`` and the caller may try again.
    """
    url = (
        "https://www.ogimet.com/display_metars2.php"
        f"?lang=en&lugar={icao}&tipo=SA&ord=DIR&nil=NO&fmt=txt"
        f"&ano={start_date.year}&mes={start_date.month}&day={start_date.day}"
        f"&hora={start_date.hour}&min=00"
    )

    print(f"Fetching... {url}", flush=True)
    soup = fetch(url)
    data = []
    if soup is None:
        return data

    text = soup.get_text()
    # Use the requested station, not whatever global happens to be defined.
    if f"No hay METAR/SPECI de {icao} en el periodo solicitado" in text:
        return data

    # Collapse runs of whitespace; single newlines are kept, so each report
    # stays on its own line for the regex below.
    text = re.sub(r'\s\s+', ' ', text)

    if "Query made" not in text:
        # Not a normal result page: show what came back, wait, and let the
        # caller decide whether to try again.
        now = datetime.now()
        print(f"Response: {text}", flush=True)
        print(now.strftime("%Y-%m-%d %H:%M:%S"),
              f"Retry... {start_date.year}, {start_date.month}", flush=True)
        time.sleep(quota_wait_seconds)
        return None

    for stamp, report in re.findall(r"\s(\d+)[\s]METAR\s(.*)=", text):
        # Skip entries whose report text contains a comma; a METAR body
        # never does, so such a match is not a real observation.
        if ',' not in report:
            data.append({
                'datetime': datetime.strptime(stamp, '%Y%m%d%H%M'),
                'metar': report,
            })

    return data

In [4]:
def parse_wind_components(obs):
    """Split a decoded METAR's wind into its u and v components.

    Parameters
    ----------
    obs : Metar.Metar
        Decoded report; its ``wind_speed`` and ``wind_dir`` attributes are read.

    Returns
    -------
    tuple of float
        ``(u, v)`` magnitudes in knots.

    Raises
    ------
    AttributeError
        If the report carries no wind speed or no wind direction (the
        corresponding attribute is then ``None``).
    """
    # Ask for knots explicitly: value() without a unit returns whatever unit
    # the report used, which would be mislabelled as knots below.
    speed = obs.wind_speed.value(units='KT') * units('knots')
    direction = obs.wind_dir.value() * units.degree
    u, v = wind_components(speed, direction)

    return (u.magnitude, v.magnitude)

In [5]:
def store_obs(metar, date, station_coords):
    """Decode one METAR report and write it as a one-row HDF5 file.

    The file is created in the current working directory and named
    ``metar_YYYYMMDD_HH_00.h5`` after ``date``; only the hour is used, so
    reports issued within the same hour share a file name.

    Parameters
    ----------
    metar : str
        Raw report text.
    date : datetime.datetime
        Observation time, used only to build the file name.
    station_coords : sequence of float
        ``(latitude, longitude)`` stored in the ``lat`` and ``long`` columns.

    Notes
    -----
    Any failure (unparsable report, missing temperature, pressure or wind,
    write error) is printed and the observation is skipped; nothing is raised.
    """
    try:
        obs = Metar.Metar(metar)
        temp = obs.temp.value(units='K')
        uwind, vwind = parse_wind_components(obs)
        press = obs.press.value(units='HPA')

        df = pd.DataFrame(
            data=[[station_coords[0], station_coords[1], temp, uwind, vwind, press]],
            columns=['lat', 'long', 'temp', 'uwind', 'vwind', 'press'],
        )

        filename = 'metar_{0:%Y}{0:%m}{0:%d}_{0:%H}_00.h5'.format(date)
        df.to_hdf(filename, key='df')
        # Report success only once the file has actually been written.
        print(f"{filename} saved")
    except Exception as e:
        # Best effort: one bad report must not stop the remaining ones.
        print(e)

In [6]:
station = 'SKBQ'
# 10° 53' N, 074° 47' W
station_coords = (10.883333, -74.783333)

start_date = datetime.strptime('2022-03-28 12', '%Y-%m-%d %H')

# store_obs writes relative to the working directory, so move into the output
# folder -- but only when not already there, so re-running this cell does not
# descend into a nested surface-data/surface-data.
output_dir = Path('surface-data')
if Path.cwd().name != output_dir.name:
    output_dir.mkdir(parents=True, exist_ok=True)
    os.chdir(output_dir)

# The fetcher returns None when the site answered with an unexpected page;
# treat that like "no observations" instead of failing in the loop below.
obs = fetch_metar_by_icao_and_date(station, start_date) or []

for obs_item in obs:
    # Keep only reports whose hour is a multiple of three (00, 03, ..., 21).
    if obs_item['datetime'].hour % 3 == 0:
        store_obs(obs_item['metar'], obs_item['datetime'], station_coords)

Fetching... https://www.ogimet.com/display_metars2.php?lang=en&lugar=SKBQ&tipo=SA&ord=DIR&nil=NO&fmt=txt&ano=2022&mes=3&day=28&hora=12&min=00
metar_20220328_12_00.h5 saved
metar_20220328_15_00.h5 saved
Unparsed groups in body 'SCT018 SCT100 30/24' while processing 'SKBQ 281800Z 05013KT 999 SCT018 SCT100 30/24 Q1010 NOSIG RMK A2984'
metar_20220328_21_00.h5 saved


In [7]:
# Load one of the HDF5 files from the working directory and display its row.
pd.read_hdf('metar_20220328_12_00.h5')

Unnamed: 0,lat,long,temp,uwind,vwind,press
0,10.883333,-74.783333,299.15,-0.868241,-4.924039,1010.0
