In [1]:
!pip3 install beautifulsoup4 --user
!pip3 install metar --user
!pip3 install -q -U tqdm --user



In [16]:
import re
import multiprocessing
from urllib.request import urlopen
from bs4 import BeautifulSoup
from tqdm.notebook import tqdm
from metar import Metar
import pandas as pd

In [2]:
def fetch(url):
  try:
    html = urlopen(url).read()
    soup = BeautifulSoup(html, features='html.parser')
    for script in soup(["script", "style"]):
        script.extract()
    return soup
  except Exception:
    return None

def get_stations_noaa():
  soup = fetch('https://tgftp.nws.noaa.gov/data/observations/metar/stations/')
  stations = []
  for link in soup.find_all('a'):
    href = link.get('href')
    if 'TXT' in href:
      stations.append(href.split('.TXT')[0])
  return stations

def get_stations_with_coords():
  soup = fetch('http://weather.rap.ucar.edu/surface/stations.txt')
  text = soup.get_text()
  matches = re.findall(r".*([A-Z]{4}).*[\s]([\d]+\s[\d]+\w).*[\s]([\d]+\s[\d]+\w)[\s]*(\d{1,4}).*", text)
  stations = []
  for match in matches:
    stations.append({'ICAO': match[0], 'coords': "+".join(match[1:]).replace(' ', '-')})
  return stations

def get_all_metar_obs(station, month):
  soup = fetch(f"https://www.ogimet.com/display_metars2.php?lugar={station['ICAO']}&tipo=SA&ord=DIR&nil=NO&fmt=txt&ano=2020&mes={month}&day=1&hora=00&anof=2020&mesf={month}&dayf=30&horaf=12&minf=59&enviar=Ver")
  if soup is None:
    return []
  text = soup.get_text()
  if f"No hay METAR/SPECI de {station} en el periodo solicitado" in text:
    return []
  data = []
  for ob in re.findall("METAR\s(.*)=", text):
    if ',' not in ob:
      data.append({ 'coords': station['coords'], 'observation': ob })
  return data

In [3]:
noaa_stations = get_stations_with_coords()
print(len(noaa_stations))

9580


In [4]:
test_month = 9
test_station = noaa_stations[50]
print(test_station)
metar = get_all_metar_obs(test_station, test_month)

{'ICAO': 'PAEL', 'coords': '58-12N+136-21W+3'}


In [5]:
metar

[{'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 061445Z 00000KT 25SM SCT250 07/06 A3035 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 070049Z VRB04KT 20SM OVC140 16/05 A3044 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 111447Z 00000KT 25SM SKC 07/06 A3020 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 120048Z VRB05KT 20SM SKC 13/07 A3013 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 121445Z 00000KT 25SM SKC 06/04 A3012 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 130058Z 10008KT 20SM SCT150 15/M00 A3005 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 131445Z 00000KT 25SM SKC 06/04 A3004 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 141445Z 00000KT 25SM SKC 07/03 A3011 RMK NOSPECI'},
 {'coords': '58-12N+136-21W+3',
  'observation': 'PAEL 151445Z 00000KT 25SM FEW300 05/04 A3014 RMK NOSPECI'},
 {'coords': '58-12N+136-

In [6]:
test = metar[0]
print(test['coords'])
obs = Metar.Metar(test['observation'])
print(obs.string())

58-12N+136-21W+3
station: PAEL
type: routine report, cycle 15 (automatic report)
time: Tue Oct  6 14:45:00 2020
temperature: 7.0 C
dew point: 6.0 C
wind: calm
visibility: 25 miles
pressure: 1027.8 mb
sky: scattered clouds at 25000 feet
- NOSPECI
METAR: PAEL 061445Z 00000KT 25SM SCT250 07/06 A3035 RMK NOSPECI


In [7]:
def get_metars(station):
    localdata=[]
    for idx in range(0, 2):
        metar_obs = get_all_metar_obs(station, month-idx)
        for obs_item in metar_obs:
            localdata.append(f"{obs_item['coords']},{obs_item['observation']}")
    return localdata

In [13]:
month = 9
if __name__ == '__main__':
    trabajos=multiprocessing.cpu_count()
    with multiprocessing.Pool(trabajos-1) as p:
        data=p.map(get_metars,noaa_stations)      

In [14]:
data2=[]
for lista in data:
    if lista is not None:
        for i in lista:
            data2.append(i)
print(len(data2))

2702836


In [24]:
path = f'{month-1}-{month}_metar.csv'
f = open(path,'w')
f.write('coordinates,observation\n')
for line in data2:
    f.write(line)
f.close()