
**METAR Decoding functions**

This code is a pack of functions that come in handy when decoding METAR reports. It uses regular expressions and returns a dictionary of values. For each part of a METAR report there is a separate function.

 You can also use the MetPy package or other code snippets.


In [None]:
import re
import pandas as pd
import numpy as np

I highly recommend downloading the data from https://mesonet.agron.iastate.edu/request/download.phtml

The dataset is in the following preprocessed format:
```
# station,valid,metar

LKKB,2023-01-01 00:00,LKKB 010000Z 19008KT CAVOK 14/07 Q1018

LKKB,2023-01-01 01:00,LKKB 010100Z 18008KT CAVOK 13/07 Q1019

LKKB,2023-01-01 02:00,LKKB 010200Z 21008KT CAVOK 13/06 Q1019
```

Naturally, you can also use Ogimet.com as a data source.

In [None]:
# Regular expressions for the individual groups of a report.
# Most patterns anchor on surrounding whitespace (\s), so a group at the very
# start or end of the string is only found if the text is padded accordingly.

# Time group: day, hour and minute (two digits each) followed by 'Z'.
timer_re = r'\s(?P<day>\d{2})(?P<hour>\d{2})(?P<minute>\d{2})Z\s'
# Period group 'DDhh/DDhh': start day/hour and end day/hour.
group_timer_re = r'\s(?P<sday>\d{2})(?P<shour>\d{2})\/(?P<eday>\d{2})(?P<ehour>\d{2})\s'


# Wind: three-digit direction or 'VRB', two-digit speed, optional gust
# ('G' + two digits, not captured) and the unit (KT or MPS). The optional tail
# looks for a later 'dddVddd' group; its two numbers sit in unnamed groups and
# therefore do not show up in groupdict().
wind_pat =  r'\s(?P<dir>\d{3}|VRB)(?P<spd>\d{2})(?:G\d{2})?(?P<unit>KT|MPS)(?:.+(\d{3})V(\d{3}))?'
# Visibility: exactly four digits delimited by whitespace.
vis_pat = r'\s(?P<vis>\d{4})\s'
# Literal 'CAVOK' and 'AUTO' tokens (matched anywhere in the text).
CAVOK_pat = r'(?P<cavok>CAVOK)'
AUTO_pat = r'(?P<auto>AUTO)'
# Weather phenomena: intensity/proximity, descriptor, precipitation,
# obscuration and "other" codes.
# NOTE(review): every part after the leading \s is optional, so this pattern
# matches at the first whitespace character of any text.
phen_pat = r'\s(?P<int>(-|\+|VC|)*)?(?P<desc>(MI|PR|BC|DR|BL|SH|TS|FZ|)+)?(?P<prec>(DZ|RA|SN|SG|IC|PL|GR|GS|UP|/|)*)?(?P<obsc>(BR|FG|FU|VA|DU|SA|HZ|PY|)*)?(?P<other>PO|SQ|FC|SS|DS|NSW)?'
# Cloud layer: cover code, three-digit height (or '///') and an optional
# TCU/CB/'///' suffix, followed by whitespace.
# NOTE(review): a height group is mandatory, so a bare 'NSC' or 'NCD' token
# does not match.
cld_pat = r'(VV|NSC|NCD|BKN|SCT|FEW|OVC|///)(\d{3}|///)((TCU|CB|///|))\s'
# Cloud layers are extracted with re.findall in a dedicated function, which is
# why cld_pat uses positional (unnamed) groups instead of named ones.
# Temperature / dew point: two digits each, 'M' prefix allowed; the 'temp'
# group includes the leading 'M' or whitespace character.
temp_pat = r'(?P<temp>(M|\s)\d{2})\/(?P<dp_temp>(M|)\d{2})'
# Pressure: 'Q' or 'A' followed by four digits.
press_pat = r'(Q|A)(?P<press>\d{4})'


# When using OGIMET or another database, this function might be useful
def get_issue_date(metar):
    '''Extract the issue timestamp from a raw report.

    Searches for a 12-digit ``YYYYMMDDhhmm`` group that is followed by a
    whitespace character.

    Args:
        metar: Raw report text.

    Returns:
        dict with the keys ``year``, ``month``, ``day``, ``hour`` and
        ``minute``. The values are the matched digit strings. When no
        timestamp is found, or *metar* is not text, every value is ``np.nan``.
    '''
    issue_dat_pat = r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})(?P<hour>\d{2})(?P<minute>\d{2})(\s)'
    try:
        found = re.search(issue_dat_pat, metar)
    except TypeError:
        # Non-text input (e.g. None or a NaN cell) is treated as "no timestamp".
        found = None

    if found is None:
        return {
            'year': np.nan, 'month': np.nan,
            'day': np.nan, 'hour': np.nan,
            'minute': np.nan,
        }
    # groupdict() only contains the named groups, so the trailing whitespace
    # capture is not part of the result.
    return found.groupdict()


def handle_wind(metar):
    '''Return the wind group of a report as a dict.

    The result holds the named groups of ``wind_pat`` (``dir``, ``spd``,
    ``unit``) as strings, or ``np.nan`` for each key when no wind group is
    found.

    NOTE(review): this name is defined again further down in this file with a
    ``None`` fallback; whichever definition is executed last is the one used.
    '''
    found = re.search(wind_pat, metar)
    if found is None:
        return {'dir': np.nan, 'spd': np.nan, 'unit': np.nan}
    return found.groupdict()


def handle_vis(metar):
    '''Return the visibility of a report as ``{'vis': value}``.

    A four-digit visibility group wins and is returned as a string. Without
    one, a CAVOK report yields the integer 9999; otherwise the value is
    ``np.nan``.
    '''
    numeric = re.search(vis_pat, metar)
    if numeric:
        return numeric.groupdict()
    if re.search(CAVOK_pat, metar):
        return {'vis': 9999}
    return {'vis': np.nan}

def handle_cld(metar):
    '''Return the cloud layers of a report as a flat dict.

    * CAVOK report: ``{'cover': 'NSC', 'height': '', 'cloud': ''}``.
    * Otherwise, for the i-th layer found by ``cld_pat`` (counting from 0):
      the keys ``cover<i>``, ``height<i>`` and ``type<i>``.
    * No layer found: ``cover``, ``height`` and ``cloud`` set to ``None``.
    '''
    if re.search(CAVOK_pat, metar):
        return {'cover': 'NSC', 'height': '', 'cloud': ''}

    layers = re.findall(cld_pat, metar)
    if not layers:
        return {'cover': None, 'height': None, 'cloud': None}

    decoded = {}
    for idx, layer in enumerate(layers):
        # Each findall tuple starts with cover, height and type suffix.
        decoded['cover' + str(idx)] = layer[0]
        decoded['height' + str(idx)] = layer[1]
        decoded['type' + str(idx)] = layer[2]
    return decoded

In [None]:
def cut_trend(metar_raw):
    '''Return the report with any trend forecast cut away.

    Everything from the first trend marker onwards is dropped, so that only
    the observation part is left for the group extractors.

    Args:
        metar_raw: Raw report text.

    Returns:
        The text preceding the first ``'TEMPO '``, ``'BECMG '`` or ``' FM'``
        marker, or the whole text when no marker is present. Whitespace in
        front of ``TEMPO``/``BECMG`` is kept.
    '''
    # 'TEMPO ' and 'BECMG ' require a trailing space. ' FM' requires a leading
    # space but nothing after it, so forms such as ' FM1500' are cut as well.
    return re.split('TEMPO |BECMG | FM', metar_raw)[0]

def handle_wind(metar):
    '''Return the wind group of a report as a dict.

    The result holds the named groups of ``wind_pat`` (``dir``, ``spd``,
    ``unit``) as strings, or ``None`` for each key when no wind group is
    found.
    '''
    found = re.search(wind_pat, metar)
    return found.groupdict() if found else {'dir': None, 'spd': None, 'unit': None}

def handle_vis(metar):
    '''Return the visibility of a report as ``{'vis': value}``.

    A four-digit visibility group wins and is returned as a string. Without
    one, a CAVOK report yields the integer 9999; otherwise the value is
    ``None``.
    '''
    numeric = re.search(vis_pat, metar)
    cavok = re.search(CAVOK_pat, metar)
    if numeric is not None:
        return numeric.groupdict()
    return {'vis': 9999 if cavok is not None else None}

def handle_clouds(metar):
    '''Return the cloud layers of a report as a flat dict.

    * CAVOK report: ``{'cover': 'NSC', 'height': '', 'cloud': ''}``.
    * Otherwise, for the i-th layer found by ``cld_pat`` (counting from 0):
      the keys ``cover<i>``, ``height<i>`` and ``type<i>``.
    * No layer found: ``cover``, ``height`` and ``cloud`` set to ``None``.
    '''
    layers = re.findall(cld_pat, metar)
    if re.search(CAVOK_pat, metar):
        return {'cover': 'NSC', 'height': '', 'cloud': ''}
    if not layers:
        return {'cover': None, 'height': None, 'cloud': None}

    # Position of each field inside a findall tuple.
    fields = ('cover', 'height', 'type')
    return {
        name + str(idx): layer[pos]
        for idx, layer in enumerate(layers)
        for pos, name in enumerate(fields)
    }

In [None]:
from fractions import Fraction

def recalc_SMtoKM(metar):
    '''Rewrite a statute-mile visibility group as a four-digit metre group.

    Despite the name, the result is in metres (1 SM is taken as 1600 m), in
    the same ``VVVV`` layout as a metric report: zero-padded to four digits
    and capped at 9999. This keeps the converted value readable by a
    four-digit visibility pattern.

    Supported forms, each directly followed by ``SM`` and delimited by
    whitespace: whole miles (``10SM``), fractions (``1/2SM``, ``1/16SM``) and
    mixed numbers (``1 1/2SM``). Only the first such group is rewritten.

    Args:
        metar: Raw report text.

    Returns:
        The text with the visibility group replaced, or *metar* unchanged
        when it is not a string, holds no statute-mile group, or the group
        cannot be evaluated (e.g. a zero denominator).
    '''
    if not isinstance(metar, str):
        return metar

    # Mixed number or fraction first, plain integer second, so that
    # "1 1/2SM" is taken as one value instead of just its "1/2" tail.
    sm_pat = r'\s(?P<vis>(?:\d+\s+)?\d+\/\d+|\d+)(?P<units>SM)\s'
    found = re.search(sm_pat, metar)
    if found is None:
        return metar

    try:
        miles = sum(Fraction(part) for part in found.group('vis').split())
    except (ValueError, ZeroDivisionError):
        return metar

    # 9999 is the largest value a four-digit group can hold.
    metres = min(round(miles * 1600), 9999)
    # Splice by position so that only the matched group is touched.
    return (
        metar[:found.start('vis')]
        + str(metres).zfill(4)
        + metar[found.end('units'):]
    )

def parse_metar2(m_raw):
    '''Decode one raw METAR string into a pandas Series (one DataFrame row).

    The row is assembled from, in this order: the ``auto`` flag (1 when the
    text contains 'AUTO'), the wind dict, the ``cavok`` flag (1 when the text
    contains 'CAVOK'), the visibility dict and the cloud dict.

    Args:
        m_raw: Raw report text.

    Returns:
        pd.Series with the decoded values. Decoding is best effort: if
        anything goes wrong (e.g. *m_raw* is not text) an empty Series is
        returned instead of raising.
    '''
    try:
        # Statute-mile visibility is converted before the groups are read.
        m_raw = recalc_SMtoKM(m_raw)

        aut_dict = {'auto': 1 if 'AUTO' in m_raw else 0}
        cav_dict = {'cavok': 1 if re.search(CAVOK_pat, m_raw) else 0}

        wind_dict = handle_wind(m_raw)
        vis_dict = handle_vis(m_raw)
        cld_dict = handle_cld(m_raw)

        dic_z = {**aut_dict, **wind_dict, **cav_dict, **vis_dict, **cld_dict}
    except Exception:
        # Deliberately broad: one undecodable report must not abort a batch.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        dic_z = {}
    return pd.Series(dic_z)

def create_panda_from_metar_dict(metar_li):
    '''Build a DataFrame from a text holding '='-terminated reports.

    The text is split on '='; every piece longer than 20 characters is decoded
    with ``parse_metar2`` and becomes one row. Shorter pieces are skipped.
    '''
    rows = [
        parse_metar2(report)
        for report in metar_li.split('=')
        if len(report) > 20
    ]
    return pd.DataFrame(rows)



def parse_metar_ogimet(m_raw):
    '''Decode one OGIMET-style report line into a pandas Series.

    The text must contain a 12-digit ``YYYYMMDDhhmm`` timestamp followed by
    whitespace. The row is assembled from, in this order: the timestamp parts
    (``year``, ``month``, ``day``, ``hour``, ``minute``), the ``auto`` flag,
    the wind dict, the ``cavok`` flag, the visibility dict and the cloud dict.

    Args:
        m_raw: Raw report text including the leading timestamp.

    Returns:
        pd.Series with the decoded values. Decoding is best effort: an empty
        Series is returned when the timestamp is missing or anything else
        goes wrong (e.g. *m_raw* is not text).
    '''
    date_compl_pat = r'(?P<year>\d{4})(?P<month>\d{2})(?P<day>\d{2})(?P<hour>\d{2})(?P<minute>\d{2})(\s)'
    try:
        # Statute-mile visibility is converted before the groups are read.
        m_raw = recalc_SMtoKM(m_raw)

        date_match = re.search(date_compl_pat, m_raw)
        if date_match is None:
            # No timestamp: treated as an undecodable line.
            return pd.Series({})
        date_dict = date_match.groupdict()

        aut_dict = {'auto': 1 if 'AUTO' in m_raw else 0}
        cav_dict = {'cavok': 1 if re.search(CAVOK_pat, m_raw) else 0}

        wind_dict = handle_wind(m_raw)
        vis_dict = handle_vis(m_raw)
        cld_dict = handle_cld(m_raw)

        dic_z = {**date_dict, **aut_dict, **wind_dict, **cav_dict, **vis_dict, **cld_dict}
    except Exception:
        # Deliberately broad: one undecodable report must not abort a batch.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        dic_z = {}
    return pd.Series(dic_z)

In [None]:
# Sample input: a single report terminated by '=', the separator that
# create_panda_from_metar_dict splits on.
metar = '20220600Z METAR LKMT 031430Z 19012KT 150V210 9999 FEW036 BKN023 21/13 Q1008 NOSIG='


In [None]:
# Decode the sample text; the resulting DataFrame has one row per report.
create_panda_from_metar_dict(metar)

Unnamed: 0,auto,dir,spd,unit,cavok,vis,cover0,height0,type0,cover1,height1,type1
0,0,190,12,KT,0,9999,FEW,36,,BKN,23,
