## Parsing in Haskell

This is an experiment to teach myself how to parse things in Haskell. A lot of the tutorials start a little too deep in the pool for my comfort. I will revisit them later when I feel more comfortable. For now, this little gem is totally my speed: https://two-wrongs.com/parser-combinators-parsing-for-haskell-beginners

In [1]:
import Text.ParserCombinators.ReadP

In [14]:
-- A primitive test: is the character one of the lowercase vowels a, o, u, e, i?
-- Uppercase vowels are not in the list and therefore do not match.
isVowel :: Char -> Bool
isVowel = (`elem` "aouei")

-- Turn the isVowel test into a ReadP parser: it consumes one character and
-- succeeds only when that character passes the test
vowel :: ReadP Char
vowel = satisfy isVowel
    
-- readP_to_S runs a `ReadP a` parser over a String and returns a list of
-- (parsed value, remaining input) pairs, i.e. [(a, String)].
-- Here only the leading 'a' is consumed and "eig" is left over.
print $ readP_to_S vowel "aeig"

[('a',"eig")]

In [22]:
-- many1 applies a parser one or more times; readP_to_S then lists every successful parse
-- The uppercase letters 'A' through 'Z', in order
capitals :: String
capitals = enumFromTo 'A' 'Z'

-- Same recipe as isVowel: a plain predicate, true when the character is one of `capitals`
isCapital :: Char -> Bool
isCapital = (`elem` capitals)

-- Airport-code parser: one or more consecutive capital letters
airport :: ReadP String
airport = many1 (satisfy isCapital)

-- That's pretty cool. ReadP keeps every alternative, so we get one result per
-- possible length of the capital-letter run: "B", "BI", "BIR" and "BIRK".
readP_to_S airport "BIRK 281500Z 09014KT CAVOK M03/M06 Q0980 R13/910195"

[("B","IRK 281500Z 09014KT CAVOK M03/M06 Q0980 R13/910195"),("BI","RK 281500Z 09014KT CAVOK M03/M06 Q0980 R13/910195"),("BIR","K 281500Z 09014KT CAVOK M03/M06 Q0980 R13/910195"),("BIRK"," 281500Z 09014KT CAVOK M03/M06 Q0980 R13/910195")]

In [26]:
-- Let's play around with monads!
-- Same airport code as before, but the run of capitals must now be followed by
-- a single space. The space is consumed and discarded; only the code is returned.
airport :: ReadP String
airport =
    many1 (satisfy isCapital) >>= \code ->
        satisfy (== ' ') >> return code
    

-- The parser's result is the value returned at the end (`code`). Without the space requirement
-- you get the full list of partial matches back.
-- The parser monad behaves a bit like the list monad: `code` ranges over every possible run of
-- capitals, and the "next character is a space" constraint then filters those candidates,
-- leaving only "BIRK".
readP_to_S airport "BIRK 281500Z 09014KT CAVOK M03/M06 Q0980 R13/910195"

[("BIRK","281500Z 09014KT CAVOK M03/M06 Q0980 R13/910195")]

In [34]:
-- The ten decimal digit characters, kept as a constant
digits :: String
digits = enumFromTo '0' '9'


-- Predicate: true when the character is one of `digits`
isDigit :: Char -> Bool
isDigit = (`elem` digits)


-- Wrap the isDigit test in a parser that accepts exactly one digit character
digit :: ReadP Char
digit = satisfy isDigit

-- Works just like before: one digit is consumed and the rest ("2") is left over.
-- Now let's step this up a bit.
readP_to_S digit "52"

[('5',"2")]

In [37]:
-- Inside the parser monad we spell out the order in which the smaller parsers
-- are applied to the input.
-- `count n p` runs parser p exactly n times and collects the results.
-- `string` is a convenience parser that matches an exact string instead of a
-- series of individual characters.
-- Result: (day, hour, minute), each read from two digits, followed by a literal "Z ".
timestamp :: ReadP (Int, Int, Int)
timestamp =
    (,,) <$> twoDigits <*> twoDigits <*> twoDigits <* string "Z "
  where
    twoDigits :: ReadP Int
    twoDigits = read <$> count 2 digit
    
    
-- Day 19, hour 12, minute 01; the trailing "Z " is consumed as well
readP_to_S timestamp "191201Z "

[((19,12,1),"")]

In [39]:
-- The timestamp parser again, now with error checking: the parse fails (pfail)
-- unless the day is 1-31, the hour is at most 23 and the minute is at most 59.
timestamp :: ReadP (Int, Int, Int)
timestamp = do
    stamp <- (,,) <$> numbers 2 <*> numbers 2 <*> numbers 2
    _ <- string "Z "
    validate stamp
  where
    -- Accept the triple only when every field is inside its range
    validate stamp@(day, hour, minute)
        | inRange   = return stamp
        | otherwise = pfail
      where
        inRange = day >= 1 && day <= 31 && hour <= 23 && minute <= 59
        
-- Parse exactly `width` digit characters and read them as a single Int
numbers :: Int -> ReadP Int
numbers width = read <$> count width digit
    
-- This should fail: 88 is not a valid day (nor is 89 a valid hour or 90 a valid minute)
-- readP_to_S timestamp "888990Z "

-- This should succeed: every field is within its allowed range
readP_to_S timestamp "302359Z "

[((30,23,59),"")]

In [40]:
import Control.Applicative

-- Let's add applicatives and alternatives!!!
-- `p <|> q` means "parse with p or with q": the speed may have two or three
-- digits, and the unit may be "KT" or "MPS".
-- While still inside the parser we also normalise the speed with toMPS.
-- Result: (direction, normalised speed).
windInfo :: ReadP (Int, Int)
windInfo = do
    heading <- numbers 3
    magnitude <- numbers 2 <|> numbers 3
    unit <- string "KT" <|> string "MPS"
    _ <- string " "
    pure (heading, toMPS unit magnitude)

-- Normalise a wind speed to whole metres per second.
--   unit  - the unit token read from the report: "KT" or "MPS"
--   speed - the speed expressed in that unit
-- Knots are halved with integer division, a rough approximation that rounds down.
-- Any other unit is reported with an explicit error message instead of GHC's
-- generic "non-exhaustive patterns" failure.
toMPS :: String -> Int -> Int
toMPS unit speed =
    case unit of
        "KT"  -> speed `div` 2
        "MPS" -> speed
        other -> error ("toMPS: unsupported unit " ++ show other)
         
-- Direction 090 with speed 14KT, which toMPS normalises to 7
readP_to_S windInfo "09014KT "

[((90,7),"")]

In [41]:
-- Parser for the optional gust part of a wind reading: the letter 'G' followed
-- by a two- or three-digit number. Only the number is returned.
gustParser :: ReadP Int
gustParser = satisfy (== 'G') >> (numbers 2 <|> numbers 3)


-- Extend the wind parser with the gust parser. `option x p` either parses p
-- or yields the default x (here 0) without consuming any input.
-- Result: (direction, normalised speed, normalised gusts).
windInfo :: ReadP (Int, Int, Int)
windInfo = do
    heading <- numbers 3
    magnitude <- numbers 2 <|> numbers 3
    gustSpeed <- option 0 gustParser
    unit <- string "KT" <|> string "MPS"
    _ <- string " "
    let toMetric = toMPS unit
    pure (heading, toMetric magnitude, toMetric gustSpeed)
    
    
-- No gust part in this input, so the third component falls back to the default 0
readP_to_S windInfo "09014KT "

[((90,7,0),"")]

In [42]:
-- A default of zero for absent gusts is misleading, so gusts become a Maybe,
-- and the result becomes a record instead of an ugly tuple.
data WindInfo = WindInfo
    { dir   :: Int        -- wind direction
    , speed :: Int        -- wind speed
    , gusts :: Maybe Int  -- gust speed; Nothing when there is none
    } deriving (Show)
    

-- Wind parser returning a WindInfo record. Speed and gusts are normalised with
-- toMPS; gusts stay Nothing when the input has no gust part.
windInfo :: ReadP WindInfo
windInfo = do
    heading <- numbers 3
    magnitude <- numbers 2 <|> numbers 3
    gustSpeed <- option Nothing (Just <$> gustParser)
    unit <- string "KT" <|> string "MPS"
    _ <- string " "
    let toMetric = toMPS unit
    pure $ WindInfo
        { dir = heading
        , speed = toMetric magnitude
        , gusts = toMetric <$> gustSpeed
        }
        
-- Without a gust part in the input, the gusts field comes back as Nothing
readP_to_S windInfo "09014KT "

[(WindInfo {dir = 90, speed = 7, gusts = Nothing},"")]

In [43]:
-- BEHOLD!!!
-- Let's get combinating!

-- A parsed report: the results of the individual parsers bundled in one record
data Report = Report
    { station :: String           -- station code
    , time    :: (Int, Int, Int)  -- (day, hour, minute)
    , wind    :: WindInfo         -- wind reading
    } deriving (Show)

-- Run the airport, timestamp and wind parsers one after another, in that
-- order, and collect their results into a Report
metar :: ReadP Report
metar = Report <$> airport <*> timestamp <*> windInfo
    
-- Parses the station, time and wind parts (gusts G17 in KT normalise to 8);
-- everything from "CAVOK" onwards is left unparsed.
readP_to_S metar "BIRK 281500Z 09014G17KT CAVOK M03/M06 Q0980 R13/910195"

[(Report {station = "BIRK", time = (28,15,0), wind = WindInfo {dir = 90, speed = 7, gusts = Just 8}},"CAVOK M03/M06 Q0980 R13/910195")]