Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
haskenthetical/src/Parser.hs
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
276 lines (236 sloc)
9.19 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
module Parser | |
( parseMType | |
, parsePType | |
, parseWholeFile | |
, treeToExpr | |
, treeToStmt | |
, treesToStmts | |
, treeToTopLevel | |
) where | |
import Prelude.Extra | |
import Data.Bifunctor (first) | |
import qualified Data.Char as Char | |
import Data.List (foldl1') | |
import qualified Data.Map.Strict as Map | |
import qualified Data.Set as Set | |
import qualified Data.Text as Text | |
import Data.Void (Void) | |
import Text.Megaparsec | |
import Text.Megaparsec.Char | |
import qualified Text.Megaparsec.Char.Lexer as L | |
import Env | |
import Syntax | |
type Parser = Parsec Void String | |
sc :: Parser () | |
sc = L.space space1 lineComment empty | |
where | |
lineComment = try $ do | |
(string "#!" <|> string "#") | |
lookAhead space1 | |
void $ takeWhileP (Just "character") (/= '\n') | |
lexeme :: Parser a -> Parser a | |
lexeme = L.lexeme sc | |
symbol :: String -> Parser String | |
symbol = L.symbol sc | |
-- Having to list all special characters in one place seems awkward. | |
isNonSyntax :: Char -> Bool | |
isNonSyntax c = Char.isPrint c | |
&& not (Char.isSpace c) | |
&& notElem c ("()\"" :: String) | |
stParser :: Parser SyntaxTree | |
stParser = | |
-- A bareword can contain numeric characters, it just can't start with | |
-- something that might parse as a number. We can't simply forbid them to | |
-- start with numeric characters, because we need to forbid e.g. "+5x" while | |
-- accepting "+x". | |
STBare <$> (notFollowedBy stFloat' >> stBare) | |
<|> STFloat <$> stFloat | |
<|> STString <$> stString | |
<|> STTree <$> stTree | |
stFloat' :: Parser Double | |
stFloat' = try (L.signed (pure ()) L.float) | |
<|> try (fromIntegral @Int <$> L.signed (pure ()) L.decimal) | |
stFloat :: Parser Double | |
stFloat = | |
-- This forbids `5x` from being parsed as `5 x`. But it accepts `5"foo"` as `5 | |
-- "foo"`. We could add more `notFollowedBy`, but maybe the `sepBy` in | |
-- `stTree` is actually the place to look? But that's awkward because we do | |
-- want `(foo)(bar)` to work. So probably we just need to replace `sepBy` with | |
-- something custom. But this works for now. | |
lexeme $ stFloat' <* notFollowedBy stBare | |
stString :: Parser Text | |
stString = Text.pack <$> (char '"' >> manyTill charLiteral (char '"')) | |
charLiteral :: Parser Char | |
charLiteral = label "literal char" $ anySingle >>= \case | |
'\\' -> anySingle >>= \case | |
'\\' -> pure '\\' | |
'"' -> pure '"' | |
'n' -> pure '\n' | |
x -> failure (Just $ Tokens $ x :| []) | |
(Set.fromList $ Tokens . (:| []) <$> "\"\\n") | |
x -> pure x | |
stBare :: Parser Text | |
stBare = fmap Text.pack | |
$ lexeme | |
$ takeWhile1P (Just "non-syntax character") isNonSyntax | |
stTree :: Parser [SyntaxTree] | |
stTree = between (symbol "(") (symbol ")") (sepBy stParser sc) | |
stWholeFile :: Parser [SyntaxTree] | |
stWholeFile = between sc eof (sepBy stParser sc) | |
parseWholeFile :: String -> String -> Either CompileError [SyntaxTree] | |
parseWholeFile fName input = | |
first (CEParseError . Text.pack . errorBundlePretty) | |
$ parse stWholeFile fName input | |
treeToTopLevel :: SyntaxTree -> TopLevel Ps | |
treeToTopLevel = \case | |
STTree (STBare "declarations" : body) -> DeclarationsPs NoExt body | |
t -> OtherTopLevelPs NoExt t | |
treeToStmt :: FullEnv -> SyntaxTree -> Either Text (Stmt Ps) | |
treeToStmt env = \case | |
STTree [STBare "def", name, body] -> do | |
n <- parseTyped parseName env name | |
b <- parseTyped (treeToExpr env) env body | |
return $ Def n b | |
STTree (STBare "def" : _) -> | |
Left "bad def" | |
STTree [STBare "defmacro", name, body] -> do | |
n <- parseName name | |
b <- parseTyped (treeToExpr env) env body | |
return $ DefMacro n b | |
STTree (STBare "defmacro" : _) -> | |
Left "bad defmacro" | |
STTree (STBare "type" : typeCon : dataCons) -> do | |
let parseTCon :: MType Ps -> Either Text (Name, [Name]) | |
parseTCon = \case | |
TCon (TC NoExt n) -> return (n, []) | |
TVar _ -> Left "can't declare a type variable" | |
_ `TApp` (TCon _) -> Left "type constructor where I expected a var" | |
_ `TApp` (TApp _ _) -> Left "type application where I expected a var" | |
a `TApp` (TVar (TV NoExt v)) -> fmap (++ [v]) <$> parseTCon a | |
_ `TApp` (MacroMType NoExt _ _) -> Left "macro where I expected a var" | |
MacroMType NoExt _ _ -> Left "can't use macros in type declarations" | |
(typeName, typeVars) <- parseTCon =<< parseMType env typeCon | |
constrs <- forM dataCons $ \case | |
STBare cname -> return (Name cname, []) | |
STTree (STBare cname : args) -> | |
fmap (Name cname,) $ mapM (parseMType env) args | |
_ -> Left "Bad constructor" | |
return $ TypeDecl $ TypeDecl' typeName typeVars constrs | |
STTree (STBare "type" : _) -> | |
Left "bad type" | |
STTree (STBare n : rest) | isMacro env n -> | |
return $ MacroStmt NoExt (Name n) rest | |
x -> Expr <$> parseTyped (treeToExpr env) env x | |
parseTyped | |
:: (SyntaxTree -> Either Text a) | |
-> FullEnv | |
-> SyntaxTree | |
-> Either Text (Typed Ps a) | |
parseTyped parseUntyped env = \case | |
STTree [STBare ":", x, typ] -> do | |
parsed <- parseTyped parseUntyped env x | |
case parsed of | |
-- maybe ideally `Typed` would be isomorphic to `([Type], )` | |
Typed _ _ -> Left "Can't type something twice" | |
UnTyped e -> Typed <$> parsePType env typ <*> pure e | |
STTree (STBare ":" : _) -> do | |
Left "Bad type annotation" | |
x -> UnTyped <$> parseUntyped x | |
parseName :: SyntaxTree -> Either Text Name | |
parseName = \case | |
STBare n -> return $ Name n | |
x -> Left $ "could not parse name: " <> tshow x | |
treeToExpr :: FullEnv -> SyntaxTree -> Either Text (Expr Ps) | |
treeToExpr env = \case | |
STString s -> return $ Val (Literal $ String s) | |
STFloat f -> return $ Val (Literal $ Float f) | |
STBare b -> return $ Var (Name b) | |
STTree [] -> Left "() is not currently a thing" | |
STTree [STBare "λ", params, body] -> do | |
b <- parseTyped (treeToExpr env) env body | |
let go xs = case xs of | |
[] -> Left "lambda needs at least one arg" | |
[n] -> Lam <$> parseTyped parseName env n <*> pure b | |
(n:ns) -> Lam <$> parseTyped parseName env n <*> (UnTyped <$> go ns) | |
case params of | |
STTree xs -> go xs | |
STBare x -> go [STBare x] | |
_ -> Left "bad lambda arg" | |
STTree (STBare "λ":_) -> | |
Left "bad lambda expr" | |
STTree [STBare "let", STTree bindings, body] -> do | |
bs <- parseBindings bindings | |
b <- parseTyped (treeToExpr env) env body | |
return $ Let bs b | |
STTree (STBare "let" : _) -> | |
Left "bad let" | |
STTree [STBare "letrec", STTree bindings, body] -> do | |
bs <- parseBindings bindings | |
b <- parseTyped (treeToExpr env) env body | |
return $ LetRec bs b | |
STTree (STBare "letrec":_) -> | |
Left "bad letrec" | |
STTree [STBare "if~", inTree, pattern, thenTree, elseTree] -> do | |
i <- parseTyped (treeToExpr env) env inTree | |
p <- parseTyped (parsePattern env) env pattern | |
t <- parseTyped (treeToExpr env) env thenTree | |
e <- parseTyped (treeToExpr env) env elseTree | |
return $ IfMatch i p t e | |
STTree (STBare "if~" : _) -> | |
Left "bad if~" | |
STTree (STBare n : rest) | isMacro env n -> | |
return $ MacroExpr NoExt (Name n) rest | |
STTree [a] -> (treeToExpr env) a | |
STTree [a1, a2] -> do | |
a1' <- parseTyped (treeToExpr env) env a1 | |
a2' <- parseTyped (treeToExpr env) env a2 | |
return $ Call a1' a2' | |
STTree (a1:a2:as) -> (treeToExpr env) $ STTree $ STTree [a1, a2] : as | |
where | |
parseBindings [] = return [] | |
parseBindings (STTree [name, v] : bs) = do | |
n <- parseTyped parseName env name | |
v' <- parseTyped (treeToExpr env) env v | |
((n, v') :) <$> parseBindings bs | |
parseBindings x = Left $ "could not parse bindings: " <> tshow x | |
treesToStmts :: FullEnv -> [SyntaxTree] -> Either CompileError [Stmt Ps] | |
treesToStmts env = mapM (first CEMalformedExpr . treeToStmt env) | |
parsePattern :: FullEnv -> SyntaxTree -> Either Text (Pattern Ps) | |
parsePattern env = \case | |
STBare n -> return $ case Text.stripPrefix "$" n of | |
Just n' -> PatVal $ Name n' | |
Nothing -> PatConstr (Name n) [] | |
STFloat n -> return $ PatLiteral $ Float n | |
STString s -> return $ PatLiteral $ String s | |
STTree [] -> Left "Empty pattern" | |
STTree (x:xs) -> parsePattern env x >>= \case | |
PatLiteral _ -> Left "Cannot have a literal at the head of a pattern" | |
PatVal _ -> Left "Cannot have a var at the head of a pattern" | |
PatConstr n ys -> do | |
xs' <- traverse (parseTyped (parsePattern env) env) xs | |
return $ PatConstr n (ys ++ xs') | |
parsePType :: FullEnv -> SyntaxTree -> Either Text (PType Ps) | |
parsePType env = \case | |
-- There's no way to parse a `Forall [] (MacroMType ...)`. But if the macro | |
-- expands into an MType, then as a PType it'll expand into a `Forall [] ...`, | |
-- so we don't lose expressivity. | |
STTree (STBare n : rest) | isMacro env n -> | |
return $ MacroPType NoExt (Name n) rest | |
tree -> Forall [] <$> parseMType env tree | |
parseMType :: FullEnv -> SyntaxTree -> Either Text (MType Ps) | |
parseMType env = \case | |
STBare n -> return $ case Text.take 1 n of | |
"$" -> TVar $ TV NoExt $ Name n | |
_ -> TCon $ TC NoExt $ Name n | |
STFloat _ -> Left "Cannot put a Float in a type" | |
STString _ -> Left "Cannot put a String in a type" | |
STTree [] -> Left "Empty type" | |
STTree (STBare n : rest) | isMacro env n -> | |
return $ MacroMType NoExt (Name n) rest | |
STTree xs -> do | |
ts <- mapM (parseMType env) xs | |
return $ foldl1' TApp ts | |
isMacro :: FullEnv -> Text -> Bool | |
isMacro env n = case Map.lookup (Name n) (feVars env) of | |
Nothing -> False | |
Just (t, _) -> t == Forall [] tMacro |