diff --git a/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/Lexer.hs b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/Lexer.hs new file mode 100644 index 00000000000..c102baab83a --- /dev/null +++ b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/Lexer.hs @@ -0,0 +1,96 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeFamilies #-} +module UntypedPlutusCore.Lexer where + +import PlutusCore.Lexer +import PlutusCore.Lexer.Type as L (Token) +import PlutusPrelude (NonEmpty ((:|)), Pretty (pretty), Render (render)) + +import qualified Data.List as DL +import qualified Data.List.NonEmpty as NE +import Data.Proxy (Proxy (..)) +import Text.Megaparsec + +data WithPos a = WithPos + { startPos :: SourcePos + , endPos :: SourcePos + , tokenLength :: Int + , tokenVal :: a + } deriving (Eq, Ord, Show) + +data MyStream = MyStream + { myStreamInput :: String -- for showing offending lines + , unMyStream :: [WithPos (L.Token AlexPosn)] + } +instance Stream MyStream where + type Token MyStream = WithPos (L.Token AlexPosn) + type Tokens MyStream = [WithPos (L.Token AlexPosn)] + + tokenToChunk Proxy x = [x] + tokensToChunk Proxy xs = xs + chunkToTokens Proxy = id + chunkLength Proxy = length + chunkEmpty Proxy = null + take1_ (MyStream _ []) = Nothing + take1_ (MyStream str (t:ts)) = Just + ( t + , MyStream (drop (tokensLength pxy (t:|[])) str) ts + ) + takeN_ n (MyStream str s) + | n <= 0 = Just ([], MyStream str s) + | null s = Nothing + | otherwise = + let (x, s') = splitAt n s + in case NE.nonEmpty x of + Nothing -> Just (x, MyStream str s') + Just nex -> Just (x, MyStream (drop (tokensLength pxy nex) str) s') + takeWhile_ f (MyStream str s) = + let (x, s') = DL.span f s + in case NE.nonEmpty x of + Nothing -> (x, MyStream str s') + Just nex -> (x, MyStream (drop (tokensLength pxy nex) str) s') + +showToken :: L.Token AlexPosn -> String +showToken = render . pretty + +instance VisualStream MyStream where + showTokens Proxy = unwords + . NE.toList + . fmap (showToken . tokenVal) + tokensLength Proxy xs = sum (tokenLength <$> xs) + +instance TraversableStream MyStream where + reachOffset o PosState {..} = + ( Just (prefix ++ restOfLine) + , PosState + { pstateInput = MyStream + { myStreamInput = postStr + , unMyStream = post + } + , pstateOffset = max pstateOffset o + , pstateSourcePos = newSourcePos + , pstateTabWidth = pstateTabWidth + , pstateLinePrefix = prefix + } + ) + where + prefix = + if sameLine + then pstateLinePrefix ++ preStr + else preStr + sameLine = sourceLine newSourcePos == sourceLine pstateSourcePos + newSourcePos = + case post of + [] -> pstateSourcePos + (x:_) -> startPos x + (pre, post) = splitAt (o - pstateOffset) (unMyStream pstateInput) + (preStr, postStr) = splitAt tokensConsumed (myStreamInput pstateInput) + tokensConsumed = + case NE.nonEmpty pre of + Nothing -> 0 + Just nePre -> tokensLength pxy nePre + restOfLine = takeWhile (/= '\n') postStr + +pxy :: Proxy MyStream +pxy = Proxy diff --git a/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/NewParser.hs b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/NewParser.hs new file mode 100644 index 00000000000..4e86f29fe07 --- /dev/null +++ b/plutus-core/untyped-plutus-core/src/UntypedPlutusCore/Parser/NewParser.hs @@ -0,0 +1,144 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE TypeApplications #-} +{-# LANGUAGE TypeOperators #-} + +{-# OPTIONS_GHC -fno-warn-orphans #-} + +-- | Parser for untyped Plutus Core using megaparsec, as in Plutus IR. +-- Will replace UntypedPlutusCore.Parser.hs. +-- Parser.y and Lexer.x, which currently generate Parser.hs, will be removed. + +module UntypedPlutusCore.NewParser + ( topSourcePos + , parse + , parseQuoted + , term + , program + , parseTerm + , parseProgram + , parseScoped + , Parser + , SourcePos + ) where + +import Prelude hiding (fail) + +import Control.Monad.Except ((<=<)) +import Control.Monad.State (StateT) + +import qualified NewUntypedPlutusCore as UPLC +import qualified PlutusCore as PLC +import qualified PlutusCore.Parsable as PLC +import PlutusPrelude (Pretty, through) +import Text.Megaparsec hiding (ParseError, State, parse) +import UntypedPlutusCore.Check.Uniques (checkProgram) +import UntypedPlutusCore.Rename (Rename (rename)) + +import Data.ByteString.Lazy (ByteString) +import Data.ByteString.Lazy.Internal (unpackChars) +import qualified Data.Text as T +import PlutusCore.ParserCommon + +-- Parsers for UPLC terms + +conTerm + :: (PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) + => Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +conTerm = inParens $ UPLC.Constant <$> wordPos "con" <*> constant + +builtinTerm :: (Bounded fun, Enum fun, Pretty fun) + => Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +builtinTerm = inParens $ UPLC.Builtin <$> wordPos "builtin" <*> builtinFunction + +varTerm :: Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +varTerm = UPLC.Var <$> getSourcePos <*> name + +lamTerm :: ParsecT (PLC.ParseError SourcePos) + T.Text + (StateT ParserState PLC.Quote) + (UPLC.Term PLC.Name uni fun SourcePos) + -> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +lamTerm tm = inParens $ UPLC.LamAbs <$> wordPos "lam" <*> name <*> tm + +appTerm :: ParsecT (PLC.ParseError SourcePos) + T.Text + (StateT ParserState PLC.Quote) + (UPLC.Term PLC.Name uni fun SourcePos) + -> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +appTerm tm = inBrackets $ UPLC.Apply <$> getSourcePos <*> tm <*> tm + +delayTerm :: ParsecT (PLC.ParseError SourcePos) + T.Text + (StateT ParserState PLC.Quote) + (UPLC.Term PLC.Name uni fun SourcePos) + -> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +delayTerm tm = inParens $ UPLC.Delay <$> wordPos "abs" <*> tm + +forceTerm :: ParsecT (PLC.ParseError SourcePos) + T.Text + (StateT ParserState PLC.Quote) + (UPLC.Term PLC.Name uni fun SourcePos) + -> Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +forceTerm tm = inBraces $ UPLC.Force <$> getSourcePos <*> tm + +errorTerm + :: Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +errorTerm = inParens $ UPLC.Error <$> wordPos "error" + +-- | Parser for all UPLC terms. +term + :: ( PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable + , Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) + => Parser SourcePos (UPLC.Term PLC.Name uni fun SourcePos) +term = conTerm + <|> builtinTerm + <|> varTerm + <|> lamTerm self + <|> appTerm self + <|> delayTerm self + <|> forceTerm self + <|> errorTerm + where self = term + +-- | Parser for UPLC programs. +program + :: ( PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable + , Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) + => Parser SourcePos (UPLC.Program PLC.Name uni fun SourcePos) +program = whitespace >> do + prog <- inParens $ UPLC.Program <$> wordPos "program" <*> version <*> term + notFollowedBy anySingle + return prog + +-- | Generic parser function. +parseGen :: Parser SourcePos a -> ByteString -> Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) a +parseGen stuff bs = parse stuff "test" $ (T.pack . unpackChars) bs + +-- | Parse a PLC term. The resulting program will have fresh names. The underlying monad must be capable +-- of handling any parse errors. +parseTerm :: + (PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable + , Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) => + ByteString -> + Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) (UPLC.Term PLC.Name uni fun SourcePos) +parseTerm = parseGen term + +-- | Parse a PLC program. The resulting program will have fresh names. The underlying monad must be capable +-- of handling any parse errors. +parseProgram :: (PLC.Parsable (PLC.Some uni), PLC.Closed uni, uni `PLC.Everywhere` PLC.Parsable + , Bounded fun, Enum fun, Pretty fun, PLC.Parsable (PLC.SomeTypeIn (PLC.Kinded uni))) => ByteString -> Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) (UPLC.Program PLC.Name uni fun SourcePos) +parseProgram = parseGen program + +-- | Parse and rewrite so that names are globally unique, not just unique within +-- their scope. +parseScoped + :: (PLC.MonadQuote (Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos))) + , PLC.AsUniqueError (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) SourcePos + , PLC.Parsable (PLC.Some PLC.DefaultUni)) + => ByteString + -> Either (ParseErrorBundle T.Text (PLC.ParseError SourcePos)) (UPLC.Program PLC.Name PLC.DefaultUni PLC.DefaultFun SourcePos) +-- don't require there to be no free variables at this point, we might be parsing an open term +parseScoped = through (checkProgram (const True)) <=< rename <=< parseProgram