From d133b3ece99d1abf8467b2139ade353f2bbd49c7 Mon Sep 17 00:00:00 2001
From: Ryan Scott
Date: Thu, 6 Apr 2023 08:30:53 -0400
Subject: [PATCH] Store basis type separately in GEP/ConstGEP

With opaque pointers, one cannot tell what the basis type for a
`getelementptr` instruction (or constant expression) is by inspecting the
parent pointer. As a result, we now store the basis type separately in
`GEP`/`ConstGEP` so that it can be determined regardless of whether opaque
pointers are used or not.

Because this requires making a backwards-incompatible change to `ConstGEP`, I
took the opportunity to include the parent pointer value as a distinguished
field in `ConstGEP`. The `GEP` data constructor already does this, and it seems
oddly asymmetric to not have `ConstGEP` do the same, especially since LLVM
requires it to be present.

See #102.
---
 src/Text/LLVM.hs        |  2 +-
 src/Text/LLVM/AST.hs    | 10 +++++++---
 src/Text/LLVM/Labels.hs |  2 +-
 src/Text/LLVM/PP.hs     | 20 +++++++++-----------
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/Text/LLVM.hs b/src/Text/LLVM.hs
index 03d6e87..91c5c0c 100644
--- a/src/Text/LLVM.hs
+++ b/src/Text/LLVM.hs
@@ -695,7 +695,7 @@ select c t f = observe (typedType t)
 
 getelementptr :: IsValue a
               => Type -> Typed a -> [Typed Value] -> BB (Typed Value)
-getelementptr ty ptr ixs = observe ty (GEP False (toValue `fmap` ptr) ixs)
+getelementptr ty ptr ixs = observe ty (GEP False ty (toValue `fmap` ptr) ixs)
 
 -- | Emit a call instruction, and generate a new variable for its result.
 call :: IsValue a => Typed a -> [Typed Value] -> BB (Typed Value)
diff --git a/src/Text/LLVM/AST.hs b/src/Text/LLVM/AST.hs
index 2027ade..7facb53 100644
--- a/src/Text/LLVM/AST.hs
+++ b/src/Text/LLVM/AST.hs
@@ -881,10 +881,11 @@ data Instr' lab
          * Middle of basic block.
          * Returns a value of the specified type.
     -}
-  | GEP Bool (Typed (Value' lab)) [Typed (Value' lab)]
+  | GEP Bool Type (Typed (Value' lab)) [Typed (Value' lab)]
     {- ^ * "Get element pointer",
             compute the address of a field in a structure:
             inbounds check (value poisoned if this fails);
+            type to use as a basis for calculations;
             pointer to parent structure;
             path to a sub-component of a structure.
          * Middle of basic block.
@@ -1118,8 +1119,11 @@ extendMetadata md stmt = case stmt of
 -- Constant Expressions --------------------------------------------------------
 
 data ConstExpr' lab
-  = ConstGEP Bool (Maybe Word64) (Maybe Type) [Typed (Value' lab)]
-  -- ^ Element type introduced in LLVM 3.7
+  = ConstGEP Bool (Maybe Word64) Type (Typed (Value' lab)) [Typed (Value' lab)]
+  -- ^ Since LLVM 3.7, constant @getelementptr@ expressions include an explicit
+  -- type to use as a basis for calculations. For older versions of LLVM, this
+  -- type can be reconstructed by inspecting the pointee type of the parent
+  -- pointer value.
   | ConstConv ConvOp (Typed (Value' lab)) Type
   | ConstSelect (Typed (Value' lab)) (Typed (Value' lab)) (Typed (Value' lab))
   | ConstBlockAddr (Typed (Value' lab)) lab
diff --git a/src/Text/LLVM/Labels.hs b/src/Text/LLVM/Labels.hs
index 616f280..ad5b54a 100644
--- a/src/Text/LLVM/Labels.hs
+++ b/src/Text/LLVM/Labels.hs
@@ -66,7 +66,7 @@ instance HasLabel Instr' where
   relabel f (FCmp op l r) = FCmp op
     <$> traverse (relabel f) l
     <*> relabel f r
-  relabel f (GEP ib a is) = GEP ib
+  relabel f (GEP ib t a is) = GEP ib t
     <$> traverse (relabel f) a
     <*> traverse (traverse (relabel f)) is
   relabel f (Select c l r) = Select
diff --git a/src/Text/LLVM/PP.hs b/src/Text/LLVM/PP.hs
index 2e27c53..6b67915 100644
--- a/src/Text/LLVM/PP.hs
+++ b/src/Text/LLVM/PP.hs
@@ -554,7 +554,7 @@ ppInstr instr = case instr of
   ShuffleVector a b m -> "shufflevector" <+> ppTyped ppValue a
     <> comma <+> ppTyped ppValue (b <$ a)
     <> comma <+> ppTyped ppValue m
-  GEP ib ptr ixs -> ppGEP ib ptr ixs
+  GEP ib ty ptr ixs -> ppGEP ib ty ptr ixs
   Comment str -> char ';' <+> text str
   Jump i -> "br"
     <+> ppTypedLabel i
@@ -711,17 +711,15 @@ ppCallSym ty val = pp_ty <+> ppValue val
       -> ppType res
     _ -> ppType ty
 
-ppGEP :: LLVM => Bool -> Typed Value -> [Typed Value] -> Doc
-ppGEP ib ptr ixs = "getelementptr" <+> inbounds
-                <+> (if isImplicit then empty else explicit)
-                <+> commas (map (ppTyped ppValue) (ptr:ixs))
+ppGEP :: LLVM => Bool -> Type -> Typed Value -> [Typed Value] -> Doc
+ppGEP ib ty ptr ixs =
+  "getelementptr" <+> inbounds
+    <+> (if isImplicit then empty else explicit)
+    <+> commas (map (ppTyped ppValue) (ptr:ixs))
   where
   isImplicit = checkConfig cfgGEPImplicitType
 
-  explicit =
-    case typedType ptr of
-      PtrTo ty -> ppType ty <> comma
-      ty -> ppType ty <> comma
+  explicit = ppType ty <> comma
 
   inbounds | ib = "inbounds"
            | otherwise = empty
@@ -869,10 +867,10 @@ ppAsm s a i c =
 ppConstExpr' :: LLVM => (i -> Doc) -> ConstExpr' i -> Doc
 ppConstExpr' pp expr =
   case expr of
-    ConstGEP inb _mix mp ixs ->
+    ConstGEP inb _mix ty ptr ixs ->
       "getelementptr"
         <+> opt inb "inbounds"
-        <+> parens (mcommas ((ppType <$> mp) : (map (pure . ppTyp') ixs)))
+        <+> parens (commas (ppType ty : map ppTyp' (ptr:ixs)))
     ConstConv op tv t -> ppConvOp op <+> parens (ppTyp' tv <+> "to" <+> ppType t)
     ConstSelect c l r ->
       "select" <+> parens (commas [ ppTyp' c, ppTyp' l , ppTyp' r])