Skip to content

Commit

Permalink
Adapt to GEP/ConstGEP gaining explicit base types
Browse files Browse the repository at this point in the history
This bumps the `llvm-pretty` submodule to bring in the changes to the
`GEP`/`ConstGEP` data constructors from GaloisInc/llvm-pretty#110 and adapts the
code in `llvm-pretty-bc-parser` accordingly.

Because `ConstGEP` now stores the basis type for calculations explicitly, I
needed to fix #218 in order to ensure that the basis type is always parsed
properly. In the process of fixing this issue, I refactored the `parseCeGep` to
make the code clearer and more closely mirror the structure of LLVM's own
bitcode parser.

This is necessary in order to use `getelementptr` on an opaque pointer.
See #177. A test case will be added in a subsequent commit.
  • Loading branch information
RyanGlScott committed May 30, 2023
1 parent 6349a78 commit 02fdbd0
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 30 deletions.
2 changes: 1 addition & 1 deletion llvm-pretty
74 changes: 55 additions & 19 deletions src/Data/LLVM/BitCode/IR/Constants.hs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import Data.Bits (shiftL,shiftR,testBit, Bits)
import Data.LLVM.BitCode.BitString ( pattern Bits' )
import qualified Data.LLVM.BitCode.BitString as BitS
import qualified Data.Map as Map
import Data.Maybe (fromMaybe, isJust)
import Data.Maybe (fromMaybe)
import Data.Word (Word16, Word32,Word64)

#if __GLASGOW_HASKELL__ >= 704
Expand Down Expand Up @@ -317,7 +317,7 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) =
-- [n x operands]
12 -> label "CST_CODE_CE_GEP" $ do
ty <- getTy
v <- parseCeGep False Nothing t r
v <- parseCeGep CeGepCode12 t r
return (getTy,Typed ty v:cs)

-- [opval,opval,opval]
Expand Down Expand Up @@ -369,7 +369,7 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) =
-- [n x operands]
20 -> label "CST_CODE_CE_INBOUNDS_GEP" $ do
ty <- getTy
v <- parseCeGep True Nothing t r
v <- parseCeGep CeGepCode20 t r
return (getTy,Typed ty v:cs)

-- [funty,fnval,bb#]
Expand Down Expand Up @@ -411,10 +411,7 @@ parseConstantEntry t (getTy,cs) (fromEntry -> Just r) =
-- [opty, flags, n x operands]
24 -> label "CST_CODE_CE_GEP_WITH_INRANGE_INDEX" $ do
ty <- getTy
(flags :: Word64) <- parseField r 1 numeric
let inBounds = testBit flags 0
inRangeIndex = flags `shiftR` 1
v <- parseCeGep inBounds (Just inRangeIndex) t r
v <- parseCeGep CeGepCode24 t r
return (getTy,Typed ty v:cs)

-- [opcode, opval]
Expand Down Expand Up @@ -454,23 +451,62 @@ parseConstantEntry _ st (abbrevDef -> Just _) =
parseConstantEntry _ _ e =
fail ("constant block: unexpected: " ++ show e)

parseCeGep :: Bool -> Maybe Word64 -> ValueTable -> Record -> Parse PValue
parseCeGep isInbounds mInrangeIdx t r = do
let isExplicit = odd (length (recordFields r)) -- TODO: is this right for INRANGE_INDEX?
firstIdx = if isJust mInrangeIdx then 2 else if isExplicit then 1 else 0
field = parseField r
loop n = do
-- | The different codes for constant @getelementptr@ expressions. Each one has
-- minor differences in how they are parsed.
data CeGepCode
= CeGepCode12
-- ^ @CST_CODE_CE_GEP = 12@. The original.
| CeGepCode20
-- ^ @CST_CODE_CE_INBOUNDS_GEP = 20@. This adds an @inbounds@ field that
-- indicates that the result value should be poison if it performs an
-- out-of-bounds index.
| CeGepCode24
-- ^ @CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24@. This adds an @inrange@ field
-- that indicates that loading or storing to the result pointer will have
-- undefined behavior if the load or store would access memory outside of the
-- bounds of the indices marked as @inrange@.
deriving Eq

-- | Parse a 'ConstGEP' value. There are several variations on this theme that
-- are captured in the 'CeGepCode' argument.
parseCeGep :: CeGepCode -> ValueTable -> Record -> Parse PValue
parseCeGep code t r = do
let field = parseField r

(mbBaseTy, ix0) <-
if code == CeGepCode24 || odd (length (recordFields r))
then do baseTy <- getType =<< field 0 numeric
pure (Just baseTy, 1)
else pure (Nothing, 0)

(isInbounds, mInrangeIdx, ix1) <-
case code of
CeGepCode12 -> pure (False, Nothing, ix0)
CeGepCode20 -> pure (True, Nothing, ix0)
CeGepCode24 -> do
(flags :: Word64) <- parseField r ix0 numeric
let inbounds = testBit flags 0
inrangeIdx = flags `shiftR` 1
pure (inbounds, Just inrangeIdx, ix0 + 1)

let loop n = do
ty <- getType =<< field n numeric
elt <- field (n+1) numeric
rest <- loop (n+2) `mplus` return []
cxt <- getContext
return (Typed ty (typedValue (forwardRef cxt elt t)) : rest)
mPointeeType <-
if isExplicit
then Just <$> (getType =<< field 0 numeric)
else pure Nothing
args <- loop firstIdx
return $! ValConstExpr (ConstGEP isInbounds mInrangeIdx mPointeeType args)
args <- loop ix1
(ptr, args') <-
case args of
[] -> fail "Invalid constant GEP with no operands"
(base:args') -> pure (base, args')

baseTy <-
case mbBaseTy of
Just baseTy -> pure baseTy
Nothing -> Assert.elimPtrTo "constant GEP not headed by pointer" (typedType ptr)

return $! ValConstExpr (ConstGEP isInbounds mInrangeIdx baseTy ptr args')

parseWideInteger :: Record -> Int -> Parse Integer
parseWideInteger r idx = do
Expand Down
21 changes: 11 additions & 10 deletions src/Data/LLVM/BitCode/IR/Function.hs
Original file line number Diff line number Diff line change
Expand Up @@ -1117,14 +1117,15 @@ baseType ty = ty
-- [n x operands]
parseGEP :: ValueTable -> Maybe Bool -> Record -> PartialDefine -> Parse PartialDefine
parseGEP t mbInBound r d = do
(ib, tv, r', ix) <-
(ib, ty, tv, r', ix) <-
case mbInBound of

-- FUNC_CODE_INST_GEP_OLD
-- FUNC_CODE_INST_INBOUNDS_GEP_OLD
Just ib -> do
(tv,ix') <- getValueTypePair t r 0
return (ib, tv, r, ix')
ty <- Assert.elimPtrTo "GEP not headed by pointer" (typedType tv)
return (ib, ty, tv, r, ix')

-- FUNC_CODE_INST_GEP
Nothing -> do
Expand All @@ -1142,11 +1143,11 @@ parseGEP t mbInBound r d = do
, "Base type of operand: " ++ show (ppType (baseType (typedType tv)))
])
-}
return (ib, tv { typedType = PtrTo ty }, r', ix')
return (ib, ty, tv, r', ix')

args <- label "parseGepArgs" (parseGepArgs t r' ix)
rty <- label "interpGep" (interpGep (typedType tv) args)
result rty (GEP ib tv args) d
rty <- label "interpGep" (interpGep ty tv args)
result rty (GEP ib ty tv args) d

-- Parse an @atomicrmw@ instruction, which can be represented by one of the
-- following function codes:
Expand Down Expand Up @@ -1293,16 +1294,16 @@ parseGepArgs t r = loop
rest <- loop ix'
return (tv:rest)

-- | Interpret the getelementptr arguments, to determine the final type of the
-- instruction.
interpGep :: Type -> [Typed PValue] -> Parse Type
interpGep ty vs = check (resolveGep ty vs)
-- | Interpret a getelementptr instruction to determine its result type.
interpGep :: Type -> Typed PValue -> [Typed PValue] -> Parse Type
interpGep baseTy ptr vs = check (resolveGep baseTy ptr vs)
where
check res = case res of
HasType rty -> return (PtrTo rty)
Invalid -> fail $ unlines $
[ "Unable to determine the type of getelementptr"
, "Input type: " ++ show ty
, "Base type: " ++ show baseTy
, "Pointer value: " ++ show ptr
]
Resolve i k -> do
ty' <- getType' =<< getTypeId i
Expand Down

0 comments on commit 02fdbd0

Please sign in to comment.