Skip to content

Commit

Permalink
feat: init support for dependency debug info
Browse files Browse the repository at this point in the history
  • Loading branch information
Andrew Kent committed Jun 17, 2021
1 parent 616f108 commit c846792
Show file tree
Hide file tree
Showing 10 changed files with 4,047 additions and 89 deletions.
169 changes: 158 additions & 11 deletions reopt-explore/Main_explore.hs
@@ -1,18 +1,26 @@
{-# LANGUAGE DataKinds #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies #-}


module Main (main) where

import Control.Exception (SomeException, catch)
import Control.Monad (foldM)
import Control.Monad (foldM, when)
import qualified Data.ByteString.Builder as BS
import qualified Data.ByteString.Char8 as BSC
import qualified Data.ByteString.Lazy as BSL
import qualified Data.ElfEdit as Elf
import Data.IORef (newIORef, readIORef)
import Data.List (intercalate)
import Data.Macaw.Discovery (DiscoveryOptions (..))
import Data.Macaw.X86 (X86_64)
import Data.Maybe (isJust)
import qualified Data.Map as Map
import Data.Parameterized.Some
import Data.Set (Set)
import qualified Data.Set as Set
import Data.Version (Version (versionBranch))
import Numeric.Natural (Natural)
import Paths_reopt (version)
Expand All @@ -29,8 +37,14 @@ import Reopt
parseElfHeaderInfo64,
recoverX86Elf,
renderLLVMBitcode,
SomeArchitectureInfo(..),
getElfArchInfo,
discoverFunDebugInfo,
debugInfoFileCache,
debugInfoDir
)
import Reopt.Events
import Reopt.TypeInference.FunTypeMaps
import Reopt.Utils.Dir
import Reopt.Utils.Exit
import System.Console.CmdArgs.Explicit
Expand All @@ -47,7 +61,11 @@ import System.Console.CmdArgs.Explicit
)
import System.Environment (getArgs)
import System.Exit (exitFailure)
import System.IO (hPutStr, hPutStrLn, stderr)
import System.FilePath (splitFileName)
import System.IO (hPutStr, hPutStrLn, stderr, IOMode(..), withFile)
import System.Directory
(createDirectoryIfMissing, getSymbolicLinkTarget, canonicalizePath, createFileLink,
withCurrentDirectory, doesFileExist, removeFile)
import Text.Printf (printf)

reoptVersion :: String
Expand All @@ -56,9 +74,18 @@ reoptVersion = "Reopt binary explorer (reopt-explore) " ++ versionString ++ "."
[h, l, r] = versionBranch version
versionString = show h ++ "." ++ show l ++ "." ++ show r

data ExploreMode =
-- | Attempt to perform a full reopt run on each binary for statistics.
ReoptExploreMode
-- | Extract debug information for functions only, storing the result
-- for later use by reopt.
| DebugExploreMode

-- | Command line arguments.
data Args = Args
{ -- | Path to input program to optimize/export
{ -- | What to do with each encountered binary.
exploreMode :: ExploreMode,
-- | Path to input program to optimize/export
programPaths :: ![FilePath],
-- | Path to `clang` command.
--
Expand All @@ -78,7 +105,8 @@ data Args = Args
defaultArgs :: Args
defaultArgs =
Args
{ programPaths = [],
{ exploreMode = ReoptExploreMode,
programPaths = [],
clangPath = "clang",
exportFnResultsPath = Nothing,
exportSummaryPath = Nothing,
Expand Down Expand Up @@ -116,6 +144,12 @@ verboseFlag = flagNone ["verbose", "v"] upd help
upd old = old {verbose = True}
help = "Show output of individual binaries."

debugInfoFlag :: Flag Args
debugInfoFlag = flagNone ["debug-info", "d"] upd help
where
upd old = old {exploreMode = DebugExploreMode}
help = "Explore and export debug information for functions only."

-- | Flag to set the path to the binary to analyze.
filenameArg :: Arg Args
filenameArg =
Expand All @@ -137,7 +171,8 @@ arguments = mode "reopt-explore" defaultArgs help filenameArg flags
clangPathFlag,
exportFnResultsFlag,
exportSummaryFlag,
verboseFlag
verboseFlag,
debugInfoFlag
]

getCommandLineArgs :: IO Args
Expand Down Expand Up @@ -277,18 +312,114 @@ renderSummaryStats results = formatSummary $ foldr processResult initSummaryStat
]
++ ppStats (totalStats s)

-- | Summary of results from parsing the debug info of an elf file.
data ExploreDebugResult =
ExploreDebugResult
{ -- | Absolute path to file debug info was gathered for.
debugFileAbsPath :: !FilePath,
-- | File debug info was cached in.
debugFileCachePath :: !FilePath,
-- | Number of functions debug info was gathered for.
debugFnCount :: !Int,
-- | Whether there was additional info gathered that was not used.
debugSkippedInfo :: !Bool
}


-- | Parse the debug section of an elf file, emit the gathered information
-- into a file in the REOPTHOME directory, and record some basic metrics
-- regarding the data collected.
exploreDebugInfo ::
[ExploreDebugResult] ->
FilePath ->
IO [ExploreDebugResult]
exploreDebugInfo results fPath = do
Some hdrInfo <- do
bs <- checkedReadFile fPath
case Elf.decodeElfHeaderInfo bs of
Left (_, msg) -> do
hPutStrLn stderr $ "Error reading " ++ fPath ++ ":"
hPutStrLn stderr $ " " ++ msg
exitFailure
Right (Elf.SomeElf hdr) ->
pure $! Some hdr
let hdr = Elf.header hdrInfo
-- Get architecture specific information
marchInfo <- getElfArchInfo (Elf.headerClass hdr) (Elf.headerMachine hdr) (Elf.headerOSABI hdr)
(warnings, SomeArch ainfo _pltFn) <- handleEitherStringWithExit marchInfo
mapM_ (hPutStrLn stderr) warnings
mFnMap <- runReoptM printLogEvent $
discoverFunDebugInfo hdrInfo ainfo
fnMap <- handleEitherWithExit mFnMap
cPath <- debugInfoFileCache $ snd (splitFileName fPath)
withFile cPath WriteMode $ \h -> hPutStrLn h (show $ nameTypeMap fnMap)

absPath <- canonicalizePath fPath
let addrTypeMapSz = Map.size $ addrTypeMap fnMap
let noreturnMapSz = Map.size $ noreturnMap fnMap
let result = ExploreDebugResult
{ debugFileAbsPath = absPath,
debugFileCachePath = cPath,
debugFnCount = Map.size $ nameTypeMap fnMap,
debugSkippedInfo = addrTypeMapSz > 0 || noreturnMapSz > 0
}
when (not $ 0 == addrTypeMapSz) $ do
hPutStrLn stderr $ "WARNING: " ++ show addrTypeMapSz ++ " functions in debug info ignored (addrTypeMap) in " ++ fPath ++ "."
when (not $ 0 == noreturnMapSz) $ do
hPutStrLn stderr $ "WARNING: " ++ show noreturnMapSz ++ " functions in debug info ignored (noreturnMap) in " ++ fPath ++ "."
pure $ result : results

-- | Examine a symbolic link to see if it refers to a previously cached debug
-- library's debug info. If the link does correspond to such a file, create a
-- symbolic link in the debug cache to the other cached file. This is necessary
-- because many binaries list libraries they depend on which are actually
-- symbolic links to a library with a slightly different name, so by mimicking
-- these links in our debug cache we can find the cached debug info.
exploreLink ::
Set FilePath ->
() ->
FilePath ->
IO ()
exploreLink targets () linkPath = do
tgtPath <- getSymbolicLinkTarget linkPath
let (linkDir, linkName) = splitFileName linkPath
absTgtPath <- withCurrentDirectory linkDir $ canonicalizePath tgtPath
if not $ Set.member absTgtPath targets then pure ()
else do
newLinkDestPath <- debugInfoFileCache $ snd $ splitFileName absTgtPath
newLinkPath <- debugInfoFileCache $ linkName
alreadyExists <- doesFileExist newLinkPath
when alreadyExists $ removeFile newLinkPath
createFileLink newLinkDestPath newLinkPath

renderDebugResult :: ExploreDebugResult -> String
renderDebugResult res =
(debugFileAbsPath res)++":\n "++(show $ debugFnCount res)++" functions' type info discovered in debug section."

renderDebugSummary :: FilePath -> [ExploreDebugResult] -> String
renderDebugSummary debugDir results =
"\n\nDebug Exploration Totals"
++"\n "++(show totalCnt)++" functions discovered in the debug sections of "++(show (length results))++" elf files."
++ maybeWarnMsg
++ "\n Information cached at " ++ debugDir ++ "."
where totalCnt = foldl (+) 0 $ map debugFnCount results
warnCnt = foldl (+) (0 :: Int) $ map (\res -> if debugSkippedInfo res then 1 else 0) results
maybeWarnMsg = if warnCnt == 0
then ""
else "\n "++(show warnCnt)++" elf files had debug type information which was not incorporated."

main :: IO ()
main = do
args <- getCommandLineArgs
case (showHelp args, programPaths args) of
(True, _) -> do
case (showHelp args, programPaths args, exploreMode args) of
(True, _, _) -> do
print $ helpText [] HelpFormatAll arguments
(False, []) -> do
(False, [], _) -> do
hPutStrLn stderr "Must provide at least one input program or directory to explore."
hPutStrLn stderr "Use --help to see additional options."
exitFailure
(False, paths) -> do
results <- foldM (withElfFilesInDir (exploreBinary args)) [] paths
(False, paths, ReoptExploreMode) -> do
results <- foldM (withElfExeFilesInDir (exploreBinary args)) [] paths
mapM_ (\s -> hPutStr stderr ("\n" ++ renderExplorationResult s)) results
hPutStrLn stderr $ renderSummaryStats results
case exportFnResultsPath args of
Expand All @@ -305,6 +436,22 @@ main = do
overallSummary = renderSummaryStats results
writeFile exportPath $ individualSummaries ++ "\n" ++ overallSummary
hPutStrLn stderr $ "Summary statistics written to " ++ exportPath ++ "."
(False, paths, DebugExploreMode) -> do
when (isJust $ exportFnResultsPath args) $ do
hPutStrLn stderr "The --export-fn-results flag not compatible with the --debug-info flag."
exitFailure
when (isJust $ exportFnResultsPath args) $ do
hPutStrLn stderr "The --export-summary flag not compatible with the --debug-info flag."
exitFailure
infoDir <- debugInfoDir
createDirectoryIfMissing True infoDir
results <- foldM (withElfFilesInDir exploreDebugInfo) [] paths
let tgts = Set.fromList $ map debugFileAbsPath results
foldM (withSymLinksInDir (exploreLink tgts)) () paths
mapM_ (\s -> hPutStr stderr ("\n" ++ renderDebugResult s)) results
hPutStrLn stderr $ renderDebugSummary infoDir results


where
toRows :: ExplorationResult -> [[String]]
toRows (ExplorationStats summary _stats _) = summaryRows summary
toRows (ExplorationStats summary _stats _) = summaryRows summary
6 changes: 5 additions & 1 deletion reopt.cabal
Expand Up @@ -54,6 +54,7 @@ library

exposed-modules:
Reopt
Reopt.CentOS7Debug
Reopt.CFG.FnRep.X86
Reopt.CFG.LLVM
Reopt.EncodeInvariants
Expand All @@ -64,6 +65,7 @@ library
Reopt.PltParser
Reopt.Relinker.Binary
Reopt.Server
Reopt.TypeInference.FunTypeMaps
Reopt.Utils.Dir
Reopt.Utils.Exit
other-modules:
Expand All @@ -85,7 +87,6 @@ library
Reopt.Relinker.Relations
Reopt.Relinker.Relocations
Reopt.TypeInference.DebugTypes
Reopt.TypeInference.FunTypeMaps
Reopt.TypeInference.Header
Reopt.TypeInference.HeaderTypes
Reopt.Utils.Flags
Expand Down Expand Up @@ -141,11 +142,14 @@ executable reopt-explore
bytestring,
cmdargs,
containers,
directory,
elf-edit,
filepath,
lens,
macaw-base,
macaw-x86,
mtl,
parameterized-utils >= 0.1.6,
reopt,
unix

Expand Down
54 changes: 39 additions & 15 deletions scripts/reopt_explore_tests.sh
Expand Up @@ -2,18 +2,42 @@

set -e

tar_file="deps/reopt-benchmark-binaries/centos7-dev-bin.tar"
lzma_file="$tar_file.lzma"

if [[ "$lzma_file" -nt "$tar_file" ]] ; then
unlzma -k "$lzma_file"
fi

mkdir -p deps/reopt-benchmark-binaries/centos7-dev
pushd deps/reopt-benchmark-binaries/centos7-dev
echo "Unpacking"
tar -xf "../centos7-dev-bin.tar"
# This seems to be a tar-inside-tar
tar -xf "centos7-dev-bin.tar"
cabal run exe:reopt-explore -- ./bin --export-summary=centos7-bin-summary.txt
popd
CENTOS7_DEV_DIR="centos7-dev"


unlzma_mv_untar () {
echo -n "Unpacking and extracting contents of $1.lzma to $2..."
if [[ "$1.lzma" -nt "$1" ]] ; then
unlzma -k "$1.lzma"
fi
mkdir -p $2
if [[ -e "$1" ]] ; then
mv $1 $2
fi
pushd $2 > /dev/null
tar -xkf $1
popd > /dev/null # $2
echo " done!"
}

SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
BENCHMARK_DIR="$SCRIPT_DIR/../deps/reopt-benchmark-binaries"
pushd $BENCHMARK_DIR > /dev/null


CENTOS7_DEV_BIN="centos7-dev-bin.tar"
CENTOS7_DEV_LIB64="centos7-dev-lib64.tar"
CENTOS7_DEV_DEBUG_LIB64="centos7-dev-debug-lib64.tar"

echo "Decompressing and unpacking centos7-dev files..."
unlzma_mv_untar $CENTOS7_DEV_BIN $CENTOS7_DEV_DIR
unlzma_mv_untar $CENTOS7_DEV_LIB64 $CENTOS7_DEV_DIR
unlzma_mv_untar $CENTOS7_DEV_DEBUG_LIB64 $CENTOS7_DEV_DIR


# echo "Exploring centos7-dev binaries..."
# pushd $CENTOS7_DEV_DIR
# cabal run exe:reopt-explore -- ./bin --export-summary=centos7-bin-summary.txt
# popd # $CENTOS7_DEV_DIR

popd > /dev/null # $BENCHMARK_DIR

0 comments on commit c846792

Please sign in to comment.