Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Show statistics relative to all included TF matrices and probes, not …

…just those which have hits
  • Loading branch information...
commit beebcc896b73bff05b42578433c0f177a870046e 1 parent 0e2f2c2
Andrew authored
Showing with 58 additions and 26 deletions.
  1. +58 −26 motocse.hs
View
84 motocse.hs
@@ -6,10 +6,10 @@ import qualified Data.Map as M
import qualified Data.Set as S
import Data.List
import Control.Monad
-import Debug.Trace
-
-fsaDirectory = "/home/andrew/Documents/fsafiles/"
-inputPredictionFile = "/home/andrew/Documents/gmatim/hits.fasta"
+import System.Environment
+import System.Console.GetOpt
+import qualified Data.ByteString.Char8 as BS
+import qualified Data.Traversable as T
unsafeRight (Left _) = error "Expected Left operand in unsafeEitherFirst"
unsafeRight (Right x) = x
@@ -82,7 +82,7 @@ flipPair (x, y) = (y, x)
pairListToAssociationSet = foldl' addPairListByFirstToSet S.empty
addPairListByFirstToSet s (f, l) = foldl' (\s' -> \v -> S.insert (f, v) s') s l
-setCrossProduct x y = appendListCrossProduct [] (S.toList x) (S.toList y)
+listCrossProduct x y = appendListCrossProduct [] x y
mapAndPrepend f v iv = foldl' (\x -> \y -> (f y):x ) iv v
appendListCrossProduct l [] y = l
appendListCrossProduct l (xs:x) y = (appendListCrossProduct (mapAndPrepend ((,)xs) y l) x y)
@@ -99,34 +99,66 @@ updateClassificationCounts standardSet testSet ((!tp, !fp), (!tn, !fn)) v =
else
((tp, fp), ((tn + 1), fn))
-testPredictions :: [(String, [String])] -> [(String, [String])] -> ((Int, Int), (Int, Int))
-testPredictions goldStandard compareTo =
- trace "Entering testPredictions" $ trace "Finished testPredictions" $!
+testPredictions :: [String] -> [String] -> [(String, [String])] -> [(String, [String])] -> ((Int, Int), (Int, Int))
+testPredictions allTFs allProbes goldStandard compareTo =
let
-- In both sets of pairs, make the gene the first member, and the probe the second.
- goldStandardPairs = trace "Making goldStandardPairs" $ trace "Done making goldStandardPairs" $! pairListToAssociationSet goldStandard
+ goldStandardPairs = pairListToAssociationSet goldStandard
compareToPairs = S.map flipPair $ pairListToAssociationSet compareTo
- -- Get a set of all genes in the gold standard data.
- allGenes = S.map fst goldStandardPairs
- -- Get a set of all probes in the gold standard data.
- allProbes = S.map snd goldStandardPairs
- allPairs = setCrossProduct allGenes allProbes
+ allPairs = listCrossProduct allTFs allProbes
in
foldl' (updateClassificationCounts goldStandardPairs compareToPairs) ((0, 0), (0, 0)) allPairs
+data CmdOptions = CmdOptions {
+ cmdOptFSADirectory :: Maybe String,
+ cmdOptInputPredictionFile :: Maybe String,
+ cmdOptIncludeTFFile :: Maybe String,
+ cmdOptShowHelp :: Bool
+};
+
+commandLineOptions =
+ [
+ Option [] ["help"] (NoArg (\opts -> opts {cmdOptShowHelp = True } ) ) "Shows this help message",
+ Option [] ["prediction_file"] (ReqArg (\f -> \opts -> opts { cmdOptInputPredictionFile = Just f } ) "DIR") "The input file containing the predictions to be evaluate against the FSA gold standard",
+ Option [] ["fsa_directory"] (ReqArg (\f -> \opts -> opts {cmdOptFSADirectory = Just f } ) "DIR") "The directory from which the 'gold standard' probe binding data should be read",
+ Option [] ["include_tfs_file"] (ReqArg (\f opts -> opts { cmdOptIncludeTFFile = Just f }) "FILENAME") "A file containing a list of probes to use (optional)"
+ ]
+
+blankCommandLine = CmdOptions Nothing Nothing Nothing False
+
main = do
- files <- getDirectoryContents fsaDirectory
- boundByGene <-
- let
- filesByGene = groupByFirst $ map (unsafeRight . parse parseOutGeneName "Filename") (filter ((/='.') . (flip (!!)) 0) files)
- in
- forM filesByGene $ \(gene, filenames) ->
+ cmdline <- getArgs
+ case (getOpt RequireOrder commandLineOptions cmdline) of
+ (_, _, errs@(_:_)) ->
+ putStrLn ((showString "Errors: " . shows errs) "")
+ (opts, _, _) ->
+ case (foldl' (\state -> \t -> t state) blankCommandLine opts) of
+ CmdOptions { cmdOptShowHelp = True } ->
+ putStrLn $ usageInfo "motocse" commandLineOptions
+ CmdOptions { cmdOptInputPredictionFile = Nothing } ->
+ putStrLn "--prediction_file option is mandatory."
+ CmdOptions { cmdOptFSADirectory = Nothing } ->
+ putStrLn "--fsa_directory option is mandatory."
+ CmdOptions { cmdOptFSADirectory = Just fsaDirectory, cmdOptInputPredictionFile = Just inputPredictionFile,
+ cmdOptIncludeTFFile = includeTFFFile } ->
+ mainWithOptions fsaDirectory inputPredictionFile includeTFFFile
+
+mainWithOptions fsaDirectory inputPredictionFile includeTFFile =
+ do
+ includeTFs <-
+ T.traverse (liftM (map BS.unpack . BS.lines) . BS.readFile) includeTFFile
+ files <- getDirectoryContents fsaDirectory
+ let filesByGene = groupByFirst $ map (unsafeRight . parse parseOutGeneName "Filename") (filter ((/='.') . (flip (!!)) 0) files)
+ let filesByGeneFiltered = maybe id (filter . flip (elem . fst)) includeTFs filesByGene
+ boundByGene <-
+ forM filesByGeneFiltered $ \(gene, filenames) ->
do
probes <- liftM concat $ mapM (loadProbesFromFile . (fsaDirectory++)) filenames
return (gene, probes)
- predictBoundByGene <-
- loadGenesForProbesFromFile inputPredictionFile
- let
- ((truePositives, falsePositives), (trueNegatives, falseNegatives)) = testPredictions boundByGene predictBoundByGene
- in
- putStrLn $ (showString "TruePositives = " . shows truePositives . showString "; TrueNegatives = " . shows trueNegatives . showString "; FalsePositives = " . shows falsePositives . showString "; FalseNegatives = " . shows falseNegatives) ""
+ predictBoundByGene <- loadGenesForProbesFromFile inputPredictionFile
+ let
+ ((truePositives, falsePositives), (trueNegatives, falseNegatives)) = testPredictions includeTFs (map fst filesByGene) boundByGene predictBoundByGene
+ in
+ do
+ putStrLn $ (showString "TruePositives = " . shows truePositives . showString "; TrueNegatives = " . shows trueNegatives . showString "; FalsePositives = " . shows falsePositives . showString "; FalseNegatives = " . shows falseNegatives) ""
+ putStrLn $ (showString "TPR = " . shows (100.0 * (fromIntegral truePositives) / (fromIntegral (truePositives + falseNegatives))) . showString "; FPR = " . shows (100.0 * (fromIntegral falsePositives) / (fromIntegral (falsePositives + trueNegatives)))) ""
Please sign in to comment.
Something went wrong with that request. Please try again.