Skip to content

Commit

Permalink
Refactor SignalDetection;Quantification,Chargestate
Browse files Browse the repository at this point in the history
Chargestate: add poission based isotopic cluster estimation
SignalDetection: add Second Deviation based Centroidisation, add Module Padding, add Module Care with Helpferfunctions
  • Loading branch information
david authored and david committed Feb 13, 2017
1 parent a2bc279 commit 80a67b2
Show file tree
Hide file tree
Showing 7 changed files with 555 additions and 490 deletions.
Binary file modified BioFSharp.VC.db
Binary file not shown.
2 changes: 1 addition & 1 deletion BioFSharp.sln
Expand Up @@ -32,7 +32,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "content", "content", "{8E6D
docs\content\BioSequence.fsx = docs\content\BioSequence.fsx
docs\content\BlastWrapper.fsx = docs\content\BlastWrapper.fsx
docs\content\CentroidSpectra.fsx = docs\content\CentroidSpectra.fsx
..\..\..\Documents\BioInfo\isotopic distribution _17\IsotopicDistribution.fsx = ..\..\..\Documents\BioInfo\isotopic distribution _17\IsotopicDistribution.fsx
docs\content\ChargeStateDetermination.fsx = docs\content\ChargeStateDetermination.fsx
docs\content\Formula.fsx = docs\content\Formula.fsx
docs\content\index.fsx = docs\content\index.fsx
docs\content\PeptideLookUp.fsx = docs\content\PeptideLookUp.fsx
Expand Down
19 changes: 13 additions & 6 deletions docs/content/ChargeStateDetermination.fsx
Expand Up @@ -8,7 +8,7 @@
#r "../../bin/BioFSharp.IO.dll"
#r "../../bin/MathNet.Numerics.dll"
#r "../../bin/MathNet.Numerics.FSharp.dll"

#r "../../packages/build/FSharp.Plotly/lib/net40/Fsharp.Plotly.dll"
(**
Charge state determination
==========================
Expand All @@ -27,7 +27,7 @@ open BioFSharp.Mz
open BioFSharp.IO
open MathNet.Numerics
open FSharp.Care

open FSharp.Plotly
///Returns the first entry of a examplary mgf File
let ms1DataTest =
Mgf.readMgf (__SOURCE_DIRECTORY__ + "/data/ms1Example.mgf")
Expand Down Expand Up @@ -86,7 +86,7 @@ window of a user given width centered around a user given m/z value.
/// Returns a tuple of float arrays (mzData[]*intensityData[]) containing only the centroids in a
/// window of a user given width centered around a user given m/z value.
let centroidsInWindow =
SignalDetection.windowToCentroidBy (SignalDetection.Wavelet.toSNRFilteredCentroid 0.1 30.) ms1DataTest.Mass ms1DataTest.Intensity 7.5 ms2PrecursorMZ
SignalDetection.windowToCentroidBy (SignalDetection.Wavelet.toCentroid 0.1 50. 30.) ms1DataTest.Mass ms1DataTest.Intensity 7.5 ms2PrecursorMZ

(**
Returns a list of assigned chargestates sorted by their score.
Expand All @@ -104,10 +104,17 @@ significance level
/// significance level
let testedItems =
putativeCharges
|> List.map (fun assCh -> ChargeState.createTestedItem assCh (ChargeState.empiricalPValueOf initGen (assCh.SubSetLength ,float assCh.Charge) assCh.MZChargeDev ))
|> List.map (fun assCh -> ChargeState.createTestedItem assCh (ChargeState.empiricalPValueOfSim initGen (assCh.SubSetLength ,float assCh.Charge) assCh.MZChargeDev ))
|> ChargeState.removeSubSetsOfBestHit
|> List.filter (fun item -> item.PValue < 0.05)
|> List.map (fun testedI -> testedI.TestedObject)


|> List.map (fun testedI ->
let normPeaks =
ChargeState.normalizePeaksByIntensitySum testedI.Peaks
let poissEst =
let tmp =
ChargeState.poissonEstofMassTrunc ChargeState.n15MassToLambda normPeaks.Length testedI.PutMass
tmp
ChargeState.kullbackLeiblerDivergenceOf normPeaks poissEst
)

87 changes: 64 additions & 23 deletions src/BioFSharp.Mz/ChargeState.fs
Expand Up @@ -46,12 +46,12 @@ module ChargeState =
DistanceRealTheoPeakSpacing : float list
SubSetLength : int
StartPeakIntensity : float
PeakSourcePositions : Set<Peak>
Peaks : Set<Peak>
}

let createAssignedCharge precMZ charge putMass mZChargeDev score distanceRealTheoPeakSpacing subSetLength startPeakIntensity peakSourcePositions= {
let createAssignedCharge precMZ charge putMass mZChargeDev score distanceRealTheoPeakSpacing subSetLength startPeakIntensity peaks= {
PrecMZ=precMZ; Charge=charge; PutMass=putMass; MZChargeDev=mZChargeDev; Score=score;DistanceRealTheoPeakSpacing=distanceRealTheoPeakSpacing;
SubSetLength=subSetLength; StartPeakIntensity=startPeakIntensity; PeakSourcePositions=peakSourcePositions }
SubSetLength=subSetLength; StartPeakIntensity=startPeakIntensity; Peaks=peaks }

type TestedItem<'a> = {
TestedObject: 'a
Expand All @@ -64,7 +64,7 @@ module ChargeState =
/// Returns Index of the highestPeak flanking a given mzValue
let idxOfHighestPeakBy (mzData: float []) (intensityData: float []) mzValue =
let idxHigh =
mzData |> Array.tryFindIndex (fun x -> x > mzValue) // faster as binary search
mzData |> Array.tryFindIndex (fun x -> x > mzValue) // faster as binary search
let idxLow =
match idxHigh with
| None -> Some (mzData.Length-1)
Expand All @@ -78,8 +78,15 @@ module ChargeState =
else
if intensityData.[idxLow.Value] > intensityData.[idxHigh.Value] then
idxLow.Value
else idxHigh.Value

else idxHigh.Value

/// Returns Index of the highestPeak flanking a given mzValue
let idxOfClosestPeakBy (mzData: float []) (intensityData: float []) mzValue =
mzData
|> Array.mapi (fun i x -> abs (x - mzValue), i) // faster as binary search
|> Array.minBy (fun (value,idx) -> value)
|> fun (value,idx) -> idx

/// Returns a Collection of MZIntensityPeaks, The Collection starts with the first Element on the right side of the startIdx.
/// and ends either with the last element of the mzIntensityArray or when the MzDistance to the highest Peak exceeds
/// the given windowwidth.
Expand All @@ -102,7 +109,7 @@ module ChargeState =
loop (count+1) ((createPeak (mzData.[count]-startPkMZ) (intensityData.[count] / startPkInt) )::accmz) mzData intensityData
else loop (count+1) accmz mzData intensityData
loop (startIdx+1) [] mzData intensityData
|> fun sourceSet -> startPkInt , createPutativeIsotopeCluster sourceSet (sourceSet.Length+1) (sourceSet.Length+1) //the length must be raised by 1 because the first element (0.,1.) is left out from the collection
|> fun sourceSet -> startPkInt , createPutativeIsotopeCluster sourceSet (sourceSet.Length+1) (sourceSet.Length+1) //the length must be raised by 1 because the first element (0.,1.) is left out from the collection
else 0., createPutativeIsotopeCluster [] 0 0

/// Creates the PowerSet of a given Collection of MZIntensityPeaks. Adds a StartPeak with the relative Position 0 and the
Expand All @@ -119,7 +126,7 @@ module ChargeState =
|> List.map (fun subSet -> createPutativeIsotopeCluster subSet putCluster.SourceSetLength subSet.Length)

/// Calculates the mzDistances of a List of MzIntensityPeaks
let mzDistancesOf (mzIntensityPeaks: PeakList<_>) = //TODO: calc Slope; times Slope changed: Timo fragen ob gewollt.
let mzDistancesOf (mzIntensityPeaks: PeakList<_>) = //TODO: calc Slope; times Slope changed
let rec innerF acc (mzIntensityPeaks: PeakList<_>) =
match mzIntensityPeaks with
| [] -> acc
Expand Down Expand Up @@ -202,23 +209,31 @@ module ChargeState =
/// Returns the empirically determined PValue. The PValue is the quotient of simulated mzChargeDeviations lower than the mzChargeDeviation
/// observed divided by their total number
let empiricalPValueOf initGenerateMzSpecDevWithMemF (nrOfPeaksInSubSet,charge) score = //TODO nrOfPeaks,charge score in parameter
let empiricalPValueOfSim initGenerateMzSpecDevWithMemF (nrOfPeaksInSubSet,charge) score = //TODO nrOfPeaks,charge score in parameter
let generateMzSpecDev = initGenerateMzSpecDevWithMemF (nrOfPeaksInSubSet,charge)
let numerator = (generateMzSpecDev |> Array.tryFindIndex (fun x -> x > score))
match numerator with
| Some x -> (float x) / float generateMzSpecDev.Length
| None -> 1.

/// Returns the empirically determined PValue. The PValue is the quotient of simulated mzChargeDeviations lower than the mzChargeDeviation
/// observed divided by their total number
let empiricalPValueOf distribution (nrOfPeaksInSubSet,charge) score = //TODO nrOfPeaks,charge score in parameter
let numerator = (distribution |> Array.tryFindIndex (fun x -> x > score))
match numerator with
| Some x -> (float x) / float distribution.Length
| None -> 1.

/// Returns list of putative precursorChargeStates along with Properties used for evaluation.
let putativePrecursorChargeStatesBy (chargeDeterminationParams: ChargeDetermParams) (mzData: float []) (intensityData: float []) (precursorMZ:float) =
let (startPeakIntensity,originSet) = getRelPeakPosInWindowBy (mzData: float []) (intensityData: float []) chargeDeterminationParams.Width chargeDeterminationParams.MinIntensity chargeDeterminationParams.DeltaMinIntensity (idxOfHighestPeakBy (mzData: float []) (intensityData: float []) precursorMZ)
let (startPeakIntensity,originSet) = getRelPeakPosInWindowBy (mzData: float []) (intensityData: float []) chargeDeterminationParams.Width chargeDeterminationParams.MinIntensity chargeDeterminationParams.DeltaMinIntensity (idxOfClosestPeakBy (mzData: float []) (intensityData: float []) precursorMZ)
originSet
|> powerSetOf
|> List.filter (fun subSet -> subSet.SubSetLength > 1)
|> List.map (fun subSet ->
let peakPos = subSet.Peaks
|> Set.ofList
let peakPos =
subSet.Peaks
|> Set.ofList
let interPeakDistances = mzDistancesOf subSet.Peaks
let meanInterPeakDistances = MathNet.Numerics.Statistics.Statistics.Mean interPeakDistances
let assignedCharge = getChargeBy chargeDeterminationParams meanInterPeakDistances
Expand All @@ -233,7 +248,7 @@ module ChargeState =
)
|> List.sortBy (fun assignedCharge -> assignedCharge.Score)
|> List.distinctBy (fun assignedCharge -> assignedCharge.Charge)

/// Returns the StandardDeviation of the PeakDistances
let peakPosStdDevBy (putativeChargeStates: AssignedCharge list) =
putativeChargeStates
Expand All @@ -246,7 +261,7 @@ module ChargeState =
let rec loop bestSet acc (assignedCharges: TestedItem<AssignedCharge> list) =
match assignedCharges with
| [] -> (bestSet::acc) |> List.sortBy (fun x -> x.TestedObject.Score)
| h::tail -> match Set.isSuperset bestSet.TestedObject.PeakSourcePositions h.TestedObject.PeakSourcePositions with
| h::tail -> match Set.isSuperset bestSet.TestedObject.Peaks h.TestedObject.Peaks with
| false -> loop bestSet (h::acc) tail
| true -> loop bestSet acc tail
if assignedCharges = [] then
Expand All @@ -256,18 +271,44 @@ module ChargeState =
loop bestSet [] assignedCharges

///
let n14MassToLambda mass = mass * 0.0005671631116 - 0.02288968462
let normalizePeaksByIntensitySum (peaks: Set<BioFSharp.Mz.Peak>) =
let tmp = peaks |> Set.toArray
let sumOfIntensities =
tmp |> Array.sumBy (fun x -> x.Intensity)
tmp
|> Array.map (fun x -> x.Intensity / sumOfIntensities)

///
let n15MassToLambda mass = mass * 0.000509870824 - 0.01740245183
let n14MassToLambda mass = mass * 0.0005903277935 - 0.03542317929

///
let n15MassToLambda mass = mass * 0.0005306674223 - 0.03184171518

///
let poissonProb lambda xValue =
(( lambda ** xValue) / MathNet.Numerics.SpecialFunctions.Factorial (int xValue) ) * exp(-lambda)
( ( lambda ** xValue) / MathNet.Numerics.SpecialFunctions.Factorial (int xValue) ) * exp(-lambda)

///
let poissonEstofMass (massToLambda: float -> float) (limit:int) mass =
[|0. .. (float limit)|]
|> Array.map (poissonProb (massToLambda mass))


let poissonEstofMassTrunc (massToLambda: float -> float) (limit:int) mass =
let tmp =
[|for i=0. to float limit-1. do
yield poissonProb (massToLambda mass) i
|]
let SumOfProb =
tmp |> Array.sum
tmp
|> Array.map (fun x -> x / SumOfProb)

///
let kullbackLeiblerDivergenceOf (qTo:float []) (p:float []) =
if qTo.Length <> p.Length then None
else
let divergence =
Array.fold2 (fun acc qY pY ->
qY
|> (/) pY
|> log
|> (*) pY
|> (+) acc
) 0. qTo p
Some divergence
70 changes: 10 additions & 60 deletions src/BioFSharp.Mz/Quantification.fs
Expand Up @@ -93,7 +93,16 @@ module Quantification =
createGausParams amplitude meanX std fwhm

module Fitting =


///
let standardErrorOfPrediction dOF (model:float []) (data:float []) =
let n = data.Length-1 |> float
match n with
| x when x > dOF ->
let sumOfResSq = Array.fold2 (fun acc yReal yPred -> acc + ((yPred-yReal)**2.) ) 0.0 data model
sqrt( (sumOfResSq / (n - dOF)))
| _ -> -1.

///
type Model =
{
Expand Down Expand Up @@ -275,65 +284,6 @@ module Quantification =

paramsAtIteration.[paramsAtIteration.Count-1]

/// Returns a parameter vector as a possible solution for linear least square based nonlinear fitting of a given dataset (xData, yData) with a given
/// model function.
let gaussNewtonSolver' (model: Model) (solverOptions: SolverOptions) (xData: float[]) (yData: float []) (paramsAtIteration: ResizeArray<_>) =

let mutable anotherIteration = true
/// Number of Parameters of modelFunction
let paramCount = solverOptions.InitialParamGuess.Length
let dataPointCount = xData.Length

let currentParamGuess = new DenseVector(solverOptions.InitialParamGuess)
let newParamGuess = new DenseVector(paramCount)

let mutable currentValueRSS = 0.0
let mutable newValueRSS = 0.0
///
currentValueRSS <- getRSS model dataPointCount xData yData currentParamGuess

// while (anotherIteration = true) do

let jacobian = new DenseMatrix(dataPointCount, paramCount)
let residualVector = new DenseVector(dataPointCount)

///
getJacobianOf model dataPointCount xData currentParamGuess jacobian |> ignore

///
getResidualVector model dataPointCount xData yData currentParamGuess residualVector |> ignore
jacobian,residualVector
///
// let step = jacobian.Transpose().Multiply(jacobian).Cholesky().Solve(jacobian.Transpose().Multiply(residualVector))
//
// ///
// currentParamGuess.Subtract(step, newParamGuess)
//
// ///
// newValueRSS <- getRSS model dataPointCount xData yData newParamGuess
//
// ///
// paramsAtIteration.Add(newParamGuess) |> ignore
//
// ///
// anotherIteration <- shouldTerminate currentValueRSS newValueRSS paramsAtIteration.Count currentParamGuess newParamGuess solverOptions
//
// ///
// newParamGuess.CopyTo currentParamGuess
//
// ///
// currentValueRSS <- newValueRSS

// paramsAtIteration.[paramsAtIteration.Count-1]

let standardErrorOfPrediction dOF (model:float []) (data:float []) =
let n = data.Length-1 |> float
match n with
| x when x > dOF ->
let sumOfResSq = Array.fold2 (fun acc yReal yPred -> acc + ((yPred-yReal)**2.) ) 0.0 data model
sqrt( (sumOfResSq / (n - dOF)))
| _ -> -1.


module Table =

Expand Down

0 comments on commit 80a67b2

Please sign in to comment.