Skip to content

Commit

Permalink
add H-test
Browse files Browse the repository at this point in the history
  • Loading branch information
zieglerSe committed Sep 1, 2020
1 parent ce00f67 commit 8fc3c5f
Show file tree
Hide file tree
Showing 5 changed files with 139 additions and 0 deletions.
1 change: 1 addition & 0 deletions FSharp.Stats.sln
Expand Up @@ -35,6 +35,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "content", "content", "{8E6D
docsrc\content\Fitting.fsx = docsrc\content\Fitting.fsx
docsrc\content\GoodnessOfFit.fsx = docsrc\content\GoodnessOfFit.fsx
docsrc\content\GrowthCurve.fsx = docsrc\content\GrowthCurve.fsx
HTest.fsx = HTest.fsx
docsrc\content\Imputation.fsx = docsrc\content\Imputation.fsx
docsrc\content\index.fsx = docsrc\content\index.fsx
docsrc\content\Integration.fsx = docsrc\content\Integration.fsx
Expand Down
59 changes: 59 additions & 0 deletions HTest.fsx
@@ -0,0 +1,59 @@
(*** hide ***)
// This block of code is omitted in the generated HTML documentation. Use
// it to define helpers that you do not want to show in the documentation.
#r @"C:\Users\Selly\source\repos\FSharp.Stats\bin\FSharp.Stats\netstandard2.0\FSharp.Stats.dll"
#r @"C:\Users\Selly\source\repos\FSharp.Plotly-developer\FSharp.Plotly-developer\bin\FSharp.Plotly\netstandard2.0\FSharp.Plotly.dll"


open FSharp.Plotly
open FSharp.Plotly.Axis
open FSharp.Plotly.StyleParam

/// Linear axis carrying the given title, configured with Mirror.All, inside ticks,
/// grid lines switched off, and both the axis line and the zero line switched on.
let myAxis title =
    LinearAxis.init(
        Title = title,
        Mirror = Mirror.All,
        Ticks = TickOptions.Inside,
        Showgrid = false,
        Showline = true,
        Zeroline = true)
/// Same axis styling as used for the plain titled axis, but additionally pinned to
/// the supplied range (passed through Range.MinMax).
let myAxisRange title range =
    LinearAxis.init(
        Title = title,
        Range = Range.MinMax range,
        Mirror = Mirror.All,
        Ticks = TickOptions.Inside,
        Showgrid = false,
        Showline = true,
        Zeroline = true)
/// Applies the standard axis styling to a chart: x is the x-axis title, y the y-axis title.
let styleChart x y chart =
    chart
    |> Chart.withX_Axis (myAxis x)
    |> Chart.withY_Axis (myAxis y)
/// Applies the standard axis styling to a chart with fixed axis ranges:
/// x / y are the axis titles, rx / ry the corresponding axis ranges.
let styleChartRange x y rx ry chart =
    chart
    |> Chart.withX_Axis (myAxisRange x rx)
    |> Chart.withY_Axis (myAxisRange y ry)

(**
#Statistical testing
FSharp.Stats provides hypothesis tests for different applications.
A hypothesis test is a statistical test that is used to determine whether there is enough evidence
in a sample of data to infer that a certain condition is true for the entire population.
A hypothesis test examines two opposing hypotheses about a population: the null hypothesis and the alternative hypothesis.
<a name="TestStatistics"></a>
##Test Statistics
<a name="Anova"></a>
##Anova
*)

open FSharp.Stats
open FSharp.Stats.Testing

(**
<a name="HTest"></a>
##H-Test
The H test is also known as Kruskal-Wallis one-way analysis-of-variance-by-ranks and is the nonparametric equivalent of one-way ANOVA.
It is a non-parametric, rank-based test for comparing the medians of more than two independent samples (of equal or different sample size), and is therefore an extension of the Wilcoxon-Mann-Whitney two-sample test.
Testing with H test gives information whether the samples are from the same distribution.
A benefit of the H-test is that it does not require the samples to be normally distributed.
The downside is that there is no information which samples are different from each other, or how many differences occur. For further investigation a Post Hoc test is recommended.
Prerequisite :
- random and independent samples
- observations are from populations with same shape of distribution
- nominal scale, ordinal scale, ratio scale or interval scale data
The distribution of the H test statistic is approximated by a chi-square distribution with k - 1 degrees of freedom, where k is the number of samples (groups).
References :
- E. Ostertagová, Methodology and Application of the Kruskal-Wallis Test (2014)
- Y. Chan, RP Walmsley, Learning and understanding the Kruskal-Wallis one-way analysis-of-variance-by-ranks test for differences among three or more independent groups (1997)
*H-test*
input : seq{seq<float>}
*)

// Three independent example groups with five observations each.
let groupA = seq { 23.; 41.; 54.; 66.; 78. }
let groupB = seq { 45.; 55.; 60.; 70.; 72. }
let groupC = seq { 18.; 30.; 34.; 40.; 44. }

// The H-test input: one sequence of observations per group.
let samples = seq { groupA; groupB; groupC }

// calculation of p-Value
1 change: 1 addition & 0 deletions docsrc/content/Testing.fsx
Expand Up @@ -310,6 +310,7 @@ let fTestFromData = FTest.testVariances sampleFA sampleFB
(**
*F-Test from given parameters:*
*)

// each sample is described by a (variance, degrees of freedom) pair
let sampleF1 = 0.1, 15.
let sampleF2 = 0.05, 7.
Expand Down
1 change: 1 addition & 0 deletions src/FSharp.Stats/FSharp.Stats.fsproj
Expand Up @@ -106,6 +106,7 @@
<Compile Include="Testing\SAM.fs" />
<Compile Include="Testing\FisherHotelling.fs" />
<Compile Include="Testing\RMT.fs" />
<Compile Include="HTest.fs" />
<!-- Fitting -->
<Compile Include="Fitting\CrossValidation.fs" />
<Compile Include="Fitting\LinearRegression.fs" />
Expand Down
77 changes: 77 additions & 0 deletions src/FSharp.Stats/HTest.fs
@@ -0,0 +1,77 @@
namespace FSharp.Stats.Testing


module HTest =

    open FSharp.Stats

    /// Kruskal-Wallis H-test (one-way analysis of variance by ranks).
    ///
    /// samples: one sequence of observations per group, e.g. seq { groupA; groupB; groupC }.
    ///
    /// Returns the value produced by TestStatistics.createChiSquare for the H statistic
    /// and (number of groups - 1) degrees of freedom.
    ///
    /// The statistic is always divided by the tie-correction factor
    ///     1 - sum(t^3 - t) / (N^3 - N)
    /// where N is the total number of observations and t runs over the sizes of the
    /// sets of tied values. Without ties the sum is 0, the factor is exactly 1 and the
    /// uncorrected statistic is returned unchanged.
    ///
    /// NOTE(review): no input validation is performed. An empty group contributes
    /// 0/0 = NaN to the statistic, and if all observations are identical the correction
    /// factor is 0, so the statistic is not a finite number.
    let htest (samples : seq<#seq<float>>) =
        // Materialise the input once: it may be lazy and is needed several times below.
        let groups =
            samples
            |> Seq.map Seq.toArray
            |> Seq.toArray

        // Pooled observations, in group order.
        let allValues = Array.concat groups
        let samplesize = float allValues.Length

        // Average ranks of the pooled observations; ranked.[i] is the rank of allValues.[i].
        let ranked = FSharp.Stats.Rank.rankAverage allValues

        // Because allValues is the concatenation of the groups, group i owns the
        // index range starting at offsets.[i] with length groups.[i].Length.
        let offsets =
            groups
            |> Array.map Array.length
            |> Array.scan (+) 0

        // Rank sum of every group.
        let rankSums =
            groups
            |> Array.mapi (fun i group ->
                Array.sub ranked offsets.[i] group.Length
                |> Array.sum
                |> float)

        // sum(t^3 - t) over all distinct values; values occurring once contribute 0.
        let tieSum =
            allValues
            |> Array.countBy id
            |> Array.sumBy (fun (_, count) ->
                let t = float count
                t ** 3. - t)

        // Correction factor for ties in the data (1 when there are none).
        let correctionFactor = 1. - tieSum / (samplesize ** 3. - samplesize)

        // sum(R_i^2 / n_i) over all groups.
        let sums =
            Array.map2 (fun rankSum (group : float []) -> rankSum ** 2. / float group.Length) rankSums groups
            |> Array.sum

        let dof = float groups.Length - 1.

        let uncorrected = (12. / (samplesize * (samplesize + 1.))) * sums - 3. * (samplesize + 1.)
        let statistic = uncorrected / correctionFactor
        FSharp.Stats.Testing.TestStatistics.createChiSquare statistic dof

0 comments on commit 8fc3c5f

Please sign in to comment.