Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
139 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
(*** hide ***) | ||
// This block of code is omitted in the generated HTML documentation. Use | ||
// it to define helpers that you do not want to show in the documentation. | ||
#r @"C:\Users\Selly\source\repos\FSharp.Stats\bin\FSharp.Stats\netstandard2.0\FSharp.Stats.dll" | ||
#r @"C:\Users\Selly\source\repos\FSharp.Plotly-developer\FSharp.Plotly-developer\bin\FSharp.Plotly\netstandard2.0\FSharp.Plotly.dll" | ||
|
||
|
||
open FSharp.Plotly | ||
open FSharp.Plotly.Axis | ||
open FSharp.Plotly.StyleParam | ||
|
||
// Linear axis preset used by the chart helpers in this script: the given title,
// Mirror.All, ticks drawn inside, grid hidden, axis line and zero line shown.
let myAxis title =
    LinearAxis.init(
        Title = title,
        Mirror = Mirror.All,
        Ticks = TickOptions.Inside,
        Showgrid = false,
        Showline = true,
        Zeroline = true)
// Same axis preset as above, but with the visible interval fixed to `range`
// (wrapped in Range.MinMax).
let myAxisRange title range =
    LinearAxis.init(
        Title = title,
        Range = Range.MinMax range,
        Mirror = Mirror.All,
        Ticks = TickOptions.Inside,
        Showgrid = false,
        Showline = true,
        Zeroline = true)
// Applies the preset axis style to `chart`; `x` and `y` are the axis titles.
let styleChart x y chart =
    let xAxis = myAxis x
    let yAxis = myAxis y
    chart
    |> Chart.withX_Axis xAxis
    |> Chart.withY_Axis yAxis
// Applies the preset axis style with fixed ranges to `chart`; `x`/`y` are the
// axis titles and `rx`/`ry` the corresponding axis ranges.
let styleChartRange x y rx ry chart =
    let xAxis = myAxisRange x rx
    let yAxis = myAxisRange y ry
    chart
    |> Chart.withX_Axis xAxis
    |> Chart.withY_Axis yAxis
|
||
(** | ||
#Statistical testing | ||
FSharp.Stats provides hypothesis tests for different applications. | ||
A hypothesis test is a statistical test that is used to determine whether there is enough evidence | ||
in a sample of data to infer that a certain condition is true for the entire population. | ||
A hypothesis test examines two opposing hypotheses about a population: the null hypothesis and the alternative hypothesis. | ||
<a name="TestStatistics"></a> | ||
##Test Statistics | ||
<a name="Anova"></a> | ||
##Anova | ||
*) | ||
|
||
open FSharp.Stats | ||
open FSharp.Stats.Testing | ||
|
||
(** | ||
<a name="HTest"></a> | ||
##H-Test | ||
The H test is also known as Kruskal-Wallis one-way analysis-of-variance-by-ranks and is the nonparametric equivalent of one-way ANOVA. | ||
It is a non-parametric test for comparing the medians of more than two independent samples (equal or different sample size), and therefore is an extension of the Wilcoxon-Mann-Whitney two-sample test. | ||
Testing with H test gives information whether the samples are from the same distribution. | ||
A benefit of the H-test is that it does not require the samples to be normally distributed. | ||
The downside is that there is no information which samples are different from each other, or how many differences occur. For further investigation a Post Hoc test is recommended. | ||
Prerequisite : | ||
- random and independent samples | ||
- observations are from populations with same shape of distribution | ||
- ordinal scale, interval scale or ratio scale data (the observations must be rankable) | ||
The distribution of the H test statistic is approximated by a chi-square distribution with k - 1 degrees of freedom, where k is the number of groups. | ||
References : | ||
- E. Ostertagová, Methodology and Application of the Kruskal-Wallis Test (2014) | ||
- Y. Chan, RP Walmsley, Learning and understanding the Kruskal-Wallis one-way analysis-of-variance-by-ranks test for differences among three or more independent groups (1997) | ||
*H-test* | ||
input : seq<seq<float>> (one sequence of observations per group) | ||
*) | ||
|
||
// Three independent example groups with five observations each.
let groupA = seq [ 23.; 41.; 54.; 66.; 78. ]
let groupB = seq [ 45.; 55.; 60.; 70.; 72. ]
let groupC = seq [ 18.; 30.; 34.; 40.; 44. ]

// Input for the H-test: one sequence of observations per group.
let samples = seq [ groupA; groupB; groupC ]
|
||
// calculation of p-Value |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
namespace FSharp.Stats.Testing | ||
|
||
|
||
module HTest =

    open FSharp.Stats

    /// Kruskal-Wallis H-test (one-way analysis of variance by ranks).
    ///
    /// samples: one sequence of observations per group (seq<seq<float>>);
    ///          the groups may differ in size.
    ///
    /// All observations are ranked jointly (ties receive their average rank) and
    /// H = 12 / (N (N + 1)) * sum_i (R_i^2 / n_i) - 3 (N + 1) is computed, where
    /// N is the total number of observations, n_i the size of group i and R_i its
    /// rank sum. If the data contain ties, H is divided by the correction factor
    /// C = 1 - sum_j (t_j^3 - t_j) / (N^3 - N), with t_j the size of the j-th
    /// set of tied observations.
    ///
    /// Returns the value of TestStatistics.createChiSquare applied to the
    /// (tie-corrected) statistic and (number of groups - 1) degrees of freedom.
    ///
    /// Note: if every observation has the same value, C is 0 and the statistic
    /// is not a finite number.
    let htest (samples : seq<#seq<float>>) =
        // Materialise the input once so that lazily generated sequences are
        // enumerated a single time and group sizes / values stay consistent.
        let groups =
            samples
            |> Seq.map (fun group -> Seq.toArray group)
            |> Seq.toArray

        // n_i for each group
        let groupSizes = groups |> Array.map (fun group -> float group.Length)

        // pooled observations, in group order
        let allValues = Array.concat groups
        let samplesize = float allValues.Length

        // rank of every pooled observation; ranks.[i] belongs to allValues.[i]
        let ranked = FSharp.Stats.Rank.rankAverage allValues

        // Rank sum R_i of each group. Because allValues is the concatenation of
        // the groups, group i owns the rank slice starting at offsets.[i].
        let offsets = groups |> Array.scan (fun acc group -> acc + group.Length) 0
        let rankSums =
            groups
            |> Array.mapi (fun i group -> Array.sub ranked offsets.[i] group.Length |> Array.sum)

        // sum_j (t_j^3 - t_j); values that occur only once contribute 1 - 1 = 0
        let totalties =
            allValues
            |> Array.countBy id
            |> Array.sumBy (fun (_, count) ->
                let t = float count
                t**3. - t)

        // Correction factor for ties. Without ties it is exactly 1, which leaves
        // the statistic unchanged.
        let correctionFactor =
            if totalties = 0. then 1.
            else 1. - (totalties / ((samplesize**3.) - samplesize))

        // sum_i (R_i^2 / n_i)
        let sums =
            Array.map2 (fun rankSum size -> rankSum**2. / size) rankSums groupSizes
            |> Array.sum

        let dof = float groups.Length - 1.

        let testWithoutCorrection =
            (12. / (samplesize * (samplesize + 1.))) * sums - 3. * (samplesize + 1.)
        let statistic = testWithoutCorrection / correctionFactor

        FSharp.Stats.Testing.TestStatistics.createChiSquare statistic dof