initial commit

functions to compute shift functions for dependent and independent groups
GRousselet · Jul 26, 2016 · 2a9688f · 2a9688f
1 parent 79b8722
commit 2a9688f
Show file tree

Hide file tree

Showing 4 changed files with 396 additions and 0 deletions.
diff --git a/shiftdhd.m b/shiftdhd.m
@@ -0,0 +1,95 @@
+function [xd, yd, delta, deltaCI] = shiftdhd(x,y,nboot,plotit)
+%[xd yd delta deltaCI] = shiftdhd(x,y,nboot,plotshift)
+% SHIFTDHD computes the 95% simultaneous confidence intervals
+% for the difference between the deciles of two dependent groups
+% using the Harrell-Davis quantile estimator, in conjunction with
+% percentile bootstrap estimation of the quantiles' standard errors.
+% See Wilcox Robust hypothesis testing book 2005 p.184-187
+%
+% INPUTS:
+% - x & y are vectors of the same length without missing values
+% - nboot = number of bootstrap samples - default = 200
+% - plotit = 1 to get a figure; 0 otherwise by default
+%
+% OUTPUTS:
+% - xd & yd = vectors of quantiles
+% - delta = vector of differences between quantiles (x-y)
+% - deltaCI = matrix quantiles x low/high bounds of the confidence
+% intervals of the quantile differences
+%
+% Tied values are not allowed.
+% If tied values occur, or if you want to estimate quantiles other than the deciles,
+% or if you want to use an alpha other than 0.05, use SHIFTDHD_PBCI instead.
+%
+% Adaptation of Rand Wilcox's shifthd R function,
+% http://dornsife.usc.edu/labs/rwilcox/software/
+%
+% See also HD, SHIFTDHD, SHIFTHD_PBCI
+
+% Copyright (C) 2007, 2013, 2016 Guillaume Rousselet - University of Glasgow
+% Original R code by Rand Wilcox
+% GAR, University of Glasgow, Dec 2007
+% GAR, April 2013: replace randsample with randi
+% GAR, June 2013: added list option
+% GAR, 7 Jun 2016: removed list option
+% GAR, 17 Jun 2016: updated help for github release
+
+if nargin < 3;nboot=200;end
+if nargin < 4;plotit=0;end
+
+if numel(x) ~= numel(y)
+   error('shiftdhd: x and y must be the same length') 
+end
+
+n=length(x);
+c=(37./n.^1.4)+2.75; % The constant c was determined so that the simultaneous
+                     % probability coverage of all 9 differences is
+                     % approximately 95% when sampling from normal
+                     % distributions
+
+% Get >>ONE<< set of bootstrap samples
+% The same set is used for all nine quantiles being compared
+list = randi(n,nboot,n);
+
+xd = zeros(9,1);
+yd = zeros(9,1);
+delta = zeros(9,1);
+deltaCI = zeros(9,2);
+bootdelta = zeros(nboot,1);
+
+for d=1:9
+    q = d/10;
+    xd(d) = hd(x,q);
+    yd(d) = hd(y,q);
+    delta(d) = xd(d) - yd(d);
+    for b=1:nboot
+        bootdelta(b) = hd(x(list(b,:)),q) - hd(y(list(b,:)),q);
+    end
+    delta_bse = std(bootdelta,0);
+    deltaCI(d,1) = delta(d)-c.*delta_bse;
+    deltaCI(d,2) = delta(d)+c.*delta_bse;
+end
+
+if plotit==1
+
+    figure('Color','w','NumberTitle','off');hold on
+
+    ext = 0.1*(max(xd)-min(xd));
+    plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line
+    for qi = 1:9
+       plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2) 
+    end
+    % mark median
+    v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:')
+    plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1)
+    set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext])
+    box on
+    xlabel('Group 1 deciles','FontSize',16)
+    ylabel('Group 1 - group 2 deciles','FontSize',16)
+
+end
+
+%% TEST
+% Data from Wilcox p.187
+% time1=[0 32 9 0 2 0 41 0 0 0 6 18 3 3 0 11 11 2 0 11];
+% time3=[0 25 10 11 2 0 17 0 3 6 16 9 1 4 0 14 7 5 11 14];
diff --git a/shiftdhd_pbci.m b/shiftdhd_pbci.m
@@ -0,0 +1,104 @@
+function [xd, yd, delta, deltaCI] = shiftdhd_pbci(x,y,q,nboot,alpha,plotit)
+% [xd, yd, delta, deltaCI] = shiftdhd_pbci(x,y,q,nboot,alpha,plotit)
+% Computes a shift function for two dependent groups by comparing 
+% the quantiles of the marginal distributions using the
+% Harrell-Davis quantile estimator in conjunction with a percentile
+% bootstrap approach.
+%
+% INPUTS:
+% - x & y are vectors of the same length without missing values
+% - q = quantiles to estimate - default = 0.1:0.1:.9 (deciles)
+% - nboot = number of bootstrap samples - default = 2000
+% - alpha = expected long-run type I error rate - default = 0.05
+% - plotit = 1 to get a figure; 0 otherwise by default
+%
+% OUTPUTS:
+% - xd & yd = vectors of quantiles
+% - delta = vector of differences between quantiles (x-y)
+% - deltaCI = matrix quantiles x low/high bounds of the confidence
+% intervals of the quantile differences
+%
+% Unlike shiftdhd:
+% - the confidence intervals are not corrected for multiple comparisons
+% (the R version provides corrected critical p values - here i've decided
+% not to provide p values at all - the goal is to understand how
+% distributions differ, not to make binary decisions)
+% - the confidence intervals are calculated using a percentile bootstrap of
+%   the quantiles, instead of a percentile bootstrap of the standard error of the quantiles
+% - the quantiles to compare can be specified and are not limited to the
+%   deciles
+% - Tied values are allowed
+%
+% Unlike Rand Wilcox's Dqcomhd R function, no p value is returned.
+% Extensive experience suggests humans cannot be trusted with p
+% values.
+%
+% Adaptation of Rand Wilcox's Dqcomhd R function,
+% http://dornsife.usc.edu/labs/rwilcox/software/
+%
+% See also HD, SHIFTDHD, SHIFTHD_PBCI
+
+% Copyright (C) 2016 Guillaume Rousselet - University of Glasgow
+% GAR 2016-06-16 - first version
+
+if nargin<3 || isempty(q)
+    q = .1:.1:.9;
+end
+if nargin<4 || isempty(nboot)
+    nboot = 2000;
+end
+if nargin<5 || isempty(alpha)
+    alpha = 0.05;
+end
+if nargin<6 || isempty(plotit)
+    plotit = 0;
+end
+
+Nq = numel(q);
+xd = zeros(Nq,1);
+yd = zeros(Nq,1);
+delta = zeros(Nq,1);
+deltaCI = zeros(Nq,2);
+Nx = numel(x);
+Ny = numel(y);
+if Nx ~= Ny
+   error('shiftdhd_pbci: x and y must be the same length') 
+end
+bootdelta = zeros(Nq,nboot);
+
+lo = round(nboot.*alpha./2); % get CI boundary indices
+hi = nboot - lo;
+lo = lo+1;
+
+for qi = 1:Nq
+    xd(qi) = hd(x,q(qi));
+    yd(qi) = hd(y,q(qi));
+    delta(qi) = xd(qi) - yd(qi);
+    for b = 1:nboot
+        bootsample = randi(Nx,1,Nx); % sample pairs of observations
+        bootdelta(qi,b) = hd(x(bootsample),q(qi)) - hd(y(bootsample),q(qi));
+    end
+end
+
+bootdelta = sort(bootdelta,2); % sort in ascending order
+deltaCI(:,1) = bootdelta(:,lo);
+deltaCI(:,2) = bootdelta(:,hi);
+
+if plotit == 1
+
+    figure('Color','w','NumberTitle','off');hold on
+
+    ext = 0.1*(max(xd)-min(xd));
+    plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line
+    for qi = 1:Nq
+       plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2) 
+    end
+    % mark median
+    v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:')
+    plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1)
+    set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext])
+    box on
+    xlabel('Group 1 quantiles','FontSize',16)
+    ylabel('Group 1 - group 2 quantiles','FontSize',16)
+
+end
diff --git a/shifthd.m b/shifthd.m
@@ -0,0 +1,98 @@
+function [xd, yd, delta, deltaCI] = shifthd(x,y,nboot,plotit)
+% [xd yd delta deltaCI] = shifthd(x,y,nboot,plotshift)
+% SHIFTHD computes the 95% simultaneous confidence intervals
+% for the difference between the deciles of two independent groups
+% using the Harrell-Davis quantile estimator, in conjunction with
+% percentile bootstrap estimation of the quantiles' standard errors.
+% See Wilcox Robust hypothesis testing book 2005 p.151-155
+%
+% INPUTS:
+% - x & y are vectors of the same length without missing values
+% - nboot = number of bootstrap samples - default = 200
+% - plotit = 1 to get a figure; 0 otherwise by default
+%
+% OUTPUTS:
+% - xd & yd = vectors of quantiles
+% - delta = vector of differences between quantiles (x-y)
+% - deltaCI = matrix quantiles x low/high bounds of the confidence
+% intervals of the quantile differences
+%
+% Tied values are not allowed.
+% If tied values occur, or if you want to estimate quantiles other than the deciles,
+% or if you want to use an alpha other than 0.05, use SHIFTHD_PBCI instead.
+%
+% Adaptation of Rand Wilcox's shifthd R function,
+% http://dornsife.usc.edu/labs/rwilcox/software/
+%
+% See also HD, SHIFTDHD, SHIFTHD_PBCI
+
+% Copyright (C) 2007, 2013, 2016 Guillaume Rousselet - University of Glasgow
+% GAR, University of Glasgow, Dec 2007
+% GAR, April 2013: replace randsample with randi
+% GAR, June 2013: added list option
+% GAR, 29 May 2016: removed call to bootse, removed list option and fixed bootstrap sampling error -
+%                   use independent bootstrap sample for each decile.
+% GAR, 17 Jun 2016: edited help for github release
+
+if nargin < 3;nboot=200;end
+if nargin < 4;plotit=0;end
+
+nx=length(x);
+ny=length(y);
+n=min(nx,ny);
+c=(80.1./n.^2)+2.73; % The constant c was determined so that the simultaneous
+% probability coverage of all 9 differences is
+% approximately 95% when sampling from normal
+% distributions
+
+xd = zeros(9,1);
+yd = zeros(9,1);
+delta = zeros(9,1);
+deltaCI = zeros(9,2);
+
+for d = 1:9
+
+    q = d./10;
+    xd(d) = hd(x,q);
+    yd(d) = hd(y,q);
+
+    xboot = zeros(nboot,1);
+    yboot = zeros(nboot,1);
+    xlist=randi(nx,nboot,nx);
+    ylist=randi(ny,nboot,ny);
+
+    % Get a different set of bootstrap samples for each decile 
+    % and each group
+    for B = 1:nboot
+        xboot(B) = hd(x(xlist(B,:)),q);
+        yboot(B) = hd(y(ylist(B,:)),q);
+    end
+
+    sqrt_var_sum = sqrt( var(xboot) + var(yboot) );
+    delta(d) = xd(d)-yd(d);
+    deltaCI(d,1) = delta(d)-c.*sqrt_var_sum;
+    deltaCI(d,2) = delta(d)+c.*sqrt_var_sum;
+end
+
+if plotit==1
+
+    figure('Color','w','NumberTitle','off');hold on
+
+    ext = 0.1*(max(xd)-min(xd));
+    plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line
+    for qi = 1:9
+       plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2) 
+    end
+    % mark median
+    v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:')
+    plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1)
+    set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext])
+    box on
+    xlabel('Group 1 quantiles','FontSize',16)
+    ylabel('Group 1 - group 2 quantiles','FontSize',16)
+
+end
+
+% Data from Wilcox 2005 p.150
+% control=[41 38.4 24.4 25.9 21.9 18.3 13.1 27.3 28.5 -16.9 26 17.4 21.8 15.4 27.4 19.2 22.4 17.7 26 29.4 21.4 26.6 22.7];
+% ozone=[10.1 6.1 20.4 7.3 14.3 15.5 -9.9 6.8 28.2 17.9 -9 -12.9 14 6.6 12.1 15.7 39.9 -15.9 54.6 -14.7 44.1 -9];