diff --git a/shiftdhd.m b/shiftdhd.m new file mode 100755 index 0000000..3cf6b02 --- /dev/null +++ b/shiftdhd.m @@ -0,0 +1,95 @@ +function [xd, yd, delta, deltaCI] = shiftdhd(x,y,nboot,plotit) +%[xd yd delta deltaCI] = shiftdhd(x,y,nboot,plotshift) +% SHIFTDHD computes the 95% simultaneous confidence intervals +% for the difference between the deciles of two dependent groups +% using the Harrell-Davis quantile estimator, in conjunction with +% percentile bootstrap estimation of the quantiles' standard errors. +% See Wilcox Robust hypothesis testing book 2005 p.184-187 +% +% INPUTS: +% - x & y are vectors of the same length without missing values +% - nboot = number of bootstrap samples - default = 200 +% - plotit = 1 to get a figure; 0 otherwise by default +% +% OUTPUTS: +% - xd & yd = vectors of quantiles +% - delta = vector of differences between quantiles (x-y) +% - deltaCI = matrix quantiles x low/high bounds of the confidence +% intervals of the quantile differences +% +% Tied values are not allowed. +% If tied values occur, or if you want to estimate quantiles other than the deciles, +% or if you want to use an alpha other than 0.05, use SHIFTDHD_PBCI instead. +% +% Adaptation of Rand Wilcox's shifthd R function, +% http://dornsife.usc.edu/labs/rwilcox/software/ +% +% See also HD, SHIFTDHD, SHIFTHD_PBCI + +% Copyright (C) 2007, 2013, 2016 Guillaume Rousselet - University of Glasgow +% Original R code by Rand Wilcox +% GAR, University of Glasgow, Dec 2007 +% GAR, April 2013: replace randsample with randi +% GAR, June 2013: added list option +% GAR, 7 Jun 2016: removed list option +% GAR, 17 Jun 2016: updated help for github release + +if nargin < 3;nboot=200;end +if nargin < 4;plotit=0;end + +if numel(x) ~= numel(y) + error('shiftdhd: x and y must be the same length') +end + +n=length(x); +c=(37./n.^1.4)+2.75; % The constant c was determined so that the simultaneous + % probability coverage of all 9 differences is + % approximately 95% when sampling from normal + % distributions + +% Get >>ONE<< set of bootstrap samples +% The same set is used for all nine quantiles being compared +list = randi(n,nboot,n); + +xd = zeros(9,1); +yd = zeros(9,1); +delta = zeros(9,1); +deltaCI = zeros(9,2); +bootdelta = zeros(nboot,1); + +for d=1:9 + q = d/10; + xd(d) = hd(x,q); + yd(d) = hd(y,q); + delta(d) = xd(d) - yd(d); + for b=1:nboot + bootdelta(b) = hd(x(list(b,:)),q) - hd(y(list(b,:)),q); + end + delta_bse = std(bootdelta,0); + deltaCI(d,1) = delta(d)-c.*delta_bse; + deltaCI(d,2) = delta(d)+c.*delta_bse; +end + +if plotit==1 + + figure('Color','w','NumberTitle','off');hold on + + ext = 0.1*(max(xd)-min(xd)); + plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line + for qi = 1:9 + plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2) + end + % mark median + v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:') + plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1) + set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext]) + box on + xlabel('Group 1 deciles','FontSize',16) + ylabel('Group 1 - group 2 deciles','FontSize',16) + +end + +%% TEST +% Data from Wilcox p.187 +% time1=[0 32 9 0 2 0 41 0 0 0 6 18 3 3 0 11 11 2 0 11]; +% time3=[0 25 10 11 2 0 17 0 3 6 16 9 1 4 0 14 7 5 11 14]; diff --git a/shiftdhd_pbci.m b/shiftdhd_pbci.m new file mode 100644 index 0000000..e6f1a7f --- /dev/null +++ b/shiftdhd_pbci.m @@ -0,0 +1,104 @@ +function [xd, yd, delta, deltaCI] = shiftdhd_pbci(x,y,q,nboot,alpha,plotit) +% [xd, yd, delta, deltaCI] = shiftdhd_pbci(x,y,q,nboot,alpha,plotit) +% Computes a shift function for two dependent groups by comparing +% the quantiles of the marginal distributions using the +% Harrell-Davis quantile estimator in conjunction with a percentile +% bootstrap approach. +% +% INPUTS: +% - x & y are vectors of the same length without missing values +% - q = quantiles to estimate - default = 0.1:0.1:.9 (deciles) +% - nboot = number of bootstrap samples - default = 2000 +% - alpha = expected long-run type I error rate - default = 0.05 +% - plotit = 1 to get a figure; 0 otherwise by default +% +% OUTPUTS: +% - xd & yd = vectors of quantiles +% - delta = vector of differences between quantiles (x-y) +% - deltaCI = matrix quantiles x low/high bounds of the confidence +% intervals of the quantile differences +% +% Unlike shiftdhd: +% - the confidence intervals are not corrected for multiple comparisons +% (the R version provides corrected critical p values - here i've decided +% not to provide p values at all - the goal is to understand how +% distributions differ, not to make binary decisions) +% - the confidence intervals are calculated using a percentile bootstrap of +% the quantiles, instead of a percentile bootstrap of the standard error of the quantiles +% - the quantiles to compare can be specified and are not limited to the +% deciles +% - Tied values are allowed +% +% Unlike Rand Wilcox's Dqcomhd R function, no p value is returned. +% Extensive experience suggests humans cannot be trusted with p +% values. +% +% Adaptation of Rand Wilcox's Dqcomhd R function, +% http://dornsife.usc.edu/labs/rwilcox/software/ +% +% See also HD, SHIFTDHD, SHIFTHD_PBCI + +% Copyright (C) 2016 Guillaume Rousselet - University of Glasgow +% GAR 2016-06-16 - first version + +if nargin<3 || isempty(q) + q = .1:.1:.9; +end +if nargin<4 || isempty(nboot) + nboot = 2000; +end +if nargin<5 || isempty(alpha) + alpha = 0.05; +end +if nargin<6 || isempty(plotit) + plotit = 0; +end + +Nq = numel(q); +xd = zeros(Nq,1); +yd = zeros(Nq,1); +delta = zeros(Nq,1); +deltaCI = zeros(Nq,2); +Nx = numel(x); +Ny = numel(y); +if Nx ~= Ny + error('shiftdhd_pbci: x and y must be the same length') +end +bootdelta = zeros(Nq,nboot); + +lo = round(nboot.*alpha./2); % get CI boundary indices +hi = nboot - lo; +lo = lo+1; + +for qi = 1:Nq + xd(qi) = hd(x,q(qi)); + yd(qi) = hd(y,q(qi)); + delta(qi) = xd(qi) - yd(qi); + for b = 1:nboot + bootsample = randi(Nx,1,Nx); % sample pairs of observations + bootdelta(qi,b) = hd(x(bootsample),q(qi)) - hd(y(bootsample),q(qi)); + end +end + +bootdelta = sort(bootdelta,2); % sort in ascending order +deltaCI(:,1) = bootdelta(:,lo); +deltaCI(:,2) = bootdelta(:,hi); + +if plotit == 1 + + figure('Color','w','NumberTitle','off');hold on + + ext = 0.1*(max(xd)-min(xd)); + plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line + for qi = 1:Nq + plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2) + end + % mark median + v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:') + plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1) + set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext]) + box on + xlabel('Group 1 quantiles','FontSize',16) + ylabel('Group 1 - group 2 quantiles','FontSize',16) + +end \ No newline at end of file diff --git a/shifthd.m b/shifthd.m new file mode 100755 index 0000000..fcc895c --- /dev/null +++ b/shifthd.m @@ -0,0 +1,98 @@ +function [xd, yd, delta, deltaCI] = shifthd(x,y,nboot,plotit) +% [xd yd delta deltaCI] = shifthd(x,y,nboot,plotshift) +% SHIFTHD computes the 95% simultaneous confidence intervals +% for the difference between the deciles of two independent groups +% using the Harrell-Davis quantile estimator, in conjunction with +% percentile bootstrap estimation of the quantiles' standard errors. +% See Wilcox Robust hypothesis testing book 2005 p.151-155 +% +% INPUTS: +% - x & y are vectors of the same length without missing values +% - nboot = number of bootstrap samples - default = 200 +% - plotit = 1 to get a figure; 0 otherwise by default +% +% OUTPUTS: +% - xd & yd = vectors of quantiles +% - delta = vector of differences between quantiles (x-y) +% - deltaCI = matrix quantiles x low/high bounds of the confidence +% intervals of the quantile differences +% +% Tied values are not allowed. +% If tied values occur, or if you want to estimate quantiles other than the deciles, +% or if you want to use an alpha other than 0.05, use SHIFTHD_PBCI instead. +% +% Adaptation of Rand Wilcox's shifthd R function, +% http://dornsife.usc.edu/labs/rwilcox/software/ +% +% See also HD, SHIFTDHD, SHIFTHD_PBCI + +% Copyright (C) 2007, 2013, 2016 Guillaume Rousselet - University of Glasgow +% GAR, University of Glasgow, Dec 2007 +% GAR, April 2013: replace randsample with randi +% GAR, June 2013: added list option +% GAR, 29 May 2016: removed call to bootse, removed list option and fixed bootstrap sampling error - +% use independent bootstrap sample for each decile. +% GAR, 17 Jun 2016: edited help for github release + +if nargin < 3;nboot=200;end +if nargin < 4;plotit=0;end + +nx=length(x); +ny=length(y); +n=min(nx,ny); +c=(80.1./n.^2)+2.73; % The constant c was determined so that the simultaneous +% probability coverage of all 9 differences is +% approximately 95% when sampling from normal +% distributions + +xd = zeros(9,1); +yd = zeros(9,1); +delta = zeros(9,1); +deltaCI = zeros(9,2); + +for d = 1:9 + + q = d./10; + xd(d) = hd(x,q); + yd(d) = hd(y,q); + + xboot = zeros(nboot,1); + yboot = zeros(nboot,1); + xlist=randi(nx,nboot,nx); + ylist=randi(ny,nboot,ny); + + % Get a different set of bootstrap samples for each decile + % and each group + for B = 1:nboot + xboot(B) = hd(x(xlist(B,:)),q); + yboot(B) = hd(y(ylist(B,:)),q); + end + + sqrt_var_sum = sqrt( var(xboot) + var(yboot) ); + delta(d) = xd(d)-yd(d); + deltaCI(d,1) = delta(d)-c.*sqrt_var_sum; + deltaCI(d,2) = delta(d)+c.*sqrt_var_sum; +end + +if plotit==1 + + figure('Color','w','NumberTitle','off');hold on + + ext = 0.1*(max(xd)-min(xd)); + plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line + for qi = 1:9 + plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2) + end + % mark median + v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:') + plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1) + set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext]) + box on + xlabel('Group 1 quantiles','FontSize',16) + ylabel('Group 1 - group 2 quantiles','FontSize',16) + +end + +% Data from Wilcox 2005 p.150 +% control=[41 38.4 24.4 25.9 21.9 18.3 13.1 27.3 28.5 -16.9 26 17.4 21.8 15.4 27.4 19.2 22.4 17.7 26 29.4 21.4 26.6 22.7]; +% ozone=[10.1 6.1 20.4 7.3 14.3 15.5 -9.9 6.8 28.2 17.9 -9 -12.9 14 6.6 12.1 15.7 39.9 -15.9 54.6 -14.7 44.1 -9]; diff --git a/shifthd_pbci.m b/shifthd_pbci.m new file mode 100644 index 0000000..b5e83cc --- /dev/null +++ b/shifthd_pbci.m @@ -0,0 +1,99 @@ +function [xd, yd, delta, deltaCI] = shifthd_pbci(x,y,q,nboot,alpha,plotit) +% [xd, yd, delta, deltaCI] = shifthd_pbci(x,y,q,nboot,alpha,plotit) +% Computes a shift function for two independent groups using the +% Harrell-Davis quantile estimator in conjunction with a percentile +% bootstrap approach. +% +% INPUTS: +% - x & y are vectors; no missing values are allowed +% - q = quantiles to estimate - default = deciles 0.1:0.1:.9 +% - nboot = number of bootstrap samples - default = 2000 +% - alpha = expected long-run type I error rate - default = 0.05 +% - plotit = 1 to get a figure; 0 otherwise by default +% +% OUTPUTS: +% - xd & yd = vectors of quantiles +% - delta = vector of differences between quantiles (x-y) +% - deltaCI = matrix quantiles x low/high bounds of the confidence +% intervals of the quantile differences +% +% Unlike shifthd: +% - the confidence intervals are not corrected for multiple comparisons +% (the R version provides corrected critical p values - here i've decided +% not to provide p values at all - the goal is to understand how +% distributions differ, not to make binary decisions) +% - the confidence intervals are calculated using a percentile bootstrap of +% the quantiles, instead of a percentile bootstrap of the standard error of the quantiles +% - the quantiles to compare can be specified and are not limited to the +% deciles +% - Tied values are allowed +% +% Unlike Rand Wilcox's qcomhd R function, no p value is returned. +% Extensive experience suggests human beings cannot be trusted with p +% values. +% +% Adaptation of Rand Wilcox's qcomhd R function, +% http://dornsife.usc.edu/labs/rwilcox/software/ +% +% See also HD, SHIFTHD, SHIFTDHD_PBCI + +% Copyright (C) 2016 Guillaume Rousselet - University of Glasgow +% GAR 2016-06-16 - first version + +if nargin<3 || isempty(q) + q = .1:.1:.9; +end +if nargin<4 || isempty(nboot) + nboot = 2000; +end +if nargin<5 || isempty(alpha) + alpha = 0.05; +end +if nargin<6 || isempty(plotit) + plotit = 0; +end + +Nq = numel(q); +xd = zeros(Nq,1); +yd = zeros(Nq,1); +delta = zeros(Nq,1); +deltaCI = zeros(Nq,2); +Nx = numel(x); +Ny = numel(y); +bootdelta = zeros(Nq,nboot); + +lo = round(nboot.*alpha./2); % get CI boundary indices +hi = nboot - lo; +lo = lo+1; + +for qi = 1:Nq + xd(qi) = hd(x,q(qi)); + yd(qi) = hd(y,q(qi)); + delta(qi) = xd(qi) - yd(qi); + for b = 1:nboot + bootdelta(qi,b) = hd(x(randi(Nx,1,Nx)),q(qi)) - hd(y(randi(Ny,1,Ny)),q(qi)); + end +end + +bootdelta = sort(bootdelta,2); % sort in ascending order +deltaCI(:,1) = bootdelta(:,lo); +deltaCI(:,2) = bootdelta(:,hi); + +if plotit == 1 + + figure('Color','w','NumberTitle','off');hold on + + ext = 0.1*(max(xd)-min(xd)); + plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line + for qi = 1:Nq + plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2) + end + % mark median + v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:') + plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1) + set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext]) + box on + xlabel('Group 1 quantiles','FontSize',16) + ylabel('Group 1 - group 2 quantiles','FontSize',16) + +end