Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
functions to compute shift functions for dependent and independent
groups
  • Loading branch information
GRousselet committed Jul 26, 2016
1 parent 79b8722 commit 2a9688f
Show file tree
Hide file tree
Showing 4 changed files with 396 additions and 0 deletions.
95 changes: 95 additions & 0 deletions shiftdhd.m
@@ -0,0 +1,95 @@
function [xd, yd, delta, deltaCI] = shiftdhd(x,y,nboot,plotit)
%[xd yd delta deltaCI] = shiftdhd(x,y,nboot,plotshift)
% SHIFTDHD computes the 95% simultaneous confidence intervals
% for the difference between the deciles of two dependent groups
% using the Harrell-Davis quantile estimator, in conjunction with
% percentile bootstrap estimation of the quantiles' standard errors.
% See Wilcox Robust hypothesis testing book 2005 p.184-187
%
% INPUTS:
% - x & y are vectors of the same length without missing values
% - nboot = number of bootstrap samples - default = 200
% - plotit = 1 to get a figure; 0 otherwise by default
%
% OUTPUTS:
% - xd & yd = vectors of quantiles
% - delta = vector of differences between quantiles (x-y)
% - deltaCI = matrix quantiles x low/high bounds of the confidence
% intervals of the quantile differences
%
% Tied values are not allowed.
% If tied values occur, or if you want to estimate quantiles other than the deciles,
% or if you want to use an alpha other than 0.05, use SHIFTDHD_PBCI instead.
%
% Adaptation of Rand Wilcox's shifthd R function,
% http://dornsife.usc.edu/labs/rwilcox/software/
%
% See also HD, SHIFTDHD, SHIFTHD_PBCI

% Copyright (C) 2007, 2013, 2016 Guillaume Rousselet - University of Glasgow
% Original R code by Rand Wilcox
% GAR, University of Glasgow, Dec 2007
% GAR, April 2013: replace randsample with randi
% GAR, June 2013: added list option
% GAR, 7 Jun 2016: removed list option
% GAR, 17 Jun 2016: updated help for github release

if nargin < 3;nboot=200;end
if nargin < 4;plotit=0;end

if numel(x) ~= numel(y)
error('shiftdhd: x and y must be the same length')
end

n=length(x);
c=(37./n.^1.4)+2.75; % The constant c was determined so that the simultaneous
% probability coverage of all 9 differences is
% approximately 95% when sampling from normal
% distributions

% Get >>ONE<< set of bootstrap samples
% The same set is used for all nine quantiles being compared
list = randi(n,nboot,n);

xd = zeros(9,1);
yd = zeros(9,1);
delta = zeros(9,1);
deltaCI = zeros(9,2);
bootdelta = zeros(nboot,1);

for d=1:9
q = d/10;
xd(d) = hd(x,q);
yd(d) = hd(y,q);
delta(d) = xd(d) - yd(d);
for b=1:nboot
bootdelta(b) = hd(x(list(b,:)),q) - hd(y(list(b,:)),q);
end
delta_bse = std(bootdelta,0);
deltaCI(d,1) = delta(d)-c.*delta_bse;
deltaCI(d,2) = delta(d)+c.*delta_bse;
end

if plotit==1

figure('Color','w','NumberTitle','off');hold on

ext = 0.1*(max(xd)-min(xd));
plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line
for qi = 1:9
plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2)
end
% mark median
v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:')
plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1)
set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext])
box on
xlabel('Group 1 deciles','FontSize',16)
ylabel('Group 1 - group 2 deciles','FontSize',16)

end

%% TEST
% Data from Wilcox p.187
% time1=[0 32 9 0 2 0 41 0 0 0 6 18 3 3 0 11 11 2 0 11];
% time3=[0 25 10 11 2 0 17 0 3 6 16 9 1 4 0 14 7 5 11 14];
104 changes: 104 additions & 0 deletions shiftdhd_pbci.m
@@ -0,0 +1,104 @@
function [xd, yd, delta, deltaCI] = shiftdhd_pbci(x,y,q,nboot,alpha,plotit)
% [xd, yd, delta, deltaCI] = shiftdhd_pbci(x,y,q,nboot,alpha,plotit)
% Computes a shift function for two dependent groups by comparing
% the quantiles of the marginal distributions using the
% Harrell-Davis quantile estimator in conjunction with a percentile
% bootstrap approach.
%
% INPUTS:
% - x & y are vectors of the same length without missing values
% - q = quantiles to estimate - default = 0.1:0.1:.9 (deciles)
% - nboot = number of bootstrap samples - default = 2000
% - alpha = expected long-run type I error rate - default = 0.05
% - plotit = 1 to get a figure; 0 otherwise by default
%
% OUTPUTS:
% - xd & yd = vectors of quantiles
% - delta = vector of differences between quantiles (x-y)
% - deltaCI = matrix quantiles x low/high bounds of the confidence
% intervals of the quantile differences
%
% Unlike shiftdhd:
% - the confidence intervals are not corrected for multiple comparisons
% (the R version provides corrected critical p values - here i've decided
% not to provide p values at all - the goal is to understand how
% distributions differ, not to make binary decisions)
% - the confidence intervals are calculated using a percentile bootstrap of
% the quantiles, instead of a percentile bootstrap of the standard error of the quantiles
% - the quantiles to compare can be specified and are not limited to the
% deciles
% - Tied values are allowed
%
% Unlike Rand Wilcox's Dqcomhd R function, no p value is returned.
% Extensive experience suggests humans cannot be trusted with p
% values.
%
% Adaptation of Rand Wilcox's Dqcomhd R function,
% http://dornsife.usc.edu/labs/rwilcox/software/
%
% See also HD, SHIFTDHD, SHIFTHD_PBCI

% Copyright (C) 2016 Guillaume Rousselet - University of Glasgow
% GAR 2016-06-16 - first version

if nargin<3 || isempty(q)
q = .1:.1:.9;
end
if nargin<4 || isempty(nboot)
nboot = 2000;
end
if nargin<5 || isempty(alpha)
alpha = 0.05;
end
if nargin<6 || isempty(plotit)
plotit = 0;
end

Nq = numel(q);
xd = zeros(Nq,1);
yd = zeros(Nq,1);
delta = zeros(Nq,1);
deltaCI = zeros(Nq,2);
Nx = numel(x);
Ny = numel(y);
if Nx ~= Ny
error('shiftdhd_pbci: x and y must be the same length')
end
bootdelta = zeros(Nq,nboot);

lo = round(nboot.*alpha./2); % get CI boundary indices
hi = nboot - lo;
lo = lo+1;

for qi = 1:Nq
xd(qi) = hd(x,q(qi));
yd(qi) = hd(y,q(qi));
delta(qi) = xd(qi) - yd(qi);
for b = 1:nboot
bootsample = randi(Nx,1,Nx); % sample pairs of observations
bootdelta(qi,b) = hd(x(bootsample),q(qi)) - hd(y(bootsample),q(qi));
end
end

bootdelta = sort(bootdelta,2); % sort in ascending order
deltaCI(:,1) = bootdelta(:,lo);
deltaCI(:,2) = bootdelta(:,hi);

if plotit == 1

figure('Color','w','NumberTitle','off');hold on

ext = 0.1*(max(xd)-min(xd));
plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line
for qi = 1:Nq
plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2)
end
% mark median
v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:')
plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1)
set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext])
box on
xlabel('Group 1 quantiles','FontSize',16)
ylabel('Group 1 - group 2 quantiles','FontSize',16)

end
98 changes: 98 additions & 0 deletions shifthd.m
@@ -0,0 +1,98 @@
function [xd, yd, delta, deltaCI] = shifthd(x,y,nboot,plotit)
% [xd yd delta deltaCI] = shifthd(x,y,nboot,plotshift)
% SHIFTHD computes the 95% simultaneous confidence intervals
% for the difference between the deciles of two independent groups
% using the Harrell-Davis quantile estimator, in conjunction with
% percentile bootstrap estimation of the quantiles' standard errors.
% See Wilcox Robust hypothesis testing book 2005 p.151-155
%
% INPUTS:
% - x & y are vectors of the same length without missing values
% - nboot = number of bootstrap samples - default = 200
% - plotit = 1 to get a figure; 0 otherwise by default
%
% OUTPUTS:
% - xd & yd = vectors of quantiles
% - delta = vector of differences between quantiles (x-y)
% - deltaCI = matrix quantiles x low/high bounds of the confidence
% intervals of the quantile differences
%
% Tied values are not allowed.
% If tied values occur, or if you want to estimate quantiles other than the deciles,
% or if you want to use an alpha other than 0.05, use SHIFTHD_PBCI instead.
%
% Adaptation of Rand Wilcox's shifthd R function,
% http://dornsife.usc.edu/labs/rwilcox/software/
%
% See also HD, SHIFTDHD, SHIFTHD_PBCI

% Copyright (C) 2007, 2013, 2016 Guillaume Rousselet - University of Glasgow
% GAR, University of Glasgow, Dec 2007
% GAR, April 2013: replace randsample with randi
% GAR, June 2013: added list option
% GAR, 29 May 2016: removed call to bootse, removed list option and fixed bootstrap sampling error -
% use independent bootstrap sample for each decile.
% GAR, 17 Jun 2016: edited help for github release

if nargin < 3;nboot=200;end
if nargin < 4;plotit=0;end

nx=length(x);
ny=length(y);
n=min(nx,ny);
c=(80.1./n.^2)+2.73; % The constant c was determined so that the simultaneous
% probability coverage of all 9 differences is
% approximately 95% when sampling from normal
% distributions

xd = zeros(9,1);
yd = zeros(9,1);
delta = zeros(9,1);
deltaCI = zeros(9,2);

for d = 1:9

q = d./10;
xd(d) = hd(x,q);
yd(d) = hd(y,q);

xboot = zeros(nboot,1);
yboot = zeros(nboot,1);
xlist=randi(nx,nboot,nx);
ylist=randi(ny,nboot,ny);

% Get a different set of bootstrap samples for each decile
% and each group
for B = 1:nboot
xboot(B) = hd(x(xlist(B,:)),q);
yboot(B) = hd(y(ylist(B,:)),q);
end

sqrt_var_sum = sqrt( var(xboot) + var(yboot) );
delta(d) = xd(d)-yd(d);
deltaCI(d,1) = delta(d)-c.*sqrt_var_sum;
deltaCI(d,2) = delta(d)+c.*sqrt_var_sum;
end

if plotit==1

figure('Color','w','NumberTitle','off');hold on

ext = 0.1*(max(xd)-min(xd));
plot([min(xd)-ext max(xd)+ext],[0 0],'LineWidth',1,'Color',[.5 .5 .5]) % zero line
for qi = 1:9
plot([xd(qi) xd(qi)],[deltaCI(qi,1) deltaCI(qi,2)],'k','LineWidth',2)
end
% mark median
v = axis;plot([xd(5) xd(5)],[v(3) v(4)],'k:')
plot(xd,delta,'ko-','MarkerFaceColor',[.9 .9 .9],'MarkerSize',10,'LineWidth',1)
set(gca,'FontSize',14,'XLim',[min(xd)-ext max(xd)+ext])
box on
xlabel('Group 1 quantiles','FontSize',16)
ylabel('Group 1 - group 2 quantiles','FontSize',16)

end

% Data from Wilcox 2005 p.150
% control=[41 38.4 24.4 25.9 21.9 18.3 13.1 27.3 28.5 -16.9 26 17.4 21.8 15.4 27.4 19.2 22.4 17.7 26 29.4 21.4 26.6 22.7];
% ozone=[10.1 6.1 20.4 7.3 14.3 15.5 -9.9 6.8 28.2 17.9 -9 -12.9 14 6.6 12.1 15.7 39.9 -15.9 54.6 -14.7 44.1 -9];

0 comments on commit 2a9688f

Please sign in to comment.