diff --git a/dabest/_classes.py b/dabest/_classes.py index 62bc6e03..c2d53d2e 100644 --- a/dabest/_classes.py +++ b/dabest/_classes.py @@ -857,23 +857,31 @@ def _all_plot_groups(self): class DeltaDelta(object): """ - A class to compute and store the delta-delta statistics. In a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, two primary deltas are first calculated with one independent variable and a delta-delta effect size is calculated as a difference between the two primary deltas. + A class to compute and store the delta-delta statistics for experiments with a 2-by-2 arrangement where two independent variables, A and B, each have two categorical values, 1 and 2. The data is divided into two pairs of two groups, and a primary delta is first calculated as the mean difference between each of the pairs: .. math:: - \\hat{\\theta}_{B1} = \\overline{X}_{A2, B1} - \\overline{X}_{A1, B1} + \\Delta_{1} = \\overline{X}_{A_{2}, B_{1}} - \\overline{X}_{A_{1}, B_{1}} - \\hat{\\theta}_{B2} = \\overline{X}_{A2, B2} - \\overline{X}_{A1, B2} + \\Delta_{2} = \\overline{X}_{A_{2}, B_{2}} - \\overline{X}_{A_{1}, B_{2}} + where :math:`\\overline{X}_{A_{i}, B_{j}}` is the mean of the sample with A = i and B = j, and :math:`\\Delta` is the mean difference between two samples. + + A delta-delta value is then calculated as the difference between the two primary deltas: + .. math:: - \\hat{\\theta}_{\\theta} = \\hat{\\theta}_{B2} - \\hat{\\theta}_{B1} + \\Delta_{\\Delta} = \\Delta_{2} - \\Delta_{1} and: + and the standard deviation of the delta-delta value is calculated from a pooled variance of the 4 samples: + .. 
math:: - s_{\\theta} = \\frac{(n_{A2, B1}-1)s_{A2, B1}^2+(n_{A1, B1}-1)s_{A1, B1}^2+(n_{A2, B2}-1)s_{A2, B2}^2+(n_{A1, B2}-1)s_{A1, B2}^2}{(n_{A2, B1} - 1) + (n_{A1, B1} - 1) + (n_{A2, B2} - 1) + (n_{A1, B2} - 1)} + s_{\\Delta_{\\Delta}} = \\sqrt{\\frac{(n_{A_{2}, B_{1}}-1)s_{A_{2}, B_{1}}^2+(n_{A_{1}, B_{1}}-1)s_{A_{1}, B_{1}}^2+(n_{A_{2}, B_{2}}-1)s_{A_{2}, B_{2}}^2+(n_{A_{1}, B_{2}}-1)s_{A_{1}, B_{2}}^2}{(n_{A_{2}, B_{1}} - 1) + (n_{A_{1}, B_{1}} - 1) + (n_{A_{2}, B_{2}} - 1) + (n_{A_{1}, B_{2}} - 1)}} + + where :math:`s` is the standard deviation and :math:`n` is the sample size. Example ------- @@ -887,16 +895,16 @@ class DeltaDelta(object): >>> y = norm.rvs(loc=3, scale=0.4, size=N*4) >>> y[N:2*N] = y[N:2*N]+1 >>> y[2*N:3*N] = y[2*N:3*N]-0.5 - >>> # Add drug column + >>> # Add a `Treatment` column >>> t1 = np.repeat('Placebo', N*2).tolist() >>> t2 = np.repeat('Drug', N*2).tolist() >>> treatment = t1 + t2 - >>> # Add a `rep` column as the first variable for the 2 replicates of experiments done + >>> # Add a `Rep` column as the first variable for the 2 replicates of experiments done >>> rep = [] >>> for i in range(N*2): >>> rep.append('Rep1') >>> rep.append('Rep2') - >>> # Add a `genotype` column as the second variable + >>> # Add a `Genotype` column as the second variable >>> wt = np.repeat('W', N).tolist() >>> mt = np.repeat('M', N).tolist() >>> wt2 = np.repeat('W', N).tolist() @@ -909,10 +917,12 @@ class DeltaDelta(object): >>> df_delta2 = pd.DataFrame({'ID' : id_col, >>> 'Rep' : rep, >>> 'Genotype' : genotype, - >>> 'Drug': treatment, + >>> 'Treatment': treatment, >>> 'Y' : y >>> }) - + >>> unpaired_delta2 = dabest.load(data = df_delta2, x = ["Genotype", "Genotype"], y = "Y", delta2 = True, experiment = "Treatment") + >>> unpaired_delta2.mean_diff.plot() + diff --git a/docs/source/deltadelta.rst b/docs/source/deltadelta.rst index 709c3cbf..84168bd1 100644 --- a/docs/source/deltadelta.rst +++ b/docs/source/deltadelta.rst @@ -35,7 +35,7 @@ Effectively, we 
have 4 groups of subjects for comparison. - Wildtype + Wild type Mutant @@ -60,7 +60,7 @@ Effectively, we have 4 groups of subjects for comparison. -There are 2 ``Treatment`` conditions, ``Placebo`` (control group) and ``Drug`` (test group). There are 2 ``Genotype`` s: ``W`` (wildtype population) and ``M`` (mutant population). In addition, each experiment was done twice (``Rep1`` and ``Rep2``). We shall do a few analyses to visualise these differences in a simulated dataset. +There are 2 ``Treatment`` conditions, ``Placebo`` (control group) and ``Drug`` (test group). There are 2 ``Genotype``\s: ``W`` (wild type population) and ``M`` (mutant population). In addition, each experiment was done twice (``Rep1`` and ``Rep2``). We shall do a few analyses to visualise these differences in a simulated dataset. Simulate a dataset ------------------ @@ -83,18 +83,18 @@ Simulate a dataset y[N:2*N] = y[N:2*N]+1 y[2*N:3*N] = y[2*N:3*N]-0.5 - # Add drug column + # Add a `Treatment` column t1 = np.repeat('Placebo', N*2).tolist() t2 = np.repeat('Drug', N*2).tolist() treatment = t1 + t2 - # Add a `rep` column as the first variable for the 2 replicates of experiments done + # Add a `Rep` column as the first variable for the 2 replicates of experiments done rep = [] for i in range(N*2): rep.append('Rep1') rep.append('Rep2') - # Add a `genotype` column as the second variable + # Add a `Genotype` column as the second variable wt = np.repeat('W', N).tolist() mt = np.repeat('M', N).tolist() wt2 = np.repeat('W', N).tolist() @@ -112,7 +112,7 @@ Simulate a dataset df_delta2 = pd.DataFrame({'ID' : id_col, 'Rep' : rep, 'Genotype' : genotype, - 'Drug': treatment, + 'Treatment': treatment, 'Y' : y }) @@ -206,8 +206,7 @@ for slopegraphs. We use the ``experiment`` input to specify grouping of the data .. 
code-block:: python3 :linenos: - unpaired_delta2 = dabest.load(data = df_delta2, x = ["Genotype", "Genotype"], y = "Y", delta2 = True, - experiment = "Drug") + unpaired_delta2 = dabest.load(data = df_delta2, x = ["Genotype", "Genotype"], y = "Y", delta2 = True, experiment = "Treatment") The above function creates the following object: @@ -279,26 +278,31 @@ administered, the mutant phenotype is around 1.23 [95%CI 0.948, 1.52]. This diff and ``Drug`` group are plotted at the right bottom with a separate y-axis from other bootstrap plots. This effect size, at about -0.903 [95%CI -1.26, -0.535], is the net effect size of the drug treatment. That is to say that treatment with drug A reduced disease phenotype by 0.903. +Mean difference between mutants and wild types given the placebo treatment is: + .. math:: - \hat{\theta}_{P} = \overline{X}_{P, M} - \overline{X}_{P, W} + \Delta_{1} = \overline{X}_{P, M} - \overline{X}_{P, W} + +Mean difference between mutants and wild types given the drug treatment is: - \hat{\theta}_{D} = \overline{X}_{D, M} - \overline{X}_{D, W} - .. math:: + \Delta_{2} = \overline{X}_{D, M} - \overline{X}_{D, W} - \hat{\theta}_{\theta} = \hat{\theta}_{D} - \hat{\theta}_{P} +The net effect of the drug on mutants is: -and: - .. math:: - s_{\theta} = \frac{(n_{P, M}-1)s_{P, M}^2+(n_{P, W}-1)s_{P, W}^2+(n_{D, M}-1)s_{D, M}^2+(n_{D, M}-1)s_{D, M}^2}{(n_{P, M} - 1) + (n_{P, W} - 1) + (n_{D, M} - 1) + (n_{D, M} - 1)} + \Delta_{\Delta} = \Delta_{2} - \Delta_{1} + + +where :math:`\overline{X}` is the sample mean and :math:`\Delta` is the mean difference. -where :math:`\overline{X}` is the sample mean, :math:`\hat{\theta}` is the mean difference, :math:`s` is the variance and :math:`n` is the sample size. 
+Specifying Grouping for Comparisons +----------------------------------- In the example above, we used the convention of "test - control' but you can manipulate the orders of experiment groups as well as the horizontal axis variable by setting ``experiment_label`` and ``x1_level``. @@ -334,28 +338,29 @@ We produce the following plot: .. image:: _images/tutorial_108_0.png -We see that the drug had a non-specific effect of -0.321 [95%CI -0.498, -0.131] on wildtype subjects even when they were not sick, and it had a bigger effect of -1.22 [95%CI -1.52, -0.906] in mutant subjects. In this visualisation, we can see the delta-delta value of -0.903 [95%CI -1.21, -0.587] as the net effect of the drug accounting for non-specific actions in healthy individuals. +We see that the drug had a non-specific effect of -0.321 [95%CI -0.498, -0.131] on wild type subjects even when they were not sick, and it had a bigger effect of -1.22 [95%CI -1.52, -0.906] in mutant subjects. In this visualisation, we can see the delta-delta value of -0.903 [95%CI -1.21, -0.587] as the net effect of the drug accounting for non-specific actions in healthy individuals. -.. math:: - - \hat{\theta}_{W} = \overline{X}_{D, W} - \overline{X}_{P, W} - \hat{\theta}_{W} = \overline{X}_{D, M} - \overline{X}_{P, M} +Mean difference between drug and placebo treatments in wild type subjects is: .. math:: - \hat{\theta}_{\theta} = \hat{\theta}_{M} - \hat{\theta}_{W} - -and: + \Delta_{1} = \overline{X}_{D, W} - \overline{X}_{P, W} + +Mean difference between drug and placebo treatments in mutant subjects is: .. 
math:: - s_{\theta} = \frac{(n_{D, W}-1)s_{D, W}^2+(n_{P, W}-1)s_{P, W}^2+(n_{D, M}-1)s_{D, M}^2+(n_{P, M}-1)s_{P, M}^2}{(n_{D, W} - 1) + (n_{P, W} - 1) + (n_{D, M} - 1) + (n_{P, M} - 1)} + \Delta_{2} = \overline{X}_{D, M} - \overline{X}_{P, M} +The net effect of the drug on mutants is: -where :math:`\overline{X}` is the sample mean, :math:`\hat{\theta}` is the mean difference, :math:`s` is the variance and :math:`n` is the sample size. +.. math:: + \Delta_{\Delta} = \Delta_{2} - \Delta_{1} + +where :math:`\overline{X}` is the sample mean and :math:`\Delta` is the mean difference. Connection to ANOVA