%% appendix-permut-test-fdr.Rnw


\subsection{Permutation tests and t-test}
\begin{frame}[label=permtest]
  \frametitle{The logic of a permutation test}
  \begin{itemize}
  \item Define the statistic (e.g., differences between means).
  \item Obtain its distribution under the null hypothesis ($H_0$).
  \item Calculate how likely our observed statistic is under the null hypothesis.

    \vspace*{20pt}
  
    \item (Permutation tests are very general approaches that can be used
      for testing a variety of hypotheses---e.g., the Dupuy and Simon
      paper---but their use in real life requires a lot of care.)
  \end{itemize}
\end{frame}


\begin{frame}
  \frametitle{The logic of a permutation test}
  \begin{itemize}
  \item Key idea: under $H_0$ labels and values are not related.
  \item That's it! 
  \item How could we generate a data set that is compatible with $H_0$?
  \item And another? \ldots
  \end{itemize}
\end{frame}


\begin{frame}
\frametitle{t-test to compare two groups}
\begin{enumerate}
\item Compute the means
\item Subtract one from the other
\item Compute a quantity related to the variance of the difference in
  means (this comes from the variance of each group).
\item Divide the difference in means by the standard deviation of the
  difference in means (its standard error).
\item Now we have a standardized difference: the t statistic.
\end{enumerate}
\end{frame}

\begin{frame}[fragile]
  \frametitle{Differences between the two procedures?}

  \begin{itemize}
    \item With a t-test: if certain assumptions are true, there is a
      statistic of known distribution under $H_0$. From here, the p-value
      is immediate.
      
      \item With a permutation test: we define a statistic. We do not
        derive its distribution analytically. We obtain it numerically by
        counting events generated under $H_0$.
\item Permutation tests are not ``assumption free''!!! 
\item Permutation tests might be testing a hypothesis we are not
  interested in (e.g., dispersion vs.\ mean).
\item Some assumptions of parametric tests might be verifiable and/or
  reasonable. And parametric models give us extra stuff  (model checking). 
\item Numerically: are results similar?
  \end{itemize}
\end{frame}


\subsection{FDR: algorithm, numerical examples}

\begin{frame}[label=FDR-appendix]
\frametitle{FDR: the algorithm}
\begin{center}
\includegraphics[%
    width=9.8cm,
    keepaspectratio]{fdr2.png}
\end{center}

(Reiner et al., 2003, Bioinformatics)

\vspace*{20pt}
(Note: a ``step-up procedure'').

\vspace*{20pt}

This procedure \emph{controls} the FDR. (Note that it does not say ``estimates''.)

\end{frame}


%%% FIXME zz: reorder from smallest to largest!!

\begin{frame}
\frametitle{Examples}
What will I reject at 0.1?
\begin{itemize}
\item 0.1, 0.1, 0.1, 0.1 
  \item {\footnotesize \mg{all}}
\vspace*{10pt}

  \item 0.1, 0.01, 0.01, 0.01
  \item {\footnotesize \mg{all}}
\vspace*{10pt}

\item 0.2, 0.1, 0.1, 0.1
  \item {\footnotesize \mg{none}}
\item Threshold: 0.1
\item $p \le \mathrm{threshold} \times i/m$?
\item \og{$0.1 \times 3/4 = 0.075$}
\vspace*{10pt}

\item 0.2, 0.075, 0.075, 0.075
  \item {\footnotesize \mg{last three}}
    
    
\end{itemize}
\end{frame}


\begin{frame}
\frametitle{Adjusted p-values}
\begin{quote}
  The results of a multiple testing procedure can be reported as
  multiplicity adjusted p-values. As with the regular p-value, each
  adjusted p-value is compared to the desired significance level, and if
  smaller, the hypothesis is rejected. Therefore, the way adjusted
  p-values are used and interpreted remains conveniently familiar,
  regardless of the adjustment procedure complexity.
\end{quote}

(Reiner et al., 2003, Bioinformatics)

\end{frame}


\begin{frame}
\frametitle{Adjusted p-values: FDR}
\begin{quote}
For an FDR controlling procedure, the adjusted p-value
of an individual hypothesis is the lowest level of FDR for
which the hypothesis is first included in the set of rejected
hypotheses. Thus the adjusted p-value of $P_{(j)}$ using the
BH procedure, is $P^{BH}_{(j)} = \min_{j \le i}\{P_{(i)} \frac{m}{i}\}$.
                   

\end{quote}
(Reiner et al., 2003, Bioinformatics)

\end{frame}


\begin{frame}[fragile=singleslide]
\frametitle{Examples of FDR-adjusted p-values}
\begin{itemize}
\item 0.2, 0.08, 0.08, 0.08
\item \mg{0.2, 0.1067, 0.1067, 0.1067}
\item \og{$0.08 \times 4/3 \approx 0.1067$; $0.08 \times 4/2 = 0.16$; \ldots}

<<eval=TRUE,results='hide',tidy=FALSE>>=
 p.adjust(c(0.2, 0.08, 0.08, 0.08), 
          method = "BH")
 @   
  
\vspace*{20pt}
\item 0.2, 0.08, 0.07, 0.07

\item \mg{0.2, 0.1067, 0.1067, 0.1067}
\item \og{$0.08 \times 4/3 \approx 0.1067$; $0.07 \times 4/2 = 0.14$; \ldots}

<<eval=TRUE,results='hide',tidy=FALSE>>=
p.adjust(c(0.2, 0.08, 0.07, 0.07), 
          method = "BH")
 @   
  
\end{itemize}
\end{frame}


\begin{frame}[fragile=singleslide]
\frametitle{Examples of FDR-adjusted p-values (cont.)}
\begin{itemize}

\item 0.2, 0.08, 0.05, 0.015
\item \mg{0.2, 0.1067, 0.1, 0.06}
\item \og{$0.05 \times 4/2 = 0.1$; $0.015 \times 4/1 = 0.06$}

<<eval=TRUE,results='hide',tidy=FALSE>>=
p.adjust(c(0.2, 0.08, 0.05, 0.015), 
         method = "BH")
@   
\end{itemize}
\end{frame}