Commit 02-09-2017

NineKa committed Mar 27, 2017
1 parent 3ea74bd commit 0b0b0d7
Showing 10 changed files with 332 additions and 7 deletions.
2 changes: 2 additions & 0 deletions tex/probability_notes.tex
@@ -26,6 +26,8 @@
\newcommand\bernoullidist[1]{\text{\emph{Bern}}(#1)}
\newcommand\poissondist[1]{\text{\emph{Poi}}(#1)}
\newcommand\geometricdist[1]{\text{\emph{Geo}}(#1)}
\newcommand\negbinomialdist[2]{\text{$\mathbf{N}$\emph{Bin}}(#1, #2)}
\newcommand\hypergeometricdist[3]{\text{\emph{HG}}(#1, #2, #3)}

\newenvironment{solution}[1][\proofname]{
\proof[\ttfamily \scshape \large Solution.]
Binary file added tex/section3/figure/sec3-sub7-fig1.pdf
Binary file not shown.
57 changes: 57 additions & 0 deletions tex/section3/figure/sec3-sub7-fig1.pdf_tex
@@ -0,0 +1,57 @@
%% Creator: Inkscape inkscape 0.91, www.inkscape.org
%% PDF/EPS/PS + LaTeX output extension by Johan Engelen, 2010
%% Accompanies image file 'sec3-sub7-fig1.pdf' (pdf, eps, ps)
%%
%% To include the image in your LaTeX document, write
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics{<filename>.pdf}
%% To scale the image, write
%% \def\svgwidth{<desired width>}
%% \input{<filename>.pdf_tex}
%% instead of
%% \includegraphics[width=<desired width>]{<filename>.pdf}
%%
%% Images with a different path to the parent latex file can
%% be accessed with the `import' package (which may need to be
%% installed) using
%% \usepackage{import}
%% in the preamble, and then including the image with
%% \import{<path to file>}{<filename>.pdf_tex}
%% Alternatively, one can specify
%% \graphicspath{{<path to file>/}}
%%
%% For more information, please see info/svg-inkscape on CTAN:
%% http://tug.ctan.org/tex-archive/info/svg-inkscape
%%
\begingroup%
\makeatletter%
\providecommand\color[2][]{%
\errmessage{(Inkscape) Color is used for the text in Inkscape, but the package 'color.sty' is not loaded}%
\renewcommand\color[2][]{}%
}%
\providecommand\transparent[1]{%
\errmessage{(Inkscape) Transparency is used (non-zero) for the text in Inkscape, but the package 'transparent.sty' is not loaded}%
\renewcommand\transparent[1]{}%
}%
\providecommand\rotatebox[2]{#2}%
\ifx\svgwidth\undefined%
\setlength{\unitlength}{562.0070075bp}%
\ifx\svgscale\undefined%
\relax%
\else%
\setlength{\unitlength}{\unitlength * \real{\svgscale}}%
\fi%
\else%
\setlength{\unitlength}{\svgwidth}%
\fi%
\global\let\svgwidth\undefined%
\global\let\svgscale\undefined%
\makeatother%
\begin{picture}(1,0.09569024)%
\put(0,0){\includegraphics[width=\unitlength,page=1]{sec3-sub7-fig1.pdf}}%
\put(0.219231,0.02274994){\makebox(0,0)[lb]{\smash{$X_1$-th trial (first success)}}}%
\put(0.61119266,0.02299534){\makebox(0,0)[lb]{\smash{$X_1 + X_2$-th trial (second success)}}}%
\put(0.93700312,0.0594487){\makebox(0,0)[lb]{\smash{$\dots$}}}%
\end{picture}%
\endgroup%
25 changes: 25 additions & 0 deletions tex/section3/figure/sec3-sub7-fig1.svg
41 changes: 41 additions & 0 deletions tex/section3/pending/section3_sub7_problem_on_linearity_of_expectation.tex
@@ -0,0 +1,41 @@
\subsection{Problem on Linearity of Expectation}
\begin{example}
When a coin is tossed multiple times, a run of length $k$ means a maximal
sequence of $k$ consecutive heads or tails. For example,
\begin{itemize}[noitemsep, topsep=0em]
\item number of runs $6$: $\langle H \vert TTT \vert HH \vert T \vert H
\vert T \rangle$,
\item number of runs $5$: $\langle H \vert T \vert H \vert T \vert HHH
\rangle$.
\end{itemize}
Let $X_n$ denote the number of runs when a coin is tossed $n$ times. Assume
that the probability of landing heads on each toss is $p$. Find $E[X_n]$.
\end{example}

\note You can compute the number of runs by scanning the sequence left to
right and adding $1$ to the count every time a new run starts. This
observation can be used to compute $E[X_n]$.

\begin{solution}
Note that the first letter always starts a new run, and for $i \geq 2$, the
$i$-th letter starts a new run if and only if the $i$-th letter is different
from the $(i - 1)$-th letter.

Let $Y_1 \coloneqq 1$, and $Y_i \coloneqq 1_{\lbrace i\text{-th letter is
different from }(i - 1)\text{-th letter} \rbrace}$, for $2 \leq i \leq n$.
Then,
\[ X_n = Y_1 + Y_2 + \dots + Y_n
\Rightarrow E[X_n] = E[Y_1] + E[Y_2] + \dots + E[Y_n] \]

Now $E[Y_1] = 1$ and for $2 \leq i \leq n$,
\begin{align*}
E[Y_i] &= E[1_{\lbrace \text{$i$-th letter is different from $(i -
1)$-th letter} \rbrace}] \\
&= P(\text{$i$-th letter is different from $(i - 1)$-th
letter}) \\
&= P(\text{$(i-1)$-th letter is $T$ and $i$-th letter is $H$})+
P(\text{$(i-1)$-th letter is $H$ and $i$-th letter is $T$})\\
&= (1 - p) \cdot p + p \cdot (1 - p) = 2p(1 - p)
\end{align*}
\[ \Rightarrow E[X_n] = 1 + 2 (n - 1) \cdot p( 1 - p) \]
\end{solution}
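The scanning procedure from the note above is easy to test numerically. Below
is a minimal Python sketch (illustrative only, not part of this commit; the
function names are made up) that estimates $E[X_n]$ by simulation and compares
the result with $1 + 2(n - 1)p(1 - p)$:

    import random

    def count_runs(seq):
        # A new run starts at the first letter and whenever a letter
        # differs from its predecessor.
        runs = 1
        for prev, cur in zip(seq, seq[1:]):
            if cur != prev:
                runs += 1
        return runs

    def mean_runs(n, p, trials=200_000, seed=0):
        rng = random.Random(seed)
        total = 0
        for _ in range(trials):
            seq = ['H' if rng.random() < p else 'T' for _ in range(n)]
            total += count_runs(seq)
        return total / trials

    n, p = 20, 0.3
    print(mean_runs(n, p))                # empirical mean, close to 8.98
    print(1 + 2 * (n - 1) * p * (1 - p))  # exact value: 8.98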
6 changes: 5 additions & 1 deletion tex/section3/section3_discrete_random_variables.tex
@@ -212,4 +212,8 @@ \section{Discrete Random Variables (RVs)}
\input{section3/section3_sub4_binomial_dist}
\input{section3/section3_sub5_poisson_dist}
\input{section3/section3_sub6_geometric_dist}
-\input{section3/section3_sub7_problem_on_linearity_of_expectation}
+\input{section3/section3_sub7_negative_binomial_dist}
+\input{section3/section3_sub8_hypergeometric_dist}
+
+%% pending section
+%% section3/pending/section3_sub7_problem_on_linearity_of_expectation.tex
10 changes: 5 additions & 5 deletions tex/section3/section3_sub6_geometric_dist.tex
@@ -118,11 +118,11 @@ \subsection{Geometric Distribution}
= \frac{6}{11}
\end{align*}
\item
-        \begin{align*}
+        \[
         P(Y = 3 \vert \text{Player $1$ moves first})
-        &= \frac{P(Y = 3)}{\frac{6}{11}} \\
-        &= (\frac{5}{6})^2 \cdot \frac{1}{6} \cdot \frac{11}{6} \\
-        &= \frac{275}{1296}
-        \end{align*}
+        = \frac{P(Y = 3)}{\frac{6}{11}}
+        = (\frac{5}{6})^2 \cdot \frac{1}{6} \cdot \frac{11}{6}
+        = \frac{275}{1296}
+        \]
\end{enumerate}
\end{solution}
103 changes: 103 additions & 0 deletions tex/section3/section3_sub7_negative_binomial_dist.tex
@@ -0,0 +1,103 @@
\subsection{Negative Binomial Distribution}
This is a generalization of the geometric distribution.
\begin{definition}
A random variable $Y$ follows a negative binomial distribution with
parameters $r$ and $p$, where $r \in \lbrace 1, 2, \dots \rbrace$ and $p
\in [0, 1]$, if its pmf is given by
\[ p_Y(k) = P(Y = k) = {{k - 1} \choose {r - 1}} p^r (1 - p)^{k - r},
\quad k = r, r+1, r+2, \dots \]
\end{definition}
This is a pmf since, by the negative binomial series
$\sum_{j = 0}^\infty {{j + r - 1} \choose {r - 1}} x^j = (1 - x)^{-r}$ for
$\vert x \vert < 1$,
\[ \sum_{k = r}^\infty p_Y(k)
= \sum_{k = r}^\infty {{k - 1} \choose {r - 1}} p^r (1 - p)^{k - r}
= p^r (1 - (1 - p))^{-r}
= p^r \cdot p^{-r}
= 1 \]
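As a quick numeric sanity check of this series manipulation, one can truncate
the sum in Python (a throwaway sketch, illustrative only; math.comb is the
standard-library binomial coefficient):

    from math import comb

    def nbin_pmf(k, r, p):
        # P(Y = k) = C(k-1, r-1) p^r (1-p)^(k-r), for k = r, r+1, ...
        return comb(k - 1, r - 1) * p**r * (1 - p)**(k - r)

    r, p = 3, 0.4
    # The truncated tail beyond k = 500 is negligible for these parameters.
    print(sum(nbin_pmf(k, r, p) for k in range(r, 500)))  # ~1.0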

\noindent
\textbf{Interpretation}: Same setup as a geometric distribution:
\begin{enumerate}[noitemsep, topsep=0em]
\item infinite sequence of independent and identical trials;
\item each trial has two possible outcomes: success and failure;
\item probability of success in each trial is $p$, and probability of
failure in each trial is $(1 - p)$.
\end{enumerate}
If $Y$ denotes the number of the trial at which the $r$-th success occurs,
then,
\[ Y \sim \negbinomialdist{r}{p} \]
(When $r = 1$, we recover the $\geometricdist{p}$ distribution.) Indeed, for
$k \geq r$,
\[
P(Y = k) = P(\lbrace \text{success in $k$-th trial} \rbrace \cap
\lbrace \text{$r-1$ successes in the first $k-1$ trials}\rbrace)
\]
Note that the probability of any sequence of successes and failures of length
$k - 1$ that has $r - 1$ successes and $k - r$ failures is $p^{r - 1}(1 - p)^{k -
r}$. Thus,
\begin{align*}
P(Y = k) &= p \cdot p^{r - 1}(1 - p)^{k - r} \cdot
(\# \text{sequences of length $k - 1$ that have $r - 1$
successes and $k - r$ failures}) \\
&= p^r \cdot (1 - p)^{k - r} \cdot {{k - 1} \choose {r - 1}}
\end{align*}
Suppose the first success occurs at the $X_1$-th trial, and the second
success occurs $X_2$ trials after that. Define the random variables
$X_3, \dots, X_r$ similarly.
\begin{figure*}[!htp]
\centering
\def\svgwidth{\textwidth}
\includesvg[./section3/figure/]{sec3-sub7-fig1}
\end{figure*}

\noindent
Note that $X_1, X_2, \dots, X_r$ are independent, with
\[ X_1 \sim \geometricdist{p} \quad X_2 \sim \geometricdist{p} \quad
   \dots \quad X_r \sim \geometricdist{p} \]
and
\[ Y = X_1 + \dots + X_r \]
Thus a $\negbinomialdist{r}{p}$ random variable can be expressed as a sum of
$r$ independent $\geometricdist{p}$ random variables.
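This decomposition can be checked by simulation. A sketch (illustrative only)
that samples $Y$ as a sum of $r$ independent geometric waiting times and
compares the sample mean with $r/p$:

    import random

    def geometric(p, rng):
        # Number of trials up to and including the first success.
        k = 1
        while rng.random() >= p:
            k += 1
        return k

    rng = random.Random(1)
    r, p, trials = 4, 0.25, 100_000
    mean = sum(sum(geometric(p, rng) for _ in range(r))
               for _ in range(trials)) / trials
    print(mean, r / p)  # both close to 16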

\begin{theorem}
If $Y \sim \negbinomialdist{r}{p}$, then,
\[ E[Y] = \frac{r}{p} \qquad V[Y] = \frac{r(1 - p)}{p^2} \]
\end{theorem}
\begin{proof}
If $Y \sim \negbinomialdist{r}{p}$ then,
\[ E[Y] = E[X_1 + X_2 + \dots + X_r] \]
where $X_i \sim \geometricdist{p}$ for $i = 1, 2, \dots, r$. Hence, by
linearity of expectation,
\[ E[Y] = E[X_1] + E[X_2] + \dots + E[X_r]
= \frac{r}{p} \]
Alternatively,
\begin{align*}
E[Y] &= \sum_{k = r}^\infty k \cdot {{k - 1} \choose {r - 1}} p^r
(1 - p)^{k - r} \\
&= \sum_{k = r + 1}^\infty (k - r) \cdot {{k - 1} \choose {r - 1}}
p^r \cdot (1 - p)^{k - r} + r \sum_{k = r}^\infty {{k - 1}
\choose {r - 1}} p^r (1 - p)^{k - r} \\
&= p^r (1 - p) \left(\sum_{k = r + 1}^\infty (k - r){{k - 1} \choose
{r - 1}} (1 - p)^{k - r - 1}\right) + r \\
&= p^r (1 - p) \left(- \frac{d}{dp} \sum_{k = r}^\infty
{{k - 1} \choose {r - 1}} \cdot (1 - p)^{k - r}\right) + r \\
&= p^r (1 - p)\left(- \frac{d}{dp} (1 - (1 - p))^{-r}\right)
+ r \\
&= p^r (1 - p) \cdot \frac{r}{p^{r + 1}} + r \\
&= \frac{r(1 - p)}{p} + r
= \frac{r}{p}
\end{align*}
\[ V[Y] = E[Y^2] - (E[Y])^2 = E[Y^2] - \frac{r^2}{p^2} \]
To compute $E[Y^2]$, one can use a similar technique, i.e.\ differentiate
the power series twice. The calculation is slightly longer.
\end{proof}
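Instead of differentiating the series twice, the stated mean and variance can
also be checked numerically by truncating $E[Y] = \sum_k k \, p_Y(k)$ and
$E[Y^2] = \sum_k k^2 \, p_Y(k)$ (a sketch, illustrative only):

    from math import comb

    def nbin_pmf(k, r, p):
        return comb(k - 1, r - 1) * p**r * (1 - p)**(k - r)

    r, p = 3, 0.4
    ks = range(r, 2000)  # truncation; the tail is negligible here
    m1 = sum(k * nbin_pmf(k, r, p) for k in ks)
    m2 = sum(k * k * nbin_pmf(k, r, p) for k in ks)
    print(m1, r / p)                       # both 7.5
    print(m2 - m1**2, r * (1 - p) / p**2)  # both 11.25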

\begin{example}
A radio station asks a question with four possible choices for its answer
and invites people to call in and answer it. The second caller to answer
correctly wins a special prize. Assuming that every caller makes a random
guess at the answer, find the probability that the fifth caller wins the
prize.
\end{example}
\begin{solution}
If the $X$-th caller wins the prize, then $X \sim
\negbinomialdist{2}{\frac{1}{4}}$. Hence,
\[ P(X = 5) = {4 \choose 1} (\frac{1}{4})^2 (\frac{3}{4})^3
= \frac{27}{256} \]
\end{solution}
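The arithmetic can be confirmed with exact rational arithmetic (illustrative
sketch):

    from fractions import Fraction
    from math import comb

    # P(X = 5) for X ~ NBin(2, 1/4): C(4, 1) (1/4)^2 (3/4)^3.
    p = Fraction(1, 4)
    prob = comb(4, 1) * p**2 * (1 - p)**3
    print(prob, float(prob))  # 27/256, approximately 0.1055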

This file was deleted: tex/section3/section3_sub7_problem_on_linearity_of_expectation.tex (its content was moved under section3/pending/, per the comment in section3_discrete_random_variables.tex).

94 changes: 94 additions & 0 deletions tex/section3/section3_sub8_hypergeometric_dist.tex
@@ -0,0 +1,94 @@
\subsection{Hypergeometric Distribution}
\begin{definition}
Suppose $\mathbf{N}, n$ and $r$ are positive integers such that $n \leq
\mathbf{N}$ and $r \leq \mathbf{N}$. Then a random variable $Y$ follows a
hypergeometric distribution with parameters $\mathbf{N}$, $n$ and $r$, or
simply $Y \sim \hypergeometricdist{\mathbf{N}}{n}{r}$, if
\[ p_Y(k) =
P(Y = k) = \frac{{r \choose k} {{\mathbf{N} - r} \choose {n - k}}}
{{\mathbf{N} \choose n}} \]
where $k$ is an integer such that $0 \leq k \leq r$ and $0 \leq n - k \leq
\mathbf{N} - r$.
\end{definition}

\noindent
This is indeed a pmf. Clearly, $P(Y = k) \geq 0$. To see why $\sum_{k = 0}^n
P(Y = k) = 1$, note that,
\[ (1 + x)^\mathbf{N} = (1 + x)^r \cdot (1 + x)^{\mathbf{N} - r} \]
and the coefficient of $x^n$ on the left side is ${\mathbf{N} \choose n}$,
whereas the right side can be expanded as $(\sum_{j = 0}^r {r \choose j}
\cdot x^j) \cdot (\sum_{s = 0}^{\mathbf{N} - r} {{\mathbf{N} - r} \choose s}
\cdot x^s)$, and hence the coefficient of $x^n$ on the right side is
$\sum_{k = 0 \lor (n + r - \mathbf{N})}^{r \land n} {r \choose k} \cdot
{{\mathbf{N} - r} \choose {n - k}}$. Hence,
\[
\sum_{k = 0 \lor (n + r - \mathbf{N})}^{r \land n}
{r \choose k} \cdot
{{\mathbf{N} - r} \choose {n - k}}
=
{{\mathbf{N}} \choose n}
\]
which shows that $\sum P(Y = k) = 1$.
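The identity is easy to spot-check numerically for particular values (a
throwaway sketch, illustrative only):

    from math import comb

    # Check sum_k C(r, k) C(N - r, n - k) = C(N, n) over the valid range of k.
    N, r, n = 30, 12, 9
    lhs = sum(comb(r, k) * comb(N - r, n - k)
              for k in range(max(0, n + r - N), min(r, n) + 1))
    print(lhs, comb(N, n), lhs == comb(N, n))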

\note There is a simple combinatorial argument (Vandermonde's identity) proving
the above identity, which we will see next.

\noindent
\textbf{Interpretation}: suppose a jar contains $\mathbf{N}$ distinguishable
balls of which $r$ are red and $(\mathbf{N} - r)$ are blue, and you select an
unordered sample of $n$ balls without replacement at random. If $Y$ denotes the
number of red balls in the sample, then $Y \sim
\hypergeometricdist{\mathbf{N}}{n}{r}$.

The total number of possible samples is ${\mathbf{N} \choose n}$. The number of
samples in which there are exactly $k$ red and $(n - k)$ blue balls is ${r
\choose k} {{\mathbf{N} - r} \choose {n - k}}$. Hence the claim follows.
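The urn interpretation suggests a direct simulation. A sketch (illustrative
only) that draws unordered samples without replacement and compares the
empirical distribution of red balls with the hypergeometric pmf:

    import random
    from math import comb

    N, r, n, trials = 20, 8, 6, 100_000
    balls = ['red'] * r + ['blue'] * (N - r)
    rng = random.Random(2)

    counts = [0] * (n + 1)
    for _ in range(trials):
        sample = rng.sample(balls, n)  # n balls, without replacement
        counts[sample.count('red')] += 1

    for k in range(n + 1):
        exact = comb(r, k) * comb(N - r, n - k) / comb(N, n)
        print(k, counts[k] / trials, round(exact, 4))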

\begin{theorem}[Binomial approximation to Hypergeometric Distribution]
If $\mathbf{N} \rightarrow \infty$, $\frac{r_\mathbf{N}}{\mathbf{N}}
\rightarrow p \in (0, 1)$, $n$ is kept fixed, and $Y_{\mathbf{N}, n,
r_\mathbf{N}} \sim \hypergeometricdist{\mathbf{N}}{n}{r_\mathbf{N}}$, then
\[
\lim_{\mathbf{N} \rightarrow \infty} P(Y_{\mathbf{N}, n, r_\mathbf{N}} = k)
= {n \choose k} \cdot p^k \cdot (1 - p)^{n - k}
\]
for $k = 0, 1, \dots, n$.
\end{theorem}
\note An unordered sample (without replacement) of $n$ balls out of
$\mathbf{N}$ many balls can be drawn by sequentially drawing $n$ balls without
replacement and then ignoring the order in which they were drawn. The theorem
above essentially says that if both $\mathbf{N}$ and $r_\mathbf{N}$ are large,
such that $\frac{r_\mathbf{N}}{\mathbf{N}} \approx p$, then sampling without
replacement is almost equivalent to sampling with replacement, which
corresponds to a binomial distribution with parameters $n$ and $p$.
\begin{proof}
First note that the bounds $0 \lor (n + r_\mathbf{N} - \mathbf{N}) \leq k
\leq r_\mathbf{N} \land n$ become $0 \leq k \leq n$ in the
$\mathbf{N} \rightarrow \infty$ limit. Now for every $0 \leq k \leq n$,
\begin{align*}
\frac{{r_\mathbf{N} \choose k}{{\mathbf{N} - r_\mathbf{N}} \choose {n - k}}}
{{\mathbf{N} \choose n}}
&= \frac{r_\mathbf{N} (r_\mathbf{N} - 1) \dots (r_\mathbf{N} - k + 1)}{k!}
\cdot
\frac{(\mathbf{N} - r_\mathbf{N}) \dots (\mathbf{N} - r_\mathbf{N} - n
+ k + 1)}{(n-k)!}
\cdot
\frac{n!}{\mathbf{N}(\mathbf{N} - 1) \dots (\mathbf{N} - n + 1)} \\
&\approx \frac{r_\mathbf{N}^k}{k!} \cdot
\frac{(\mathbf{N} - r_\mathbf{N})^{n - k}}{(n - k)!} \cdot
\frac{n!}{\mathbf{N}^n} \\
&= \frac{r_\mathbf{N}^k \mathbf{N}^{n - k}}{\mathbf{N}^n} \cdot
\frac{n!}{k! (n - k)!} \cdot
(1 - \frac{r_\mathbf{N}}{\mathbf{N}})^{n - k} \\
&\approx p^k \cdot {n \choose k} \cdot (1 - p)^{n - k}
\end{align*}
where both approximations become exact in the limit $\mathbf{N} \rightarrow
\infty$.
\end{proof}
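The convergence is already visible for moderate $\mathbf{N}$. A sketch
(illustrative only) tabulating the hypergeometric pmf against the binomial pmf
as $\mathbf{N}$ grows with $r_\mathbf{N} / \mathbf{N} = p$ fixed:

    from math import comb

    def hg_pmf(k, N, n, r):
        return comb(r, k) * comb(N - r, n - k) / comb(N, n)

    n, p = 5, 0.3
    for N in (20, 200, 2000):
        r = int(p * N)
        print(N, [round(hg_pmf(k, N, n, r), 4) for k in range(n + 1)])
    print('Bin', [round(comb(n, k) * p**k * (1 - p)**(n - k), 4)
                  for k in range(n + 1)])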

\begin{theorem}
If $Y \sim \hypergeometricdist{\mathbf{N}}{n}{r}$, then,
\[ E[Y] = \frac{n \cdot r}{\mathbf{N}} \qquad
V[Y] = n \cdot
\frac{r}{\mathbf{N}} \cdot
\frac{\mathbf{N} - r}{\mathbf{N}} \cdot
\frac{\mathbf{N} - n}{\mathbf{N} - 1} \]
\end{theorem}
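These formulas can be verified by summing directly against the pmf (sketch,
illustrative only):

    from math import comb

    N, n, r = 20, 6, 8
    support = range(max(0, n + r - N), min(r, n) + 1)
    pmf = {k: comb(r, k) * comb(N - r, n - k) / comb(N, n) for k in support}
    m1 = sum(k * q for k, q in pmf.items())
    m2 = sum(k * k * q for k, q in pmf.items())
    print(m1, n * r / N)  # both 2.4
    print(m2 - m1**2,
          n * (r / N) * ((N - r) / N) * ((N - n) / (N - 1)))  # ~1.061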
