% notes.tex
\documentclass[11pt]{article}
% Packages
\usepackage[margin=2cm]{geometry}
\usepackage{amsmath}
\usepackage{enumitem}
\usepackage{amsfonts}
\usepackage{amsthm}
% Examples, definitions, theorems, etc
\theoremstyle{definition}
\newtheorem{ex}{Example}[section]
\newtheorem{defn}{Definition}[section]
\newtheorem{rmk}{Remark}[section]
\newtheorem{prop}{Proposition}[section]
\newtheorem{lem}{Lemma}[section]
\theoremstyle{plain}
\newtheorem{thm}{Theorem}[section]
% Short-cuts
\newcommand{\R}[0]{\mathbb{R}}
\newcommand{\N}[0]{\mathbb{N}}
\newcommand{\prob}[1]{\mathbb{P}\left(#1 \right)}
\newcommand{\comp}[1]{{#1}^{\texttt{C}}}
\newcommand{\borel}[0]{\mathcal{B}(\R)}
\newcommand{\pisys}[0]{\mathcal{I}}
\newcommand{\dsys}[0]{\mathcal{D}}
\begin{document}
\begin{center}
\textbf{MATH 587: Advanced Probability Theory} \\
\textbf{Shereen Elaidi}
\end{center}
\section{Review of Probability Spaces}
The standard notation for a probability space is \( ( \Omega, \mathcal{F}, \mathbb{P} ) \). The components of this tuple are:
\begin{enumerate}[noitemsep]
\item \( \Omega \): this is the \textbf{sample space}, which is the collection of ALL possible outcomes. \( \omega \in \Omega \) is a \textbf{sample point}. \( \omega \) corresponds to a specific outcome.
\item \( \mathcal{F} \): this is a \textbf{ \( \sigma \)-algebra}. This is a collection of events. For \( A \in \mathcal{F} \), we call \( A \) an \textbf{event}. \( A \subseteq \Omega \). As we will see later, a \( \sigma \)-algebra is a collection of subsets of \( \Omega \). This satisfies certain conditions.
\item \( \mathbb{P} \): this is a function defined on a sigma algebra.
\begin{align*}
& \mathbb{P}: \mathcal{F} \rightarrow [0,1],\\
& A \in \mathcal{F} \mapsto \mathbb{P}(A) \in [0,1].
\end{align*}
We call \( \prob{A} \) the \textbf{probability of event A}.
\end{enumerate}
\begin{ex}
Consider flipping a fair coin. Then:
\begin{align*}
\Omega & = \{ H, T \}, \\
\mathcal{F} & = \{ \{ H \}, \{ T \}, \{ H, T \}, \emptyset \}, \\
\prob{\{H\}} & = \frac{1}{2},\ \prob{\{T\}} = \frac{1}{2},\ \prob{ \{H, T \}} = 1,\ \prob{ \emptyset } = 0.
\end{align*}
\end{ex}
\begin{ex}
Will do later, It's annoying to write out.
\end{ex}
\subsection{Measure Theory}
\textbf{Measure theory} is the foundation of modern probability theory. We will define things for a general measure space \( (S, \Sigma, \mu ) \) to replace \( ( \Omega, \mathcal{F}, \mathbb{P} ) \).
\begin{defn}[Algebra]
Let \( S \) be a set. A collection \( \Sigma_0 \) of subsets of \( S \) is called an \textbf{algebra} if:
\begin{enumerate}[noitemsep]
\item \( S \in \Sigma_0 \).
\item \( A \in \Sigma_0 \Rightarrow \comp{A} := S \setminus A \in \Sigma_0 \) (\textbf{closed under complements}).
\item \( \forall n \in \N, A_1, ..., A_n \in \Sigma_0 \Rightarrow \bigcup_{j=1}^n A_j \in \Sigma_0 \) \textbf{(closed under finite unions)}.
\end{enumerate}
\end{defn}
Some remarks: if \( \Sigma_0 \) is an algebra of \( S \), then:
\begin{enumerate}[noitemsep]
\setcounter{enumi}{3}
\item \( \emptyset \in \Sigma_0 \).
\item if \( A, B \in \Sigma_0 \), then \( A \cup B \), \( A \cap B \), \( A \setminus B \), \( A \triangle B \), \( B \setminus A \in \Sigma_0 \).
\item \( \forall n \in \N \), \( A_1, ..., A_n \in \Sigma_0 \Rightarrow \bigcap_{j=1}^n A_j \in \Sigma_0 \).
\end{enumerate}
Note that all of these operations are \underline{finite}.
\begin{defn}[\(\sigma\)-algebra]
A collection of subsets \( \Sigma \) of \( S \) is a \textbf{sigma-algebra} if:
\begin{enumerate}[noitemsep]
\item \( \Sigma \) is an algebra.
\item \( A_1, A_2, ... \in \Sigma \Rightarrow \cup_{j=1}^\infty A_j \in \Sigma \) (closed under countable unions).
\end{enumerate}
\end{defn}
\begin{rmk}
If \( \Sigma \) is a sigma algebra, then \( \Sigma \) satisfies \( (1) \)-\( (6) \) and:
\begin{align}
A_1, A_2, ... \in \Sigma \Rightarrow \bigcap_{j=1}^\infty A_j \in \Sigma.
\end{align}
Very often at this stage, when we want to prove something, we have to go back to the definitions.
\end{rmk}
\begin{defn}[Measurable Space]
The pair \( (S, \Sigma) \) is called a \textbf{measurable space}. A set \( A \in \Sigma \) is a \textbf{measurable set}.
\end{defn}
\begin{defn}[Sigma Algebra Generated]
Let \( \mathcal{C} \) be a collection of subsets of \( S \). The \( \sigma \)-algebra \textbf{generated} by \( \mathcal{C} \), denoted by \( \sigma ( \mathcal{C}) \), is the smallest \( \sigma \)-algebra which contains \( \mathcal{C} \), i.e.:
\begin{enumerate}[noitemsep]
\item \( \mathcal{C} \subseteq \sigma ( \mathcal{C}) \).
\item if \( \Sigma' \) is a \( \sigma \)-algebra containing \( \mathcal{C} \), then \( \sigma ( \mathcal{C} ) \subseteq \Sigma' \).
\end{enumerate}
\end{defn}
\begin{rmk}
\begin{enumerate}[noitemsep]
\item if \( \mathcal{C} \) is a \( \sigma \)-algebra, then \( \sigma( \mathcal{C} ) = \mathcal{C} \).
\item \( \sigma ( \sigma ( \mathcal{C} )) = \sigma ( \mathcal{C} ) \).
\item if \( \mathcal{C}_1 \subseteq \mathcal{C}_2 \), then \( \sigma (\mathcal{C}_1) \subseteq \sigma (\mathcal{C}_2 ) \).
\end{enumerate}
\end{rmk}
\begin{prop}
\begin{align}
\sigma ( \mathcal{C} ) = \bigcap \left\{ \Sigma\ |\ \Sigma \text{ is a } \sigma\text{-algebra and } \mathcal{C} \subseteq \Sigma \right\}.
\end{align}
\end{prop}
\textbf{Fact}: if \( \{ \Sigma_\alpha\ |\ \alpha \in I \} \) where \( I \) is some index set is any collection of \( \sigma \)-algebras of subsets of \( S \), then \( \bigcap \Sigma_\alpha \) remains a \( \sigma \)-algebra, i.e., intersections of \( \sigma \)-algebras remain \( \sigma \)-algebras.
\begin{ex}
Given \( S \), let \( A, B \subseteq S \). Then, \( \sigma ( \{ A \} ) = \{ A, \comp{A}, \emptyset, S \} \).
\end{ex}
\emph{Question: What do \( \sigma \)-algebras mean for us?}
\newline
\newline
A \( \sigma \)-algebra contains the collection of events we can study. In other words, it tells me the information available to me, from the point of view of probability. If you're not in the \( \sigma \)-algebra, then you're not measurable with respect to the \( \sigma \)-algebra.
\begin{ex}[Borel \( \sigma \)-algebra]
Take \( S = \R \). Then, we define the \textbf{Borel Sigma Algebra} to be:
\begin{align}
\borel := \sigma( \{ \text{ open subsets of } \R \} ).
\end{align}
This applies to any topological space. Equivalently, only for \( \R \), this reduces to:
\begin{align}
\borel = \sigma ( \{ ]a, b [\ |\ a < b,\ a, b \in \R \} ) =: \Sigma_{], [}.
\end{align}
Note that the generating class of \( \borel \) is \emph{not} unique.
\end{ex}
\begin{defn}[\( \pi \)-system]
Let \( S \) be a set. A collection \( \mathcal{I} \) (of subsets of \( S \)) is called a \textbf{\( \pi \)-system} if \( \forall\ A, B \in \mathcal{I} \), one has \( A \cap B \in \mathcal{I} \).
\end{defn}
\begin{defn}
Let \( S \) be a set. A collection \( \dsys \) (of subsets of S) is called a \textbf{\( \dsys \)-system} if:
\begin{enumerate}[noitemsep]
\item \( S \in \dsys \).
\item If \( A, B \in \dsys \) and if \( A \subseteq B \), then \( B \setminus A \in \dsys \).
\item If \( A_n \in \dsys \) for \( n \geq 1 \), and \( A_n \uparrow A \) then \( A \in \dsys \).
\end{enumerate}
\end{defn}
Note that these two definitions mean that we can separate the properties of \( \borel \) into a \( \pi \)-system and a \( \dsys \)-system. This will allow us to further decode a sigma algebra.
\begin{lem}
Let \( \Sigma \) be a collection of subsets of \( S \). Then, \( \Sigma \) is a sigma-algebra \( \iff \) \( \Sigma \) is a \( \pi \)-system and a \( \dsys \)-system.
\end{lem}
\begin{proof}
``\( \Rightarrow \)'': trivial.
\newline
``\( \Leftarrow \)'': We just need to verify that \( \Sigma \) is a \( \sigma \)-algebra.
\begin{enumerate}[noitemsep]
\item \( S \in \Sigma \) follows from the fact that \( \Sigma \) is a d-system.
\item If \( A \in \Sigma \), then \( \comp{A} = S \setminus A \in \Sigma \) from (2) of d-system definition.
\item If \( A_n \in \Sigma \) for \( n \geq 1 \), we need to check that \( \bigcup_{n=1}^\infty A_n \in \Sigma \).
\newline
\newline
Set \( B_n := \bigcup_{j=1}^n A_j \). Then, \( B_n \) is an increasing sequence, and \( B_n \in \Sigma \) for all \( n \in \N \) (we know this since we can apply deMorgan's Law, then use the fact that \( \Sigma \) is closed under complements and, being a \( \pi \)-system, under finite intersections). Now, using (3) of being a d-system, we can conclude:
\begin{align*}
\bigcup_{n=1}^\infty B_n \in \Sigma \Rightarrow \bigcup_{n=1}^\infty B_n = \bigcup_{n=1}^\infty A_n \in \Sigma.
\end{align*}
\end{enumerate}
\end{proof}
\begin{thm}[Dynkin's \( \pi \)-d lemma]
Suppose that \( \pisys \) is a \( \pi \)-system of subsets of \( S \), and \( d( \pisys ) \) is the d-system generated by \( \pisys \). Then, \( d( \pisys ) = \sigma ( \pisys ) \).
\end{thm}
In words, this theorem is saying that if you start with a \( \pi \)-system generating class, all you need is a d-system and you'll automatically get a \( \sigma \)-algebra.
\begin{proof}
We observe that it is sufficient to show that \( d( \pisys ) \) is a \( \pi \)-system. Why?
\begin{enumerate}[noitemsep]
\item If we manage to show this, by the previous lemma, we would know that \( d ( \pisys ) \) is a \( \sigma \)-algebra containing \( \pisys \), and so, since \( \sigma ( \pisys ) \) is by definition the smallest such \( \sigma \)-algebra, one would get that \( \sigma ( \pisys ) \subseteq d ( \pisys ) \).
\item Since \( \sigma ( \pisys ) \) is certainly a d-system, \( d( \pisys ) \subseteq \sigma ( \pisys ) \).
\end{enumerate}
So the GOAL: show that \( d ( \pisys ) \) is a \( \pi \)-system. This proof requires two stages. We will use the \textbf{good set principle}, which is a common technique in set theory. Broadly speaking, you collect all items with a certain property, argue that this collection satisfies certain properties, then show that this collection is actually the whole set. So, our ``good set'' will be defined as follows:
\begin{align*}
\mathcal{D}_1 := \{\ B \in d( \pisys )\ |\ B \cap A \in d (\pisys)\ \forall A\ \in \pisys \}.
\end{align*}
\textbf{Claim:} \( \mathcal{D}_1 \) is a d-system. We need to check against the definition of a d-system.
\begin{enumerate}[noitemsep]
\item \( S \in \mathcal{D}_1 \): satisfied, since \( \forall A \in \pisys \), \( A \cap S = A \in d (\pisys) \) since \( A \in \mathcal{I} \subseteq d ( \mathcal{I}) \).
\item Let \( A_1, A_2 \in \mathcal{D}_1 \). Suppose that \( A_1 \subseteq A_2 \). We want to show that \( A_2 \setminus A_1 \in \mathcal{D}_1 \). So, we need to verify that \( ( A_2 \setminus A_1) \cap A \in d ( \mathcal{I}) \) for any \( A \in \mathcal{I} \):
\begin{align*}
A \cap (A_2 \setminus A_1 ) = ( A_2 \cap A) \setminus (A_1 \cap A ) \in d(\mathcal{I}).
\end{align*}
\( ( A_2 \cap A) \) and \( (A_1 \cap A ) \) both belong to \( d (\mathcal{I}) \) since \( A_1, A_2 \in \mathcal{D}_1 \), and \( (A_1 \cap A) \subseteq (A_2 \cap A) \) since \( A_1 \subseteq A_2 \). Now, since \( d(\mathcal{I}) \) is a d-system, the difference is in \( d( \mathcal{I} ) \).
\item Finally we need to show that when \( A_n \in \mathcal{D}_1 \), for \( n \geq 1 \) and \( A_n \uparrow A_\infty \), then \( A_\infty \in \mathcal{D}_1 \). Indeed, for any \( A \in \mathcal{I} \):
\begin{align*}
\underbrace{A_n \cap A}_{\in d( \mathcal{I} )} \uparrow A_\infty \cap A \Rightarrow A_\infty \cap A \in d( \mathcal{I}) \Rightarrow A_\infty \in \mathcal{D}_1.
\end{align*}
\end{enumerate}
This shows me that \( \mathcal{D}_1 \) forms a d-system. Since \( \mathcal{I} \) is a \( \pi \)-system, \( \mathcal{I} \subseteq \mathcal{D}_1 \). This tells me that \( d ( \mathcal{I}) \subseteq \mathcal{D}_1 \). However, \( \mathcal{D}_1 \) is defined using only elements in \( d ( \mathcal{I}) \), which then gives us the reverse inclusion: \( \mathcal{D}_1 \subseteq d ( \mathcal{I}) \). Therefore, \( \forall B \in d( \mathcal{I} ) \) and \( \forall A \in \mathcal{I} \), one has that \( B \cap A \in d( \mathcal{I}) \). This was the intermediate step; we need to re-do this but with \( A \in d( \mathcal{I} ) \) now. Hence, we set:
\begin{align*}
\mathcal{D}_2 := \{ C \in d( \mathcal{I} )\ |\ B \cap C \in d ( \mathcal{I} ) \text{ for all } B \in d( \mathcal{I}) \}.
\end{align*}
From our intermediate step conclusion, we know that \( \mathcal{I} \subseteq \mathcal{D}_2 \). Next, we verify that \( \mathcal{D}_2 \) is a d-system. Exercise: verify this. This shows that \( \mathcal{D}_2 \) is a d-system and \( \mathcal{I} \subseteq \mathcal{D}_2 \). This shows us that \( d (\mathcal{I}) \subseteq \mathcal{D}_2 \). Hence, \( d( \mathcal{I} ) = \mathcal{D}_2 \). Hence, \( \forall C \in d ( \mathcal{I} ) \), \( \forall B \in d( \mathcal{I} ) \), \( B \cap C \in d( \mathcal{I} ) \). This proves that \( d ( \mathcal{I} ) \) is a \( \pi \)-system, which is what we wanted to show.
\end{proof}
This idea is very important in the study of measures. The reason why this theorem is important is because when constructing a measure, we only look at the \( \pi \)-system that generates the \( \sigma \)-algebra.
\begin{defn}[Additive]
Let \( S \) be a set. Let \( \Sigma_0 \) be an algebra of subsets of \( S \). Let \( \mu_0 \) be a non-negative set function defined on \( \Sigma_0 \), i.e.:
\begin{align*}
\mu_0 : \Sigma_0 \rightarrow [0, \infty ].
\end{align*}
We say that \( \mu_0 \) is \textbf{additive} if (1) \( \mu_0 ( \emptyset ) = 0 \) and (2) \( \forall A, B \in \Sigma_0 \) with \( A \cap B = \emptyset \), \( \mu_0 (A \cup B) = \mu_0(A) + \mu_0(B) \).
\end{defn}
\begin{defn}
We say that \( \mu_0 \) is \textbf{countably additive} if:
\begin{enumerate}[noitemsep]
\item \( \mu_0( \emptyset) = 0 \)
\item \( \forall \) \( A_n \in \Sigma_0 \),\( n \geq 1 \) such that \( A_i \cap A_j = \emptyset \) for all \( i \neq j \) and \( \cup_{n=1}^\infty A_n \in \Sigma_0 \), then we require:
\begin{align*}
\mu_0 \left( \bigcup_{n=1}^\infty A_n \right) = \sum_{n=1}^\infty \mu_0(A_n).
\end{align*}
\end{enumerate}
\end{defn}
\begin{defn}
Let \( (S, \Sigma) \) be a measurable space. If \( \mu \) is a non-negative set function defined on \( \Sigma \) and if \( \mu \) is countably additive, then \(\mu\) is called a \textbf{measure} and the triple \( (S, \Sigma, \mu ) \) is called a \textbf{measure space}.
\begin{enumerate}[noitemsep]
\item If \( \mu(S) < \infty \), then \( \mu \) is a \textbf{finite measure}.
\item If \( \mu(S) = 1 \), then \( \mu\) is a \textbf{probability measure}.
\item If \( \exists \) \( \{ S_n\ |\ n \geq 1 \} \subseteq \Sigma \) such that \( \bigcup_{n=1}^\infty S_n = S \) and \( \mu(S_n) < \infty \) for all \( n \geq 1 \), then \( \mu \) is \textbf{\( \sigma \)-finite}.
\item For \( N \in \Sigma \) such that \( \mu(N) = 0 \), then we say that \( N \) is a \textbf{null set}.
\item If a statement holds everywhere except on a null set, then we say that the statement is true \textbf{almost everywhere} or \textbf{almost surely}.
\end{enumerate}
\end{defn}
\textbf{Remark:} All measures that we will discuss in this course will either be finite or \( \sigma \)-finite.
\subsection{Properties of a Measure \( \mu \)}
\begin{prop}[Monotonicity]
Let \( A, B \in \Sigma \), \( A \subseteq B \). Then, \( \mu(A) \leq \mu(B) \).
\end{prop}
\textbf{Caution!} Do not take the difference, \( \mu(B) - \mu(A) \), because both \( \mu(A) \) and \( \mu(B) \) might be infinite. This is undefined! If at least one of them is finite, then this is ok.
\begin{prop}[Subadditivity]
Let \( A_n \in \Sigma \) for all \( n \geq 1 \). Then:
\begin{align}
\mu \left( \bigcup_{n=1}^\infty A_n \right) \leq \sum_{n=1}^\infty \mu(A_n).
\end{align}
\end{prop}
\begin{proof}
We can prove this using the monotonicity of \( \mu \). Define a new collection of sets \( B_n \):
\begin{align*}
B_1 & = A_1 \\
B_2 & = A_2 \setminus A_1 \\
B_n & = A_n \setminus \left( \bigcup_{j=1}^{n-1} A_j \right).
\end{align*}
The \( B_n \)'s are disjoint, and by construction \( B_n \subseteq A_n \). This gives us that \( \forall \) \( n \geq 1 \) we have: \( \mu(B_n) \leq \mu (A_n) \). Furthermore, \( \bigcup_{n=1}^\infty B_n = \bigcup_{n=1}^\infty A_n \). Combining all this together and using the countable additivity of \( \mu \) on the \( B_n \)'s, we obtain:
\begin{align*}
\mu \left( \bigcup_{n=1}^\infty A_n \right) = \mu \left( \bigcup_{n=1}^\infty B_n \right) = \sum_{n=1}^\infty \mu(B_n) \leq \sum_{n=1}^\infty \mu(A_n).
\end{align*}
\end{proof}
\begin{prop}[Continuity from Below]
Let \( A_n \in \Sigma \) for all \( n \geq 1 \). Also suppose that \( A_n \uparrow \) (``monotonic increasing sequence''). Then:
\begin{align}
\mu \left( \bigcup_{n=1}^\infty A_n \right) = \lim_{n \rightarrow \infty} \mu (A_n).
\end{align}
When \( A_n \uparrow \), we view \( \bigcup_{n=1}^\infty A_n \) as \( \lim_{n \rightarrow \infty} A_n \). Then, \( \mu ( \lim_{n \rightarrow \infty} A_n) = \lim_{n \rightarrow \infty} \mu (A_n) \). This is what continuity means: I can interchange the limit with the measure \( \mu \).
\end{prop}
\begin{proof}
Define a new sequence of disjoint sets \( B_n \) as follows:
\begin{align*}
B_1 & = A_1 \\
B_n & = A_n \setminus A_{n-1} \text{ for all } n \geq 2.
\end{align*}
The \( B_n \)'s are clearly disjoint, and \( \bigcup_{n=1}^\infty B_n = \bigcup_{n=1}^\infty A_n \). Hence, we can use countable additivity:
\begin{align*}
\mu \left( \bigcup_{n=1}^\infty B_n \right) = \sum_{n=1}^\infty \mu (B_n ) = \lim_{n \rightarrow \infty} \sum_{j=1}^n \mu (B_j) = \lim_{n \rightarrow \infty} \mu \left( \bigcup_{j=1}^n B_j \right) = \lim_{n \rightarrow \infty } \mu (A_n).
\end{align*}
\end{proof}
\begin{prop}[Continuity from Above]
If \( A_n \in \Sigma \) for all \( n \geq 1 \), \( A_n \downarrow \), AND \( \mu(A_n) \) is finite for \emph{some} \( n \geq 1 \), then:
\begin{align}
\mu \left( \bigcap_{n=1}^\infty A_n \right) = \lim_{n \rightarrow \infty} \mu(A_n).
\end{align}
\end{prop}
\begin{proof}
We'll show this property through continuity from below. WLOG, assume that \( \mu (A_1 ) < \infty \). By monotonicity, this implies that \( \mu(A_n) \) is finite \( \forall n \in \N \). Using our decreasing sequence \( \{ A_n \} \) of sets, construct an increasing sequence of sets as follows:
\begin{align*}
B_n := A_1 \setminus A_n \text{ for all } n \geq 1.
\end{align*}
Then, \( B_n \uparrow \) and we can use Continuity from Below:
\begin{align*}
\lim_{n \rightarrow \infty} \mu (B_n) & = \mu \left( \bigcup_{n=1}^\infty B_n \right) \\
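& = \mu \left( \bigcup_{n=1}^\infty ( A_1 \setminus A_n) \right) \\
& = \mu \left( A_1 \setminus \bigcap_{n=1}^\infty A_n \right) \\
& = \mu(A_1) - \mu \left( \bigcap_{n=1}^\infty A_n \right) \text{ (this is ok since everything is finite).}
\end{align*}
On the other hand, \( \mu(B_n) = \mu(A_1) - \mu(A_n) \) for every \( n \), so \( \lim_{n \rightarrow \infty} \mu(B_n) = \mu(A_1) - \lim_{n \rightarrow \infty} \mu(A_n) \). Comparing the two expressions and cancelling the finite quantity \( \mu(A_1) \), we obtain: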
\begin{align*}
\lim_{n \rightarrow \infty} \mu(A_n) = \mu \left( \bigcap_{n=1}^\infty A_n \right).
\end{align*}
\end{proof}
\textbf{Remark:} in general, the assumption ``\( \mu(A_n) < \infty \) for some \( n \)'' is necessary. For example, consider the set \( S = \R \), \( A_n := ]n, \infty[ \). Consider the Lebesgue measure, which we will denote by \( \mu \). Then \( A_n \) is a decreasing sequence, but for each \( n \):
\begin{align*}
\mu ( ]n, \infty [ ) = \infty \Rightarrow \lim_{n \rightarrow \infty} \mu (A_n) = \infty.
\end{align*}
However, since \( A_n \downarrow \emptyset \),
\begin{align*}
\mu \left( \bigcap_{n=1}^\infty A_n \right) = 0.
\end{align*}
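Hence \( \lim_{n \rightarrow \infty} \mu(A_n) \neq \mu \left( \bigcap_{n=1}^\infty A_n \right) \), i.e., continuity from above fails without the finiteness assumption.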
\begin{thm}
Given a set \( S \) and an algebra \( \Sigma_0 \). Suppose that \( \mu \) is a non-negative set function:
\begin{align*}
\mu: \Sigma_0 \rightarrow [0, \infty [
\end{align*}
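and \( \mu \) is finitely additive. Then, \( \mu \) is countably additive \( \iff \) \( \mu \) is continuous at \( \emptyset \), i.e., whenever \( A_n \in \Sigma_0 \) and \( A_n \downarrow \emptyset \), we have \( \mu(A_n) \rightarrow 0 \). We call this ``continuity at the empty set.''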
\end{thm}
\begin{proof}
To do.
\end{proof}
\subsection{Existence and Uniqueness of Measure}
Before proceeding, we define what it means for two measures to be equal. Given a measurable space \( (S, \Sigma) \) and two measures \( \mu_1, \mu_2 \) on it, we say that \( \mu_1 = \mu_2 \) if \( \mu_1(A) = \mu_2 (A) \) for all \(A \in \Sigma \).
\begin{thm}
Given a set \( S \) and a \( \pi \)-system \( \pisys \) of subsets of \( S \). Let \( \Sigma := \sigma ( \pisys ) \) be the sigma algebra generated by \( \pisys \), and let \( \mu_1, \mu_2 \) be two measures on \( (S, \Sigma) \). Then, if \( \mu_1(S) = \mu_2(S) < \infty \) and if \( \mu_1(A)= \mu_2(A) \) for all \( A \in \pisys \), then \( \mu_1 = \mu_2 \) on the whole sigma-algebra.
\end{thm}
Significance of this theorem:
\begin{enumerate}[noitemsep]
\item Only \( \R \) valued.
\item Can extend to any \( \sigma\)-finite space.
\end{enumerate}
\begin{thm}[Caratheodory's Extension Theorem]
Given a set \( S \), suppose that \( \Sigma_0 \) is an algebra and \( \mu_0: \Sigma_0 \rightarrow [0, \infty ] \) is countably additive. Then, there exists a measure \( \mu \) defined on \( \Sigma = \sigma ( \Sigma_0 ) \) such that \( \mu(A) = \mu_0(A) \) for all \( A \in \Sigma_0 \).
\end{thm}
We can extend a measure on \( \Sigma_0 \) to \( \Sigma \). Moreover, if \( \mu_0(S) <\infty \), then such an extension is unique.
\subsection{Completion of Measure / Measure Space}
Sometimes it is convenient to ``assume'' subsets of null sets are measurable. Let \( (S, \Sigma, \mu) \) be a measure space. Let \( N := \{ A \subseteq S\ |\ \exists B \in \Sigma \text{ and } \mu(B) = 0 \text{ s.t. } A \subseteq B \} \). Define:
\begin{align}
\Sigma^* := \{ F \subseteq S\ |\ \exists\ G, H \in \Sigma\ \text{ s.t. } G \subseteq F \subseteq H \text{ and } \mu (H \setminus G ) = 0 \}
\end{align}
\begin{prop}
\( \Sigma^* \) is a sigma-algebra; in fact, \( \Sigma^* \) is the sigma-algebra generated by \( \Sigma \cup N \):
\begin{align*}
\Sigma^* = \sigma ( \Sigma \cup N ).
\end{align*}
\end{prop}
\begin{prop}
Define \( \mu^* \) to be a set function of \( \Sigma^* \) by:
\begin{align*}
\forall\ F \in \Sigma^*, \text{ if } G \subseteq F \subseteq H \text{ for some } G, H \in \Sigma \text{ with } \mu(H \setminus G) = 0, \text{ then } \mu^*( F) := \mu(G) = \mu(H).
\end{align*}
Then, \( \mu^* \) is a measure on \( (S, \Sigma^*) \).
\end{prop}
\begin{defn}[Complete Measure Space]
\( (S, \Sigma^*, \mu^*) \) is a \textbf{complete measure space}, i.e., the completion of \( (S, \Sigma, \mu) \).
\end{defn}
\section{Events and Independence}
Throughout this section, assume that \( ( \Omega, \mathcal{F}, \mathbb{P} ) \) is a probability space.
\begin{defn}[Limsup and Liminf]
Let \( \{ A_n\ |\ n \geq 1 \} \) be a sequence of events, i.e., \( A_n \in \mathcal{F} \) for all \( n \geq 1 \). Then:
\begin{align}
\limsup_{n \rightarrow \infty} A_n & := \bigcap_{n=1}^\infty \bigcup_{m=n}^\infty A_m \\
& = \lim_{n \rightarrow \infty} \bigcup_{m=n}^\infty A_m, \\
\liminf_{n \rightarrow \infty} A_n & := \bigcup_{n=1}^\infty \bigcap_{m=n}^\infty A_m \\
& = \lim_{n \rightarrow \infty} \bigcap_{m=n}^\infty A_m.
\end{align}
\( \limsup \) is the collection of outcomes which are in infinitely many \( A_n \)'s; the \( \liminf \) is the collection of outcomes which are in every \( A_n \) after a certain cutoff.
\end{defn}
\textbf{Remarks}:
\begin{enumerate}[noitemsep]
\item Obviously, \( \liminf_{n } A_n \subseteq \limsup_{n } A_n \). If \( \liminf_{n } A_n = \limsup_{n} A_n \), we say that \( \lim_{n} A_n \) exists.
\item If \( \{ A_n\ |\ n \in \N \} \) is a sequence of events and \( \{ B_n\ |\ n \in \N \} \subseteq \mathcal{F} \) and \( A_n \subseteq B_n \). Then:
\begin{align*}
\limsup_{n} A_n \subseteq \limsup_{n} B_n
\end{align*}
and
\begin{align*}
\liminf_{n} A_n \subseteq \liminf_{n} B_n.
\end{align*}
\item \textbf{(deMorgan's Law)}: \( \limsup_{n}A_n^c =(\liminf_{n} A_n)^c \) and \( \liminf_{n}A_n^c = ( \limsup_{n} A_n)^c \).
\item \( ( \limsup_{n}A_n ) \setminus ( \liminf_{n} A_n ) = \limsup_{n} ( A_n \setminus A_{n+1} ) \).
\item Let \( \{ A_n\ |\ n \in \N \} \) and \( \{ B_n\ |\ n \in \N \} \) be two sequences of events. Then in general:
\begin{enumerate}[noitemsep]
\item \( ( \limsup_{n} A_n) \cap (\limsup_{n}B_n ) \supseteq \limsup_{n}(A_n \cap B_n) \).
\item \( (\limsup_{n}A_n ) \cup ( \limsup_{n} B_n ) = \limsup_{n} (A_n \cup B_n ) \).
\item \( ( \liminf_{n} A_n ) \cap ( \liminf_n B_n ) = \liminf_{n} (A_n \cap B_n) \).
\item \( (\liminf_{n} A_n ) \cup ( \liminf_{n} B_n ) \subseteq \liminf_{n} (A_n \cup B_n) \).
\end{enumerate}
\end{enumerate}
\begin{thm}[Borel-Cantelli Lemma (BC1)]
If \( \sum_{n=1}^\infty \prob{A_n} < \infty \) then \( \prob{ \limsup_{n \rightarrow \infty} A_n } = 0 \).
\end{thm}
\begin{defn}[Independent]
Given a probability space \( ( \Omega, \mathcal{F}, \mathbb{P}) \), a sequence of events \( \{ E_n\ |\ n \geq 1 \} \subseteq \mathcal{F} \) is called \textbf{(mutually) independent} if \( \forall k \in \N \), \( \forall 1 \leq i_1 < i_2 < ... < i_k \):
\begin{align*}
\prob{ \bigcap_{j=1}^k E_{i_j}} = \prod_{j=1}^k \prob{ E_{i_j} }.
\end{align*}
\end{defn}
\begin{defn}
Let \( (\Omega, \mathcal{F}, \mathbb{P} ) \) be a probability space. Suppose that \( \{ G_n\ |\ n \in \N \} \) is a sequence of \( \sigma \)-algebras (of subsets of \( \Omega \)) with \( G_n \subseteq \mathcal{F} \). Then, \( \{ G_n\ |\ n \in \N \} \) is \emph{independent} if for any choice of \( E_n \in G_n \), \( n \geq 1 \), \( \{ E_n\ |\ n \geq 1 \} \) is independent.
\end{defn}
\begin{prop}
Given \( \{ E_n\ |\ n \geq 1 \} \subseteq \mathcal{F} \), the collection \( \{ E_n\ |\ n \geq 1 \} \) is independent \( \iff \) \( \{ \sigma(\{E_n\})\ |\ n \geq 1 \} \) is independent.
\end{prop}
\begin{thm}
Given \( (\Omega, \mathcal{F}, \mathbb{P}) \), let \( \{ I_n\ |\ n \geq 1 \} \) be a sequence of \( \pi \)-systems (of subsets of \( \Omega \)) with \( I_n \subseteq \mathcal{F} \), \( \forall \) \( n \geq 1 \). Then, \( \{ I_n\ |\ n \geq 1 \} \) is independent \( \iff \) \( \{ \sigma (I_n)\ |\ n \geq 1 \} \) is independent.
\end{thm}
\begin{thm}[Borel-Cantelli Lemma (BC2)]
Given a probability space \( (\Omega, \mathcal{F}, \mathbb{P}) \), let \( \{ E_n\ |\ n \in \N \} \) be an independent sequence of events. If
\begin{align}
\sum_{n=1}^\infty \prob{E_n} = \infty, \text{ then } \prob{ \limsup_{n} E_n } = 1.
\end{align}
\end{thm}
\begin{defn}[Tail Sigma-Algebra]
Given \( (\Omega, \mathcal{F}, \mathbb{P}) \) and a sequence of events \( \{ E_n\ |\ n \geq 1 \} \). Define:
\begin{align}
\mathcal{T} := \bigcap_{n=1}^\infty \sigma (\{ E_n, E_{n+1}, E_{n+2},... \}).
\end{align}
Then, \( \mathcal{T} \) is defined to be the \textbf{tail sigma algebra} associated with \( \{ E_n\} \). If an event \( A \in \mathcal{T} \), then \( A \) is a \textbf{tail event} with respect to \(\{ E_n\ |\ n \in \N \} \).
\end{defn}
\begin{thm}[Kolmogorov's 0-1 Law]
If \( \{ E_n\ |\ n \geq 1 \} \) is independent, and \( \mathcal{T} \) is the tail sigma-algebra associated with \( E_n \) then for all \( A \in \mathcal{T} \), either \( \prob{A} = 0 \) or \( \prob{A} = 1 \).
\end{thm}
\section{Random Variables}
\begin{defn}[Measurable]
Let \( (S, \Sigma) \) be a measurable space and \( h: S \rightarrow \R \) (in certain situations, could be \( \overline{\R} \)) be a function. We say that \( h \) is \( \Sigma \)\textbf{-measurable}, denoted by \( h \in m \Sigma \) if: \( \forall B \in \mathcal{B}(\R) \), the pre-image of \( B \) under \( h \) is measurable, i.e., \( h^{-1}(B) \in \Sigma \).
\end{defn}
Remarks:
\begin{enumerate}[noitemsep]
\item If \( h \) is a measurable function, \( h \in m \Sigma \), then \( \{ h = \infty \} = \{ s \in S\ |\ h(s) = + \infty \} \in \Sigma \) and \( \{ h = - \infty \} = \{ s \in S\ |\ h(s) = - \infty \} \in \Sigma \).
\item More generally, if \( h: (S_1, \Sigma_1 ) \rightarrow (S_2, \Sigma_2 ) \), then we say that \( h \) is \( \Sigma_1 / \Sigma_2 \)-measurable if \( \forall B \in \Sigma_2 \), \( h^{-1}(B) \in \Sigma_1 \).
\item For all \( A \subseteq \R \), \( h^{-1}(A^c ) = (h^{-1}(A))^c \). Moreover, for all \( A_\alpha \subseteq \R \), where \( \alpha \in I \),
\begin{align*}
h^{-1} \left( \bigcup_{\alpha \in I} A_\alpha \right) & = \bigcup_{\alpha \in I} h^{-1}(A_\alpha), \\
h^{-1} \left( \bigcap_{\alpha \in I} A_\alpha \right) & = \bigcap_{\alpha \in I} h^{-1}(A_\alpha).
\end{align*}
\item Suppose \( \mathcal{C} \subseteq \mathcal{B}(\R) \) and \( \sigma ( \mathcal{C} ) = \mathcal{B}(\R ) \). Then, a function \( h: S \rightarrow \R \) is \( \Sigma \)-measurable \( \iff \) \( \forall c \in \mathcal{C} \), \( h^{-1}(c)\in \Sigma \).
\item Given \( h: S \rightarrow \R \) measurable, \( f: \R \rightarrow \R \) a Borel function, then \( (f \circ h ) \in m \Sigma \).
\item Given \( h_1, h_2 \in m\Sigma \), \( h_1 + h_2\), \( h_1 - h_2 \), \( h_1 \cdot h_2 \), \( \frac{h_1}{h_2} \) (where \( h_2 \neq 0 \)),..., \( \in m\Sigma \).
\item Given \( \{ h_n\ |\ n \in \N \} \subseteq m \Sigma \), we have:
\begin{align*}
\inf_n h_n,\ \sup_n h_n,\ \liminf_n h_n,\ \limsup_n h_n \in m \Sigma.
\end{align*}
\end{enumerate}
\begin{defn}[Random Variable]
Consider a probability space \( (\Omega, \mathcal{F},\mathbb{P}) \). \( X: \Omega \rightarrow \R \) is a \textbf{Random Variable} if \( X \) is \( \mathcal{F} \)-measurable, i.e., \( X \in m \mathcal{F} \).
\end{defn}
\begin{defn}[Sigma Algebra Generated by \( X\)]
Let \( X \) be a random variable. The \( \sigma \)-algebra generated by \( X \), denoted by \( \sigma(X) \), is:
\begin{align}
\sigma(X) := \{ X^{-1}(B)\ |\ B \in \mathcal{B}(\R) \}.
\end{align}
I.e.: \( \sigma(X) \) is the smallest \( \sigma \)-algebra with respect to which the random variable \( X \) is measurable.
\end{defn}
\textbf{Remarks}:
\begin{enumerate}[noitemsep]
\item Given \( ( \Omega, \mathcal{F}, \mathbb{P}) \) and \( X: \Omega \rightarrow \R \). Then, \( X \) is a random variable \( \iff \) \( \forall \) \( a \in \R \), \( \{ X \leq a \} \in \mathcal{F} \).
\item Let \( \{ X_\alpha\ |\ \alpha \in I \} \) be a family of random variables on \( (\Omega, \mathcal{F}, \mathbb{P} ) \). Then, the \( \sigma \)-algebra generated by \( \{ X_\alpha\ |\ \alpha \in I \} \) is
\begin{align*}
\sigma ( \{ X_\alpha\ |\ \alpha \in I \} ) = \sigma( \{ X_\alpha^{-1} (B)\ |\ B \in \mathcal{B}(\R), \alpha \in I \} ).
\end{align*}
\item Let \( \{ X_n\ |\ n \in \N \} \) be a sequence of random variables on \( ( \Omega, \mathcal{F}, \mathbb{P} ) \). Set:
\begin{align*}
\mathcal{P} := \left\{ \bigcap_{j=1}^k \{ X_{n_j} \leq a_j \}\ \middle|\ k \in \N,\ n_1 < \dots < n_k,\ a_1, \dots, a_k \in \R \right\}.
\end{align*}
Then, \( \mathcal{P} \) is a \( \pi \)-system and \( \sigma( \mathcal{P}) = \sigma (\{ X_n\ |\ n \in \N \} )\).
\end{enumerate}
\begin{defn}[Independent Random Variables]
Given \( (\Omega, \mathcal{F}, \mathbb{P} ) \) and a sequence of random variables \( \{ X_n\ |\ n \in \N \} \), \( \{ X_n\ |\ n \in \N \} \) is \textbf{independent} if \( \{ \sigma (X_n)\ |\ n \in \N \} \) is independent.
\end{defn}
\begin{prop}
\( \{ X_n\ |\ n \in \N \} \) are independent \( \iff \) \( \forall k \geq 1 \), \( \forall \) \( 1 \leq n_1 < n_2 < ... < n_k \), \( \forall a_1, a_2, ...,a_k \in \R \):
\begin{align}
\prob{\bigcap_{j=1}^k \{ X_{n_j} \leq a_j \} } = \prod_{j=1}^k \prob{X_{n_j} \leq a_j }.
\end{align}
\end{prop}
\begin{defn}[Tail Sigma Algebra]
Let \( \{ X_n\ |\ n \in \N \} \) be a sequence of random variables. The \textbf{tail sigma-algebra} associated with \( \{ X_n\ |\ n \in \N \} \) is defined as:
\begin{align}
\mathcal{T} := \bigcap_{n=1}^\infty \sigma (\{X_n, X_{n+1}, ... \} )
\end{align}
\end{defn}
\begin{thm}[Kolmogorov's 0-1 Law]
Let \( \{ X_n\ |\ n \in \N \} \) be a sequence of independent random variables and let \( \mathcal{T} \) be the tail sigma-algebra associated with \( \{ X_n\ |\ n \in \N \} \). Then, for all \( A \in \mathcal{T} \):
\begin{align}
\prob{A} \in \{ 0, 1 \}
\end{align}
If \( X \in m \mathcal{T} \), then \( X \) is constant a.s., i.e., \( \exists \) an \( a \in \bar{\R} \) such that \( \prob{X = a} = 1 \).
\end{thm}
\section{Distribution Functions}
\begin{defn}[Law/Distribution]
Given \( (\Omega, \mathcal{F}, \mathbb{P}) \) and a random variable \( X: \Omega \rightarrow \R \). The \textbf{law/distribution} of \( X \), denoted by \( \mathcal{L}_X \), is the probability measure on \( (\R, \mathcal{B}(\R)) \) such that for all \( B \in \mathcal{B}( \R) \):
\begin{align}
\mathcal{L}_X(B) = \prob{X^{-1}(B)} = \prob{X \in B }.
\end{align}
The \textbf{distribution function} of \( X \) (of \( \mathcal{L}_X \)) is:
\begin{align*}
F_X: \R & \rightarrow [0, 1] \\
x \in \R & \mapsto F_X(x) := \prob{X \leq x} = \mathcal{L}_X(]-\infty, x])
\end{align*}
\end{defn}
\textbf{Remarks}
\begin{enumerate}[noitemsep]
\item \( F_X \) is increasing.
\item \( \lim_{x \rightarrow + \infty} F_X(x) = 1 \).
\item \( \lim_{x \rightarrow -\infty} F_X(x) = 0 \).
\item \( F_X \) is \textbf{right continuous}: \( \forall \) \( a \in \R \), \( F_X(a^+) = \lim_{x \rightarrow a^+} F_X(x) = F_X(a) \).
\item For all \( a > b \), \( F_X(a) - F_X(b) = \prob{b < X \leq a } = \mathcal{L}_X(]b, a]) \).
\end{enumerate}
\begin{defn}[Independent and Identically Distributed]
Let \( \{ X_n\ |\ n \in \N \} \) be a sequence of random variables. We say they are \textbf{independent and identically distributed (iid)} if \( \{ X_n\ |\ n \in \N \} \) is independent and for some probability measure \( \mu\) on \( ( \R, \mathcal{B}(\R) ) \), \( \mathcal{L}_{X_n} = \mu \) for all \( n \in \N \).
\end{defn}
\end{document}