-
Notifications
You must be signed in to change notification settings - Fork 6
/
final.tex
208 lines (189 loc) · 7.69 KB
/
final.tex
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
\documentclass[3pt,landscape]{article}
%ss[10pt,landscape]{article}
\usepackage{multicol}
\usepackage{calc}
\usepackage{ifthen}
\usepackage[landscape]{geometry}
\usepackage{amsmath,amsthm,amsfonts,amssymb}
\usepackage{color,graphicx,overpic}
\usepackage{hyperref}
\pdfinfo{
/Title (example.pdf)
/Creator (TeX)
/Producer (pdfTeX 1.40.0)
/Author (Seamus)
/Subject (Example)
/Keywords (pdflatex, latex,pdftex,tex)}
% This sets page margins to .5 inch if using letter paper, and to 1cm
% if using A4 paper. (This probably isn't strictly necessary.)
% If using another size paper, use default 1cm margins.
\ifthenelse{\lengthtest { \paperwidth = 11in}}
{ \geometry{top=.5in,left=.5in,right=.5in,bottom=.5in} }
{\ifthenelse{ \lengthtest{ \paperwidth = 297mm}}
{\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
{\geometry{top=1cm,left=1cm,right=1cm,bottom=1cm} }
}
% Turn off header and footer
\pagestyle{empty}
% Redefine section commands to use less space
\makeatletter
\renewcommand{\section}{\@startsection{section}{1}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{0.5ex plus .2ex}%x
{\normalfont\large\bfseries}}
\renewcommand{\subsection}{\@startsection{subsection}{2}{0mm}%
{-1explus -.5ex minus -.2ex}%
{0.5ex plus .2ex}%
{\normalfont\normalsize\bfseries}}
\renewcommand{\subsubsection}{\@startsection{subsubsection}{3}{0mm}%
{-1ex plus -.5ex minus -.2ex}%
{1ex plus .2ex}%
{\normalfont\small\bfseries}}
\makeatother
% Define BibTeX command
\def\BibTeX{{\rm B\kern-.05em{\sc i\kern-.025em b}\kern-.08em
T\kern-.1667em\lower.7ex\hbox{E}\kern-.125emX}}
% Don't print section numbers
\setcounter{secnumdepth}{0}
\setlength{\parindent}{0pt}
\setlength{\parskip}{0pt plus 0.5ex}
%My Environments
\newtheorem{example}[section]{Example}
% -----------------------------------------------------------------------
\def\ci{\perp\!\!\!\perp}
\begin{document}
\raggedright
\footnotesize
\begin{multicols}{3}
% multicol parameters
% These lengths are set only within the two main columns
%\setlength{\columnseprule}{0.25pt}
\setlength{\premulticols}{1pt}
\setlength{\postmulticols}{1pt}
\setlength{\multicolsep}{1pt}
\setlength{\columnsep}{2pt}
\begin{center}
\Large{\underline{CS 188 Final Note Sheet}} \\
\end{center}
% todo beginning of the midterm 2 material
\subsection*{Probability}
\subsubsection*{Product Rule:}
\[P(y)P(x|y)=P(x,y)\]
\subsubsection*{Chain Rule:}
\[P(x_{1},x_{2},x_{3})=P(x_{1})P(x_{2}|x_{1})P(x_{3}|x_{1},x_{2})\]
\subsubsection*{Bayes Rule:}
\[P(x,y)=P(x|y)P(y)=P(y|x)P(x)\]
\[P(x|y)=\frac{P(y|x)}{P(y)}P(x)\]
\[P(\texttt{Cause}|\texttt{Effect})=\frac{P(\texttt{Effect}|\texttt{Cause})P(\texttt{Cause})}{P(\texttt{Effect})}\]
\subsubsection*{Independence:}
Two variable are independent in a joint distribution if:
\[P(x,y)=P(x)P(y)\]
\[X \ci Y\]
\[ \forall x,y P(x,y)=P(x)P(y)\]
\subsection*{Conditional Independence}
\subsubsection*{X is conditionally independent of Y given Z:}
\[X \ci Y|Z\]
if and only if:
\[\forall x,y,z:P(x,y|z)=P(x|z)P(y|z)\]
or equivalently, if and only if:
\[\forall x,y,z:P(x|z,y)=P(x|z)\]
\subsubsection*{Inference: Calculating some useful quantity from a joint probablility distribution.}
Example:\\
Posterior probability
\[P(Q|E_{1}=e_{1},\ldots E_{k}=e_{k})\]
Most likely explanation:
\[\texttt{argmax}_{q}P(Q=q|E_{1}=e_{1}\ldots)\]
\subsection*{Bayes' Net Sampling}
Prior Sampling P\\
Rejection Sampling \(P(Q|e)\)\\
Likelihood Weighting \(P(Q|e)\)\\
\subsubsection*{Gibbs Sampling \(P(Q|e)\)}
Step 1: Fix evidence\\
Step 2: Initialize other variables randomly\\
Step 3: Repeat
\subsection*{Expected Utility}
\[\sum_{a}P(a)U(a,t)\]
\subsubsection*{Max Expected Utility:}
\[MEU(\o)=max_{a}EU(a)\]
\subsection*{Value of Perfect Information}
\[VPI(E'|e)=\left(\sum_{e'}P(e'|e)MEU(e,e')\right)-MEU(e)\]
Assume we have evidence E=e. Value if we act now:
\[MEU(e)=max_{a}\sum_{s}P(s|e)U(s,a)\]
Assume we see E'=e'. Value if we act now:
\[MEU(e,e')=max_{a}\sum_{s}P(s|e,e')U(s,a)\]
{\footnotesize but E' is a random variable whose value is unknown, so we don't know what e' will be.}\\
Expected value if E' is revealed and then we act:
\[MEU(e,E')=\sum_{e'}P(e',e)MEU(e,e')\]
Value of information: how much MEU goes up by revealing E' first then acting, over acting now:
\[VPI(E'|e)=MEU(e,E')-MEU(e)\]
\subsubsection*{Properties:}
Nonnegative:
\[\forall E',e:VPI(E'|e) \geq 0\]
Nonadditive:
\[VPI(E_{j},E_{k}|e)\neq VPI(E_{j}|e)+VPI(E_{k}|e)\]
Order-independent:
\[VPI(E_{j},E_{k}|e)=VPI(E_{j}|e)+VPI(E_{k}|e,E_{j})\]
\[=VPI(E_{k}|e)+VPI(E_{j}|e,E_{k})\]
\subsection*{Markov Models}
\subsubsection*{Forward Algorithm:}
\[\texttt{given } P(x_{1}) = \texttt{known, } P(x_{t})=\sum_{x_{t-1}}P(x_{t}|x_{t-1})P(x_{t-1})\]
\[\texttt{satisfying, }P_{\infty}(X)=P_{\infty +1}(X)=\sum_{x}P_{t+1|t}(X|x)P_{\infty}(x)\]
\subsection*{Hidden Markov Models}
Assume we have current belief P(X | evidence to date):
\[B(X_{t}=P(X_{t}|e_{1:t})\]
Then after one time step passes (Elapse Time):
\[B'(X_{t+t})=\sum_{x_{t}}P(X'|x)B(x_{t})\]
Result of an observation, given current Belief:
\[B'(X_{t+1})=P(X_{t+1}|e_{1:t})\]
\subsection*{Dynamic Bayes Net Particle Filters}
Initialize: Generate prior samples for the t=1 Bayes net
\[\texttt{Example particle: }G_{1}^{a}=(3,3),G_{1}^{b}=(5,3)\]
Elapse time: Sample a successor for each particle
\[\texttt{Example successor: }G_{2}^{a}=(2,3),G_{2}^{b}=(6,3)\]
Observe: Weight each entire sample by the likelihood of the evidence conditioned on the sample
\[\texttt{Likelihood: }P(E_{1}^{a}|G_{1}^{a})*P(E_{1}^{b}|G_{1}^{b})\]
Resample: Select prior samples (tuples of values) in proporation to their likelihood
\subsection*{Parameter Estimation}
\subsubsection*{Empirical Rate (Maximum Likelihood Estimate):}
\[P_{ML}(x)=\frac{\texttt{count}(x)}{\texttt{total samples}}\]
\subsubsection*{Likelihood of the data:}
\[L(x,\Theta)=\prod_{i}P_{\Theta}(x_{i})\]
\subsubsection*{Laplace Smoothing:}
\[P_{LAP,k}(x)=\frac{c(x)+k}{N+k|X|}\]
\subsubsection*{Laplace for conditionals:}
\[P_{LAP,k}(x|y)=\frac{c(x,y)+k}{c(y)+k|X|}\]
\subsubsection*{Linear Interpolation:}
\[P_{LIN}(x|y)=\alpha \hat{P}(x|y)+(1.0-\alpha )\hat{P}(x)\]
\subsection*{Perceptrons}
\subsubsection*{Linear Classifiers:}
\[\texttt{activation}_{w}(x)=\sum_{i}w_{i}*f_{i}(x)=w*f(x)\]
\subsubsection*{Learning: Binary Perceptron:}
Start with weights = 0\\
classify with the current weights:
\[y=\left\{\begin{array}{lr}+1 & \texttt{if } w*f(x) \geq 0\\-1 & \texttt{if } w*f(x) < 0\end{array}\right.\]
\subsection*{Multiclass Perceptron}
Start with all weights = 0\\
Predict with current weights:
\[y = \texttt{arg max}_{y}w_{y}*f(x)\]
If correct make no change\\
If wrong: lower score of wrong answer, raise score of right answer
\[w_{y}=w_{y}-f(x)\]
\[w_{y*}=w_{y*}+f(x)\]
\subsubsection*{Properties of the perceptron}
Separability: true if some parameters get the training set perfectly correct\\
Convergence: if the training is separable, perceptron will eventually converge (binary case)\\
Mistake Bound: the maximum number of mistakes (binary case) related to the margin or degree of separability.
\subsubsection*{Problems with the perceptron}
Noise: if the data isn't separable, weights might thrash\\
Mediocre generalization: finds a "barely" separating solution\\
Overtraining: test/held-out accuracy usually rises, then falls. Is a kind of overfitting.
\subsubsection*{MIRA*: fixing the perceptron}
\[min_{w}\frac{1}{2}\sum_{y}||w_{y}-w_{y}'||^{2}\]
\[w_{y*}*f(x) \geq w_{y}*f(x)+1\]
% You can even have references
\rule{0.3\linewidth}{0.25pt}
\scriptsize
\bibliographystyle{abstract}
\bibliography{refFile}
\end{multicols}
\end{document}