In [20]:
import numpy as np
import matplotlib.pyplot as plt
import csv
import pandas as pd
%matplotlib inline

# Refresher on Linear Algebra and Derivatives


- (a) Let $A$ be a $3 \times 4$ matrix and $B$ a $3 \times 2$ matrix. What is the size of $A^T B$?

> $A^T B$ is a $4 \times 2$ matrix.

In [33]:
# Numerical sanity check for (a): a (3, 4) matrix transposed, times a (3, 2) matrix.
A = np.ones((3, 4))
B = np.ones((3, 2))
C = A.T @ B  # (4, 3) @ (3, 2) -> (4, 2)
print("A.T:{} x B:{}=C:{}".format(A.T.shape, B.shape, C.shape))

A.T:(4, 3) x B:(3, 2)=C:(4, 2)


- (b) Let $x \in R^n$ be a column vector (vectors are always columns for us) and $A$ an $m \times n$ matrix. What is the size of $Ax$?

> $Ax$ has size $m \times 1$, i.e. it is a column vector with $m$ entries.

In [36]:
# Numerical sanity check for (b) with m = 5, n = 4: an (m, n) matrix times an (n, 1) column.
x = np.ones((4, 1))
A = np.ones((5, 4))
y = A @ x  # (5, 4) @ (4, 1) -> (5, 1)
print("A:{} * x:{} = y:{}".format(A.shape, x.shape, y.shape))

A:(5, 4) * x:(4, 1) = y:(5, 1)


- (c) What is the derivative of $f(x) = (2x + y)^2$ w.r.t. $x$, i.e. $\frac{\partial}{\partial x}f(x)$?

$$\begin{aligned}
\frac{\partial}{\partial x} f(x) &= \frac{\partial}{\partial x} (2x+y)^2 \\
                                 &= \frac{\partial}{\partial x} (4x^2 + 4xy + y^2) \\
                                 &= 8x + 4y
\end{aligned}$$

- (d) Given $f(x) = g(x^2)$ where $g(x) = (x + y)^2$, what is $\frac{\partial}{\partial x}f(x)$

$$\begin{aligned}
\frac{\partial}{\partial x}f(x) &= \frac{\partial}{\partial x} g(x^2) \\
                                &= \frac{\partial}{\partial x} (x^2+y)^2 \\
                                &= \frac{\partial}{\partial x} (x^4 + 2x^2y + y^2) \\
                                &= 4x^3 + 4xy
\end{aligned}$$

# Multivariable Calculus

Recall that a matrix $A \in R^{n\times n}$ is symmetric if $A^T = A$, that is, $A_{ij} = A_{ji}$ for
all $i, j$. Also recall that the gradient $\nabla f(x)$ of a function $f : R^n \to R$ is the $n$-vector
of partial derivatives

$$
\nabla f(x) = \left\{
\begin{matrix}
    \frac{\partial}{\partial x_1}f(x) \\
    \vdots \\
    \frac{\partial}{\partial x_n}f(x)
    \end{matrix}
\right\}
$$

where

$$
x = \left\{
\begin{matrix}
    x_1 \\
    ... \\
    x_n
\end{matrix}
\right\}
$$

The Hessian $\nabla^2 f(x)$ is the $n\times n$ symmetric matrix of second partial derivatives,

$$\begin{aligned}
\nabla^2f(x) = \left\{
\begin{matrix}
    \frac{\partial^2}{\partial x_1^2}f(x) & \frac{\partial^2}{\partial x_1 \partial x_2}f(x) & \cdots & \frac{\partial^2}{\partial x_1 \partial x_n}f(x) \\
    \frac{\partial^2}{\partial x_2 \partial x_1}f(x) & \frac{\partial^2}{\partial x_2^2}f(x) & \cdots & \frac{\partial^2}{\partial x_2 \partial x_n}f(x) \\
    \vdots & \vdots & \ddots & \vdots \\
    \frac{\partial^2}{\partial x_n \partial x_1}f(x) & \frac{\partial^2}{\partial x_n \partial x_2}f(x) & \cdots & \frac{\partial^2}{\partial x_n^2}f(x) \\
\end{matrix}
\right\}
\end{aligned}$$


- a Let $f(x) = \frac{1}{2}x^TAx+b^Tx$, where $A$ is a symmetric matrix and $b\in R^n$ is a vector. What is $\nabla f(x)$?

$\because$ $A\in R^{n\times n}$, $A_{ij} = A_{ji}$ and $f(x)= \frac{1}{2}x^TAx+b^Tx$

$\therefore$
$$\begin{aligned}
    \frac{\partial}{\partial x_i} f(x) &= \frac{\partial}{\partial x_i} (\frac{1}{2}x^TAx+b^Tx) \\
    &= \frac{\partial}{\partial x_i}\space \frac{1}{2}\left\{\begin{matrix}
            x_1 \cdots x_n
            \end{matrix}\right\}
            \left\{\begin{matrix}
            A_{1,1} & A_{1,2}\space & \cdots & A_{1,n} \\
            A_{2,1} & A_{2,2}\space & \cdots & A_{2,n} \\
            \vdots & \vdots & \ddots & \vdots \\
            A_{n,1} & A_{n,2} & \cdots & A_{n,n} \\
            \end{matrix}\right\}
            \left\{\begin{matrix}
            x_1 \\
            x_2 \\
            \vdots \\
            x_n
            \end{matrix}\right\}
            +\left\{\begin{matrix}
            b_1\space b_2\space ... b_n
            \end{matrix}\right\}\left\{\begin{matrix}
            x_1 \\
            x_2 \\
            \vdots \\
            x_n
            \end{matrix}\right\} \\
    &= \frac{\partial}{\partial x_i} (\frac{1}{2} \sum_{j=1}^{n}\sum_{k=1}^{n}A_{jk}x_jx_k + \sum_{j=1}^n b_jx_j) \\
    &= \frac{\partial}{\partial x_i} [\frac{1}{2} (\sum_{j=1}^{n}A_{jj}x_j^2 + \sum_{j=1}^{n}\sum_{k=1, k\neq j}^{n}A_{jk}x_jx_k) + \sum_{j=1}^n b_jx_j] \\
    &= \frac{1}{2} (2A_{ii}x_i + \sum_{k=1, k\neq i}^{n}A_{ik}x_k + \sum_{j=1, j\neq i}^{n}A_{ji}x_j) + b_i \\
    &= A_{ii}x_i + \sum_{j=1,j\neq i}^{n}A_{ij}x_j + b_i
\end{aligned}$$


$\therefore$
$$
\nabla f(x) = \left\{
\begin{matrix}
    A_{1,1}x_1 + \sum_{j=1, j\neq 1}^{n}A_{1,j}x_j + b_1 \\
    \vdots \\
    A_{n,n}x_n + \sum_{j=1, j\neq n}^{n}A_{n,j}x_j + b_n
\end{matrix}
\right\} = Ax + b
$$

- b Let $f(x) = g(a^Tx)$

- c

- d

# Hands On

In [7]:
# Read the space-delimited text file "diabetes.txt" and parse every field of every
# row as a float, producing a 2-D NumPy array named `table`.
# newline="" is how the csv module expects files to be opened, so it can handle
# line endings itself.
with open("diabetes.txt", "r", newline="") as file:
    reader = csv.reader(file, delimiter=' ')
    # Use the builtin `float`: the `np.float` alias was deprecated in NumPy 1.20 and
    # removed in 1.24, where it raises AttributeError.
    table = np.asarray([row for row in reader], dtype=float)


In [12]:
# Rows 0..199 become the training split; every remaining row is kept for validation.
training_set, validation_set = table[:200], table[200:]

In [10]:
# Display the dimensions of the loaded array as a (rows, columns) tuple.
np.shape(table)

(442, 11)