# Week 2

### Linear Regression with one variable

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
# Read the training set into a frame with 'population' and 'profit' columns
df = pd.read_csv(
    'Andrew-Ng-ML-Course-Assignments/machine-learning-ex1/ex1/ex1data1.txt',
    names=['population', 'profit'],
)
m = len(df)  # number of rows (training examples)
X, y = df['population'].values, df['profit'].values

In [None]:
%matplotlib inline

# Plot the data
df.plot(x = 'population', y = 'profit', kind = 'scatter')

We're given the cost function in linear algebra form, so I'll find its partial derivative with respect to theta in order to complete the multivariate case as well.

Given: $J(\theta)=\frac{1}{2m}(X\theta-y)^T(X\theta-y)$

The transpose of a sum is the sum of the transposes =>
$J(\theta)=\frac{1}{2m}[((X\theta)^T-y^T)(X\theta-y)]$

Multiplication=>
$J(\theta)=\frac{1}{2m}[(X\theta)^TX\theta-(X\theta)^Ty-y^TX\theta+y^Ty]$

$(X\theta)^Ty=y^TX\theta$ (both sides are the same scalar, and a scalar equals its own transpose) =>
$J(\theta)=\frac{1}{2m}[(X\theta)^TX\theta-2(X\theta)^Ty+y^Ty]$

$(X\theta)^T=\theta^TX^T$ =>
$J(\theta)=\frac{1}{2m}[\theta^TX^TX\theta-2\theta^TX^Ty+y^Ty]$

$\dfrac{\partial \theta^TX^TX\theta}{\partial \theta}=2X^TX\theta$

$\dfrac{\partial \theta^TX^Ty}{\partial \theta}=X^Ty$

$\dfrac{\partial J(\theta)}{\partial \theta}=\frac{1}{2m}[2X^TX\theta-2X^Ty]$

$\dfrac{\partial J(\theta)}{\partial \theta}=\frac{1}{m}[X^TX\theta-X^Ty]$

Setting this gradient to zero and solving for $\theta$ gives the normal equation, $\theta=(X^TX)^{-1}X^Ty$ (assuming $X^TX$ is invertible), which is the analytic solution for multivariate linear regression.

In [None]:
# Gradient Descent
def compute_cost(X, y, theta):
    '''Computes the 0.5 * mean squared error of the linear hypothesis X.theta.

    Parameters:
        X: design matrix, one row per training example.
        y: array of target values; y.shape[0] is used as the example count m.
        theta: parameter array such that np.dot(X, theta) lines up with y.

    Returns:
        (1 / (2m)) * (X.theta - y)^T (X.theta - y)
    '''
    m = y.shape[0]

    # Residuals of the hypothesis h = X.theta against the targets
    residual = np.dot(X, theta) - y

    # Sum of squared errors: (h - y)^T (h - y). The transpose is a no-op for
    # 1-D arrays and gives the usual row-times-column product for column vectors.
    sse = np.dot(residual.transpose(), residual)
    # 1.0 / m keeps the division in floating point even when m is an integer
    return 0.5 * (1.0 / m) * sse

def gradient_descent(X, y, theta, alpha, iters):
    '''Runs batch gradient descent on the squared-error cost, starting from theta.

    Parameters:
        X: design matrix, one row per training example.
        y: array of target values; y.shape[0] is used as the example count m.
        theta: initial parameter array (not modified; a new array is returned).
        alpha: learning rate (step size).
        iters: number of update steps to perform.

    Returns:
        [theta, J_history]: the parameters after the last step, and a list
        holding the cost after each step as reported by compute_cost.
    '''
    m = y.shape[0]

    # X^T X and X^T y do not depend on theta, so compute them once up front
    # instead of on every iteration.
    X_t = X.transpose()
    xtx = np.dot(X_t, X)
    xty = np.dot(X_t, y)

    J_history = []
    for _ in range(iters):
        # Gradient of the cost: (1/m) * (X^T X theta - X^T y).
        # 1.0 / m keeps the division in floating point even for an integer m.
        delta = (1.0 / m) * (np.dot(xtx, theta) - xty)
        theta = theta - alpha * delta

        J_history.append(compute_cost(X, y, theta))
    return [theta, J_history]

# Add a column of ones to X (the intercept input). The design matrix is built
# from the data frame rather than from X itself, so re-running this cell does
# not try to stack a second column of ones onto an already-augmented X.
X = np.vstack((np.ones(m), df['population'].values)).transpose()
theta = np.zeros(2)

iterations = 1500
alpha = 0.01

# Cost at the all-zero starting point, kept for inspection instead of discarded
initial_cost = compute_cost(X, y, theta)
theta, J_history = gradient_descent(X, y, theta, alpha, iterations)

In [None]:
# Answers: evaluate the fitted hypothesis at inputs 3.5 and 7
# (the leading 1 in each vector multiplies the intercept parameter)
predict1 = np.array([1, 3.5]).dot(theta)
predict2 = np.array([1, 7]).dot(theta)

In [None]:
# Plot the line over the data
ax = df.plot(x = 'population', y = 'profit', kind = 'scatter')

# Compute points for drawing the line: one unit beyond each x-axis limit
x_min, x_max = ax.get_xlim()
x_0 = x_min - 1
x_1 = x_max + 1
# Evaluate the fitted hypothesis at both endpoints (leading 1 = intercept input)
y_0 = np.dot(np.array([1, x_0]), theta)
y_1 = np.dot(np.array([1, x_1]), theta)

ax.plot([x_0, x_1], [y_0, y_1], c='r')

### Multivariate linear regression

In [None]:
# Read the second data set into a frame with size, bedroom-count and price columns
df2 = pd.read_csv(
    'Andrew-Ng-ML-Course-Assignments/machine-learning-ex1/ex1/ex1data2.txt',
    names=['size', '#_of_bedrooms', 'price'],
)
m2 = len(df2)  # number of rows (training examples)

In [None]:
def normalize_vector(v):
    '''Standardizes v: subtracts its mean and divides by its standard deviation.

    The standard deviation is the one returned by np.std(v) with its default
    arguments.

    Parameters:
        v: array-like of numbers that supports arithmetic with a scalar
           (e.g. a NumPy array or a pandas Series).

    Returns:
        (v - mean) / std. When the standard deviation is exactly zero (every
        entry is identical) the centered values, which are all zero, are
        returned as-is instead of dividing 0 by 0 and producing NaN.
    '''
    centered = v - np.mean(v)
    spread = np.std(v)
    # Only short-circuit for a scalar spread; any non-scalar result from
    # np.std falls through to the plain division below.
    if np.ndim(spread) == 0 and spread == 0:
        return centered
    return centered / spread

# Feature Scaling: store a standardized copy of each input column
df2['size_scaled'] = normalize_vector(df2['size'])
df2['#_of_bedrooms_scaled'] = normalize_vector(df2['#_of_bedrooms'])

# Targets, and a design matrix made of an intercept column of ones followed
# by the two scaled input columns
y2 = df2['price'].values
X2 = np.hstack((
    np.ones((m2, 1)),
    df2[['size_scaled', '#_of_bedrooms_scaled']].values,
))

# One parameter per design-matrix column, all starting at zero
n_features2 = X2.shape[1]
theta2 = np.zeros(n_features2)

iterations2, alpha2 = 1500, 0.01

theta2, J_history2 = gradient_descent(X2, y2, theta2, alpha2, iterations2)

In [None]:
# Convergence curve for the multivariate run: cost after each gradient step.
# J_history2 is the history returned for X2/y2; J_history belongs to the
# earlier single-variable fit.
plt.plot(J_history2)
plt.xlabel('iteration')
plt.ylabel('cost J');