**Homework 2**

By Team H

Functions implementation:

In [21]:
def predict_sales(radio, weight, bias):
    return weight*radio + bias

def cost_function(radio, sales, weight, bias):
    
    companies = len(radio)
    total_error = 0.0
    for i in range(companies):
        total_error += (sales[i] - (weight*radio[i] + bias))**2
    return total_error / companies

def update_weights(radio, sales, weight, bias, learning_rate):
    weight_deriv = 0
    bias_deriv = 0
    companies = len(radio)

    for i in range(companies):
        # Calculate partial derivatives
        # -2x(y - (mx + b))
        weight_deriv += -2*radio[i] * (sales[i] - (weight*radio[i] + bias))

        # -2(y - (mx + b))
        bias_deriv += -2*(sales[i] - (weight*radio[i] + bias))

    # We subtract because the derivatives point in direction of steepest ascent
    weight -= (weight_deriv / companies) * learning_rate
    bias -= (bias_deriv / companies) * learning_rate

    return weight, bias

def train(radio, sales, weight, bias, learning_rate, iters):
    cost_history = []

    for i in range(iters):
        weight,bias = update_weights(radio, sales, weight, bias, learning_rate)

        #Calculate cost for auditing purposes
        cost = cost_function(radio, sales, weight, bias)
        cost_history.append(cost)

        # Log Progress
        if i % 10 == 0:
            print ("iter: "+str(i) + " weight: "+str(round(weight, 2)) + " bias: " + str(round(bias, 3)) + " cost: "+str(round(cost, 2)))

    return weight, bias, cost_history

**Exercise 1**

After we imported the .csv files we set the starting weight, bias and learning rate as shown below, this is the output we obtained:

In [22]:
import pandas as pd
import matplotlib.pyplot as plt

dataset = pd.read_csv("Advertising.csv")
iters = 50

radio = dataset['radio'].values
sales = dataset['sales'].values
weight = .03
bias = .0014
learning_rate = 0.000011



weight, bias, cost_history = train(radio, sales, weight, bias, learning_rate, iters)

iter: 0 weight: 0.04 bias: 0.002 cost: 196.84
iter: 10 weight: 0.11 bias: 0.004 cost: 152.74
iter: 20 weight: 0.17 bias: 0.007 cost: 121.28
iter: 30 weight: 0.22 bias: 0.009 cost: 98.84
iter: 40 weight: 0.26 bias: 0.011 cost: 82.83


Function implementations for Exercise 2

In [2]:
def normalize(features):
    '''
    features     -   (200, 3)
    features.T   -   (3, 200)

    We transpose the input matrix, swapping
    cols and rows to make vector math easier
    '''

    for feature in features.T:
        fmean = np.mean(feature)
        frange = np.amax(feature) - np.amin(feature)

        #Vector Subtraction
        feature -= fmean

        #Vector Division
        feature /= frange

    return features

def predict(features, weights):
  '''
  features - (200, 3)
  weights - (3, 1)
  predictions - (200,1)
  '''
  return np.dot(features,weights)

def update_weights(features, targets, weights, lr):
    '''
    Features:(200, 3)
    Targets: (200, 1)
    Weights:(3, 1)
    '''
    x0 = "global"
    x1 = "global"
    x2 = "global"
    x3 = "global"
    predictions = predict(features, weights)

    #Extract our features
    x0 = features[:,0]
    x1 = features[:,1]
    x2 = features[:,2]
    x3 = features[:,3]

    # Use matrix cross product (*) to simultaneously
    # calculate the derivative for each weight
    d_w0 = -x0*(targets - predictions)
    d_w1 = -x1*(targets - predictions)
    d_w2 = -x2*(targets - predictions)
    d_w3 = -x3*(targets - predictions)

    # Multiply the mean derivative by the learning rate
    # and subtract from our weights (remember gradient points in direction of steepest ASCENT)
    weights[0][0] -= (lr * np.mean(d_w0))
    weights[1][0] -= (lr * np.mean(d_w1))
    weights[2][0] -= (lr * np.mean(d_w2))
    weights[3][0] -= (lr * np.mean(d_w3))

    return weights

def cost_function(features, targets, weights):
    
    #Features:(200,3)
    #Targets: (200,1)
    #Weights:(3,1)
    #Returns 1D matrix of predictions
    
    N = len(targets)

    predictions = predict(features, weights)

    # Matrix math lets use do this without looping
    sq_error = (predictions - targets)**2

    # Return average squared error among predictions
    return 1.0/(2*N) * sq_error.sum()

'''

X = [
    [x1, x2, x3]
    [x1, x2, x3]
    [x1, x2, x3]
]

targets = [
    [1],
    [2],
    [3]
]
'''
def update_weights_vectorized(X, targets, weights, lr):
    '''
    gradient = X.T * (predictions - targets) / N
    X: (200, 3)
    Targets: (200, 1)
    Weights: (3, 1)
    '''
    companies = len(X)

    #1 - Get Predictions
    predictions = predict(X, weights)

    #2 - Calculate error/loss
    error = targets - predictions

    #3 Transpose features from (200, 3) to (3, 200)
    # So we can multiply w the (200,1)  error matrix.
    # Returns a (3,1) matrix holding 3 partial derivatives --
    # one for each feature -- representing the aggregate
    # slope of the cost function across all observations
    gradient = np.dot(-X.T,  error)

    #4 Take the average error derivative for each feature
    gradient /= companies

    #5 - Multiply the gradient by our learning rate
    ##gradient *= lr
  
    #6 - Subtract from our weights to minimize cost(
   
    weights[0] -= (lr * np.mean(gradient[:,0]))
    weights[1] -= (lr * np.mean(gradient[:,1]))
    weights[2] -= (lr * np.mean(gradient[:,2]))
    weights[3] -= (lr * np.mean(gradient[:,3]))
    
    return weights

def train(features, sales, weight, learning_rate, iters):
    cost_history = []
    
    #print(features)
    for i in range(iters):
        weight = update_weights(features, sales, weight, learning_rate)

        #Calculate cost for auditing purposes
        cost = cost_function(features, sales, weight)
        cost_history.append(cost)

    return weight, cost_history

**Exercise 2**

After reading the csv file with 1000 iterations and the learning rate below, our output (bias, learning rate) matches the given model.

In [20]:
import numpy as np
import pandas as pd

W1 = 0.0
W2 = 0.0
W3 = 0.0
weights = np.array([
    [1],
    [W1],
    [W2],
    [W3]
])
iters = 1000

learning_rate = 0.004

dataset = pd.read_csv("Advertising.csv")
features=dataset[['TV','radio','newspaper']].values
sales =dataset['sales'].values

features=normalize(features)

bias = np.ones(shape=(len(features),1))
features = np.append(bias, features, axis=1)
weights,cost = train(features, sales, weights, learning_rate, iters)

print("Sales = " + str(np.round(weights[1], 2)) + "TV + " + str(np.round(weights[2], 2)) + "Radio + " + str(np.round(weights[3], 2)) + "Newspaper + " + str(np.round(weights[0], 2)))

Sales = [4.72]TV + [3.58]Radio + [0.91]Newspaper + [13.79]
