In [2]:
# import libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import scale
from sklearn.model_selection import train_test_split 


In [3]:
# import raw data

raw_data = pd.read_csv('data/Advertising.csv').drop('Unnamed: 0', axis=1)
raw_data.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


In [4]:
# scale data

data = scale(raw_data)
data = pd.DataFrame(data, columns=raw_data.columns.str.lower())
data.head()

Unnamed: 0,tv,radio,newspaper,sales
0,0.969852,0.981522,1.778945,1.552053
1,-1.197376,1.082808,0.669579,-0.696046
2,-1.516155,1.528463,1.783549,-0.907406
3,0.05205,1.217855,1.286405,0.86033
4,0.394182,-0.841614,1.281802,-0.215683


In [5]:
# insert bias term

data.insert(0, 'bias', 1)
data.head()

Unnamed: 0,bias,tv,radio,newspaper,sales
0,1,0.969852,0.981522,1.778945,1.552053
1,1,-1.197376,1.082808,0.669579,-0.696046
2,1,-1.516155,1.528463,1.783549,-0.907406
3,1,0.05205,1.217855,1.286405,0.86033
4,1,0.394182,-0.841614,1.281802,-0.215683


In [6]:
# constants
init_w = np.array([0.0, 0.0, 0.0, 0.0])

np.random.seed(123)

In [62]:
# functions
def initialize_weights():
    return init_w

def predict(x, weights):
    return np.dot(x, weights)   

def compute_cost(y, y_hat):
    return np.mean((y_hat - y)**2) / 2

def compute_gradient(x, y, y_hat, learning_rate):
    return np.dot(x.T, (y_hat - y)) * learning_rate / y.size
    # return np.sum(x * (y_hat - y), axis=0) * learning_rate / y.size

def update_weights(weight, gradient):
    return weight - gradient 

def grad_descent(x, y, learning_rate, num_iterations):
    weights = initialize_weights()
    cost_history = []
    
    for i in range(num_iterations):
        y_hat = predict(x, weights)
        cost = compute_cost(y, y_hat)
        cost_history.append(cost)
        
        gradient = compute_gradient(x, y, y_hat, learning_rate)
        weights = update_weights(weights, gradient)
    return weights, cost_history

def plot_costs(cost_history, learning_rate):
    plt.figure(figsize=(15, 10))  # Corrected figure size

    plt.scatter(
        x=range(len(cost_history)),
        y=cost_history
    )
    plt.xlabel('Iterations')
    plt.ylabel('Cost')
    plt.title('Cost vs Iterations, Learning Rate={}'.format(learning_rate))
    plt.yticks(np.arange(min(cost_history), max(cost_history), step=0.05))
    plt.show()

def unstandardize(y_hat, raw_data, column_name='Sales'):
    mean = raw_data[column_name].mean()
    std = raw_data[column_name].std()
    return y_hat * std + mean

In [8]:
# divide data into training (.75) and testing (.25) sets

random_state = np.random.RandomState(0)

x = data.drop('sales', axis=1)  # predictors
y = data['sales']               # response

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

In [64]:
# parameters
learning_rate = 0.1
num_iterations = 500

# run gradient descent
weights, cost_history = grad_descent(x_train, y_train, learning_rate, num_iterations)

plot_costs(cost_history, learning_rate)
print(cost_history[-1])


ValueError: operands could not be broadcast together with shapes (4,) (154,) 