### End goal: Given the amount company spends on radio advertisement predict the sales of the company
### Method: Linear Regression
#### a. via for loops
#### b. via matix calculations
#### b. via scikit-learn package

### via gradient descent

In [1]:
import pandas as pd

def load_data_from_csv(csv_path=""):
    df = pd.read_csv(csv_path)
    radio_spendings = [df.loc[i]['radio'] for i in range(len(df))]
    sales = [df.loc[i]['sales'] for i in range(len(df))]
    return radio_spendings, sales
# load_data_from_csv("advertising.csv")

In [2]:
# apply gradient descent and update parameters(w,b) during 1 epoch

def update_w_and_b(spendings, sales, w, b, alpha):
    N = len(spendings)
    #     
    dl_dw = 0.0
    dl_db = 0.0
    
    for i in range(N):
        # partial derivative of loss function wrt w for ith sample         
        dl_dw += -2*(sales[i] - w*spendings[i] + b) *spendings[i]
        # partial derivative of loss function wrt b for ith sample
        dl_db += -2*(sales[i] - (w*spendings[i] + b))
    
    w = w - (1/float(N))*(alpha*dl_dw)
    b = b - (1/float(N))*(alpha*dl_db)
    return w, b
                    

In [3]:
# run through multiple epochs 
def train(spendings, sales, w, b, alpha, epochs):
    for e in range(epochs):
        w, b = update_w_and_b(spendings, sales, w, b, alpha)
        
        if e%400==0:
            l= avg_loss(spendings, sales, w, b)
            print(f"epoch: {e}, loss: {l}")
    return w, b

In [4]:
# function that calculates mean squared error
def avg_loss(spendings, sales, w, b):
    N = len(spendings)
    total_error = 0.0
    for i in range(N):
        total_error += (sales[i] - (w*spendings[i] + b))**2
    return total_error / float(N)

In [5]:
def predict(new_x, w, b):
    return w*new_x + b

In [6]:
spendings, sales = load_data_from_csv("advertising.csv")
w, b = train(spendings, sales, 0.0, 0.0, 0.001, 15000)
print(f"w={w}, b={b}")
x_new= 23.0
predict(x_new, w, b)

epoch: 0, loss: 92.32078294903626
epoch: 400, loss: 41.11464748716807
epoch: 800, loss: 41.97329319752943
epoch: 1200, loss: 42.27644837311585
epoch: 1600, loss: 42.35875187053051
epoch: 2000, loss: 42.37995725978558
epoch: 2400, loss: 42.38535255764593
epoch: 2800, loss: 42.38672097816313
epoch: 3200, loss: 42.38706777810902
epoch: 3600, loss: 42.38715565024873
epoch: 4000, loss: 42.3871779141506
epoch: 4400, loss: 42.38718355501624
epoch: 4800, loss: 42.38718498420242
epoch: 5200, loss: 42.387185346304896
epoch: 5600, loss: 42.387185438048164
epoch: 6000, loss: 42.38718546129245
epoch: 6400, loss: 42.387185467181716
epoch: 6800, loss: 42.38718546867381
epoch: 7200, loss: 42.38718546905183
epoch: 7600, loss: 42.38718546914762
epoch: 8000, loss: 42.3871854691719
epoch: 8400, loss: 42.38718546917805
epoch: 8800, loss: 42.387185469179606
epoch: 9200, loss: 42.38718546918
epoch: 9600, loss: 42.38718546918
epoch: 10000, loss: 42.38718546918
epoch: 10400, loss: 42.38718546918
epoch: 10800, 

13.881176133240817

### via matrix calculations

In [7]:
# function that calculates mean squared error
def m_avg_loss(spendings, sales, w, b):
    N = len(spendings)
    total_error = 0.0
#     for i in range(N):
#         total_error += (sales[i] - (w*spendings[i] + b))**2
    error_matrix = (sales- spendings*w +b)**2
    total_error = sum(error_matrix)
    return total_error / float(N)

In [8]:
# update w and b during 1 epoch

def m_update_w_and_b(spendings, sales, w, b, alpha):
    N = len(spendings)       
    
    dl_dw_matrix = -2*(sales-w*spendings+b) *spendings
    dl_db_matrix = -2*(sales - (w*spendings + b))
    
    dl_dw = sum(dl_dw_matrix)
    dl_db = sum(dl_db_matrix)
    
    w = w - (1/float(N))*(alpha*dl_dw)
    b = b - (1/float(N))*(alpha*dl_db)
    return w, b

In [9]:
def m_train(spendings, sales, w, b, alpha, epochs):
    for e in range(epochs):
        w, b = m_update_w_and_b(spendings, sales, w, b, alpha)
        
        if e%400==0:
            l= m_avg_loss(spendings, sales, w, b)
            print(f"epoch: {e}, loss: {l}")
    return w, b
        

In [10]:
def m_predict(new_x, w, b):
    return w*new_x + b

In [11]:
#via matrix
import numpy as np
spendings, sales = load_data_from_csv("advertising.csv")
N = len(spendings)
spendings = np.array(spendings).reshape(N,1)
sales = np.array(sales).reshape(N,1)
w, b = m_train(spendings, sales, 0.0, 0.0, 0.001, 15000)
print(f"w={w}, b={b}")
x_new= 23.0
m_predict(x_new, w, b)


epoch: 0, loss: [91.95930944]
epoch: 400, loss: [49.81476655]
epoch: 800, loss: [51.61160736]
epoch: 1200, loss: [52.07471877]
epoch: 1600, loss: [52.1925584]
epoch: 2000, loss: [52.22244695]
epoch: 2400, loss: [52.23002167]
epoch: 2800, loss: [52.23194095]
epoch: 3200, loss: [52.23242723]
epoch: 3600, loss: [52.23255044]
epoch: 4000, loss: [52.23258166]
epoch: 4400, loss: [52.23258956]
epoch: 4800, loss: [52.23259157]
epoch: 5200, loss: [52.23259208]
epoch: 5600, loss: [52.2325922]
epoch: 6000, loss: [52.23259224]
epoch: 6400, loss: [52.23259225]
epoch: 6800, loss: [52.23259225]
epoch: 7200, loss: [52.23259225]
epoch: 7600, loss: [52.23259225]
epoch: 8000, loss: [52.23259225]
epoch: 8400, loss: [52.23259225]
epoch: 8800, loss: [52.23259225]
epoch: 9200, loss: [52.23259225]
epoch: 9600, loss: [52.23259225]
epoch: 10000, loss: [52.23259225]
epoch: 10400, loss: [52.23259225]
epoch: 10800, loss: [52.23259225]
epoch: 11200, loss: [52.23259225]
epoch: 11600, loss: [52.23259225]
epoch: 12000

array([13.88117613])

### via scikit-learn 

In [12]:
# via scikit-learn
import numpy as np
def sci_train(x,y):
    from sklearn.linear_model import LinearRegression
    model = LinearRegression().fit(x,y)
    return model

spendings = np.array(spendings)
sales = np.array(sales)
model = sci_train(spendings.reshape(-1,1), sales)
x_new = [[23.0]]
y_new = model.predict(x_new)
print(y_new)

[[13.96904111]]


In [13]:
# Probe further: 
# How does choice of learning rate effects the training?
# Does more epoch means more accurate. Are their other ways in which gradient descent can be stopped pre maturely?
# How does initial choice of parameters effect learning? 

### References:
1. [Anatomy of a machine learning algorithm](https://www.dropbox.com/s/xpd5x6p6jte3th5/Chapter4.pdf)
2. [scikit-learn Machine Learning in Python](https://scikit-learn.org/stable/index.html)
3. [NumPy Quickstart](https://numpy.org/devdocs/user/quickstart.html)
3. [Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample
](https://stackoverflow.com/questions/58663739/reshape-your-data-either-using-array-reshape-1-1-if-your-data-has-a-single-fe) 