# Import numpy, pandas, matpotlib, and sklearn libraries

In [90]:
import numpy as np
import pandas as pd
import matplotlib as plt
from sklearn.preprocessing import StandardScaler, scale
from sklearn.model_selection import train_test_split

# Load Advertising.csv using pandas

In [32]:
df = pd.read_csv("data/Advertising.csv", index_col = 0)

In [33]:
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [34]:
x = df.drop("Sales", axis = 1)
y = df["Sales"]
x.head()

Unnamed: 0,TV,Radio,Newspaper
1,230.1,37.8,69.2
2,44.5,39.3,45.1
3,17.2,45.9,69.3
4,151.5,41.3,58.5
5,180.8,10.8,58.4


# Standardize each column of the dataset

In [99]:
scaled = pd.DataFrame(scale(x), columns=["TV","Radio","Newspaper"])
scaled.insert(0, 'bias', [1]*len(scaled))
scaled

Unnamed: 0,bias,TV,Radio,Newspaper
0,1,0.969852,0.981522,1.778945
1,1,-1.197376,1.082808,0.669579
2,1,-1.516155,1.528463,1.783549
3,1,0.052050,1.217855,1.286405
4,1,0.394182,-0.841614,1.281802
...,...,...,...,...
195,1,-1.270941,-1.321031,-0.771217
196,1,-0.617035,-1.240003,-1.033598
197,1,0.349810,-0.942899,-1.111852
198,1,1.594565,1.265121,1.640850


# Divide the dataset into training and testing, with 0.85 and 0.15 ratio, respectively.

In [104]:
x_train, x_test, y_train, y_test = train_test_split(scaled, y, train_size=0.85, test_size=0.15, random_state = 42)

In [105]:
print("x_train size:", x_train.shape)
print("y_train size:", y_train.shape)
print("\nx_test size:", x_test.shape)
print("y_test size:", y_test.shape)

x_train size: (170, 4)
y_train size: (170,)

x_test size: (30, 4)
y_test size: (30,)


# Fit the model on the training set

In [169]:
def initialize_weights():
    np.random.seed(2)
    init_w = np.random.uniform(low=0.0, high=1.0, size=4)
    return init_w

In [170]:
weight = initialize_weights()
weight

array([0.4359949 , 0.02592623, 0.54966248, 0.43532239])

In [143]:
def predict(features, weight):
    predictions = np.dot(np.dot(np.linalg.inv(np.dot(x_train.T,x_train)),x_train.T),y_train)
    return predictions

In [144]:
predict(x_train, weight)

603

In [148]:
beta = np.dot(np.dot(np.linalg.inv(np.dot(x_train.T,x_train)),x_train.T),y_train)
y_train_hat = np.sum(np.multiply(beta,x_train))
y_train_hat

bias         2384.211354
TV             21.342638
Radio          -7.195736
Newspaper      -0.058256
dtype: float64