In [1]:
import pandas as pd
import numpy as np
import math, copy

In [2]:
# Load the salary data; the path is relative to the notebook's working directory.
dataset = pd.read_csv('data/Salary_dataset.csv')

In [3]:
# Peek at one randomly drawn row (no random_state is set, so the row differs between runs).
dataset.sample()

Unnamed: 0.1,Unnamed: 0,YearsExperience,Salary
22,22,8.0,101303.0


In [4]:
# Confirm what kind of object read_csv returned.
type(dataset)

pandas.core.frame.DataFrame

In [5]:
# (rows, columns) of the loaded table.
dataset.shape

(30, 3)

In [6]:
# Count missing values in each column.
dataset.isnull().sum()

Unnamed: 0         0
YearsExperience    0
Salary             0
dtype: int64

In [7]:
def compute_cost(x, y, w, b):
    """Return the halved mean squared error of the line ``w * x + b``.

    Args:
        x: Indexable sequence of feature values; ``len(x)`` is the sample count.
        y: Indexable sequence of targets, read at the same positions as ``x``.
        w: Slope of the linear model.
        b: Intercept of the linear model.

    Returns:
        The sum of squared residuals divided by ``2 * len(x)``.
    """
    sample_count = len(x)
    squared_error_sum = 0
    for idx in range(sample_count):
        # Residual of the model's prediction against the observed target.
        residual = (w * x[idx] + b) - y[idx]
        squared_error_sum += residual ** 2
    return squared_error_sum / (2 * sample_count)

In [8]:
def compute_gradient(x, y, w, b):
    """Compute the gradient of the squared-error cost for the model ``w * x + b``.

    Args:
        x: Indexable sequence of feature values, e.g. an ``(m, 1)`` column
            array or a flat list of numbers; ``len(x)`` is the sample count ``m``.
        y: Indexable sequence of targets, read at the same positions as ``x``.
        w: Current slope.
        b: Current intercept.

    Returns:
        Tuple ``(dj_dw, dj_db)`` of lists holding the per-sample gradient
        terms summed and divided by ``m``. Column-array inputs and flat 1-D
        inputs both yield one-element lists.

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    m = len(x)
    dj_dw = 0
    dj_db = 0

    for i in range(m):
        # Prediction error for sample i; it feeds both partial derivatives.
        err = (w * x[i] + b) - y[i]
        dj_db += err
        dj_dw += err * x[i]

    dj_dw = dj_dw / m
    dj_db = dj_db / m

    # With flat inputs the averages are scalars and list(<scalar>) raises
    # TypeError; atleast_1d wraps them so both input layouts return lists.
    return list(np.atleast_1d(dj_dw)), list(np.atleast_1d(dj_db))

In [14]:
def gradient_descent(x, y, w_in, b_in, alpha, num_iters, cost_function, gradient_function):
    """Fit ``w`` and ``b`` by repeated gradient steps.

    Args:
        x: Training features, passed through unchanged to ``gradient_function``.
        y: Training targets, passed through unchanged to ``gradient_function``.
        w_in: Initial slope.
        b_in: Initial intercept.
        alpha: Learning rate (step size).
        num_iters: Number of update steps to run.
        cost_function: Accepted for interface compatibility; not called here.
        gradient_function: Callable ``(x, y, w, b) -> (dj_dw, dj_db)``. Each
            gradient may be a scalar or an indexable sequence; for a sequence
            only element ``[0]`` is used.

    Returns:
        Tuple ``(w, b)`` after ``num_iters`` updates.
    """
    def _leading(grad):
        # Sequences contribute their first entry; plain scalars are used as-is.
        return grad[0] if np.ndim(grad) else grad

    w = w_in
    b = b_in

    for _ in range(num_iters):
        dj_dw, dj_db = gradient_function(x, y, w, b)

        # Simultaneous update: both gradients were evaluated at the old (w, b).
        b = b - alpha * _leading(dj_db)
        w = w - alpha * _leading(dj_dw)

    return w, b

In [15]:
# List the column labels before selecting the feature and the target.
dataset.columns

Index(['Unnamed: 0', 'YearsExperience', 'Salary'], dtype='object')

In [16]:
# Double-bracket selection keeps each column as a 2-D array of shape (rows, 1).
X = dataset[['YearsExperience']].values
y = dataset[['Salary']].values

In [17]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for testing; the fixed seed makes the shuffled split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    shuffle=True,
    random_state=104,
)

In [18]:
# Start both parameters at zero.
w_init = 0
b_init = 0

# Gradient descent settings: number of update steps and learning rate.
iterations = 10000
tmp_alpha = 0.01

# Fit the from-scratch model on the training split.
w_final, b_final = gradient_descent(
    X_train,
    y_train,
    w_init,
    b_init,
    tmp_alpha,
    iterations,
    compute_cost,
    compute_gradient,
)
print(f"(w,b) found by gradient descent: ({w_final:8.4f},{b_final:8.4f})")

(w,b) found by gradient descent: (9354.3139,25370.6674)


In [25]:
def predict(x, w, b):
    """Evaluate the line ``w * x + b`` at ``x``.

    Args:
        x: Feature value(s); a NumPy array is transformed element-wise.
        w: Slope.
        b: Intercept.

    Returns:
        ``w * x + b``.
    """
    scaled = w * x
    return scaled + b

In [26]:
# Predict the test-split targets with the parameters found by gradient descent.
y_pred = predict(X_test, w_final, b_final)

In [27]:
# Display the from-scratch predictions.
y_pred

array([[116107.51248253],
       [ 81496.55097526],
       [100205.17881703],
       [ 82431.98236735],
       [ 63723.35452557],
       [ 60917.06034931],
       [ 92721.72768032],
       [ 72142.23705437]])

In [28]:
# Display the true test targets for comparison with the predictions.
y_test

array([[112636.],
       [ 81364.],
       [101303.],
       [ 93941.],
       [ 56958.],
       [ 57190.],
       [ 98274.],
       [ 67939.]])

In [29]:
# Reference model: scikit-learn's LinearRegression with default settings.
from sklearn.linear_model import LinearRegression
reg = LinearRegression()

In [30]:
# Fit on the training split; fit() returns the estimator itself, so reg is rebound to the same object.
reg = reg.fit(X_train, y_train)

In [33]:
# Slope learned by scikit-learn.
reg.coef_

array([[9354.31392084]])

In [34]:
# score() reports R^2; it is evaluated here on the training split, not on the held-out test rows.
reg.score(X_train, y_train)

0.9618943902999022

In [36]:
# scikit-learn predictions for the test split.
reg.predict(X_test)

array([[116107.51248241],
       [ 81496.55097529],
       [100205.17881698],
       [ 82431.98236738],
       [ 63723.35452569],
       [ 60917.06034944],
       [ 92721.7276803 ],
       [ 72142.23705445]])

# The from-scratch model and the scikit-learn model produce the same test-set predictions (they agree to about six decimal places)