In [13]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math 
import copy

In [14]:
def compute_cost(x,y,w,b):
    m = x.shape[0]
    err = 0 
    for i in range(m):
        fw_b = np.dot(x[i],w)+b
        err += (fw_b-y[i])**2
    err = err/(2*m)
    return err

In [15]:
def gradient(x,y,w,b):
    m,n = x.shape
    djw = np.zeros((n,))
    djb = 0
    for i in range(m):
        temp_dw = (np.dot(x[i],w)+b-y[i])
        djb += np.dot(x[i],w)+b-y[i]
        for j in range(n):
            djw += temp_dw*x[i,j]
    djw = djw/m
    djb = djb/m
    return djw,djb

In [16]:
def gradient_descent(x,y,w_in,b_in,alpha,iter,fun_cost,fun_grad):
    w = copy.deepcopy(w_in)
    b = b_in
    j = []
    for i in range(iter):
        djw,djb = fun_grad(x,y,w_in,b_in)
        w = w - alpha*djw
        b = b - alpha*djb
        if i<100000:
            j.append(fun_cost(x,y,w,b))
        if i% math.ceil(iter / 10) == 0:
            print(f"Iteration {i:4d}: Cost {j[-1]:8.2f}   ")
    return w,b,j

In [17]:
df = pd.read_csv('happy_index.csv')


y = df['Score'].values
selected_col = ['GDP per capita','Social support','Healthy life expectancy','Freedom to make life choices','Generosity','Perceptions of corruption']
x = df[selected_col].values



In [18]:
x_train = x[:150]
x_test = x[150:]
y_train = y[:150]
y_test = y[150:]


In [44]:
w_init = np.zeros(x.shape[1])
b_init = 0
iteration = 1000
alpha = 8.0e-5

w_final, b_final,j = gradient_descent(x_train,y_train,w_init,b_init,alpha,iteration,compute_cost,gradient)

Iteration    0: Cost    15.60   
Iteration  100: Cost    12.19   
Iteration  200: Cost     9.20   
Iteration  300: Cost     6.65   
Iteration  400: Cost     4.51   
Iteration  500: Cost     2.81   
Iteration  600: Cost     1.53   
Iteration  700: Cost     0.68   
Iteration  800: Cost     0.26   
Iteration  900: Cost     0.26   


In [45]:
print(j)

[15.60401372788125, 15.56776688444547, 15.531562759692642, 15.495401353622784, 15.459282666235877, 15.423206697531922, 15.387173447510953, 15.351182916172933, 15.315235103517853, 15.279330009545761, 15.24346763425662, 15.207647977650435, 15.17187103972722, 15.136136820486954, 15.100445319929655, 15.06479653805531, 15.02919047486395, 14.993627130355517, 14.958106504530067, 14.922628597387568, 14.887193408928036, 14.851800939151456, 14.816451188057847, 14.781144155647189, 14.745879841919496, 14.710658246874758, 14.675479370512985, 14.640343212834175, 14.605249773838326, 14.570199053525435, 14.535191051895497, 14.500225768948537, 14.465303204684524, 14.430423359103466, 14.39558623220539, 14.360791823990263, 14.326040134458077, 14.291331163608884, 14.256664911442622, 14.222041377959338, 14.187460563159023, 14.152922467041662, 14.118427089607252, 14.083974430855806, 14.049564490787327, 14.015197269401808, 13.980872766699234, 13.946590982679641, 13.912351917342994, 13.878155570689305, 13.844

In [46]:
print(f'w_final:{w_final}, b_final: {b_final}')

w_final:[1.64667678 1.64667678 1.64667678 1.64667678 1.64667678 1.64667678], b_final: 0.4396922666666763


In [47]:
m1 = x_test.shape[0]
pred = []
for i in range(m1):
    pred.append(np.dot(x_train[i],w_final) + b_final)
print(pred)
print(y_test)

[8.763643404394529, 9.012291598617457, 9.188486014391321, 8.826217122145994, 8.827863798928927, 9.015584952183323]
[3.38  3.334 3.231 3.203 3.083 2.853]


In [48]:
print(compute_cost(x_test,y_test,w_final,b_final))

0.7859498442854295
