gradient_decent.py
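"""Fit a single sigmoid neuron f(x) = sigmoid(w*x + b) to the points in
A4_Q7_data.csv by minimizing squared error with the Adam update rule, then
plot the error surface and the optimization trajectory over (w, b)."""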
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 -- registers the 3d projection

# sigmoid comes from the accompanying ex1 module; fall back to the standard
# logistic function if ex1 is not on the path (assumed to be equivalent).
try:
    from ex1 import sigmoid as sig
except ImportError:
    def sig(z):
        return 1.0 / (1.0 + np.exp(-z))

# Training data: one input column X and one target column Y.
data = pd.read_csv("A4_Q7_data.csv")
X = np.array(data['X'])
Y = np.array(data['Y'])
def loss(y, yr):
    # Squared error between prediction y and target yr.
    return 0.5 * (y - yr) ** 2

def error(w, b):
    # Total loss of the neuron f(x) = sigmoid(w*x + b) over the whole dataset.
    err = 0.0
    for x, y in zip(X, Y):
        fx = sig(w * x + b)
        err += loss(fx, y)
    return err
def grad_b(w, b, x, y):
    # Per-example dL/db (derivation in the comment below).
    fx = sig(w * x + b)
    return (fx - y) * fx * (1 - fx)

def grad_w(w, b, x, y):
    # Per-example dL/dw = dL/db * x, by the chain rule.
    fx = sig(w * x + b)
    return (fx - y) * fx * (1 - fx) * x
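# Why these gradients: with z = w*x + b, f = sigmoid(z), L = 0.5*(f - y)^2,
# the chain rule gives
#   dL/df = f - y,   df/dz = f*(1 - f),   dz/dw = x,   dz/db = 1,
# so dL/dw = (f - y)*f*(1 - f)*x and dL/db = (f - y)*f*(1 - f).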
def do_gradient_decent():
    # Full-batch gradient descent on (w, b) using the Adam update rule.
    w, b, eta, max_epochs = 1.0, 1.0, 0.01, 100
    w_history, b_history, error_history = [], [], []
    beta1, beta2 = 0.9, 0.99
    m_w, m_b, v_w, v_b, eps = 0.0, 0.0, 0.0, 0.0, 1e-8
    for i in range(max_epochs):
        # Accumulate the gradient over the full dataset.
        dw, db = 0.0, 0.0
        for x, y in zip(X, Y):
            dw += grad_w(w, b, x, y)
            db += grad_b(w, b, x, y)
        # Adam update: exponential moving averages of the gradient (m)
        # and the squared gradient (v), with bias correction for early steps.
        m_w = beta1 * m_w + (1 - beta1) * dw
        m_b = beta1 * m_b + (1 - beta1) * db
        v_w = beta2 * v_w + (1 - beta2) * dw ** 2
        v_b = beta2 * v_b + (1 - beta2) * db ** 2
        t = i + 1
        m_w_hat, m_b_hat = m_w / (1 - beta1 ** t), m_b / (1 - beta1 ** t)
        v_w_hat, v_b_hat = v_w / (1 - beta2 ** t), v_b / (1 - beta2 ** t)
        w = w - eta * m_w_hat / (np.sqrt(v_w_hat) + eps)
        b = b - eta * m_b_hat / (np.sqrt(v_b_hat) + eps)
        w_history.append(w)
        b_history.append(b)
        error_history.append(error(w, b))
    print(error_history)

    # Evaluate the error on a (w, b) grid around the visited region so the
    # surface and contour plots get the 2-D arrays they require.
    w_grid = np.linspace(min(w_history) - 1, max(w_history) + 1, 50)
    b_grid = np.linspace(min(b_history) - 1, max(b_history) + 1, 50)
    W, B = np.meshgrid(w_grid, b_grid)
    E = np.vectorize(error)(W, B)

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')
    surf = ax.plot_surface(W, B, E, cmap=cm.coolwarm,
                           linewidth=0, antialiased=False)
    ax.plot(w_history, b_history, error_history, 'k.-')  # optimization path
    fig.colorbar(surf, shrink=0.5, aspect=5)

    plt.figure()
    plt.contour(W, B, E)
    plt.plot(w_history, b_history, 'k.-')
    plt.show()
if __name__ == "__main__":
    do_gradient_decent()
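# Usage sketch (assumes pandas, numpy, and matplotlib are installed, and that
# A4_Q7_data.csv with columns X and Y sits next to this script):
#   python gradient_decent.py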