/
Prototype.py
104 lines (92 loc) · 3.16 KB
/
Prototype.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import pandas as pd
import numpy as np
import csv
import matplotlib.pyplot as plt
###PARAM
n = 1
exp_corr = 0.000001
#read
filename = "data.csv"
dt = pd.read_csv(filename)
#Make data into np arrays
y = (np.array(dt[["y"]]))
# x = np.array(dt[["x1", "x2", "x3"]])
x = np.array(dt[["x1"]])
const_ = np.ones(x.shape[0]) #Add intercept
x = np.c_[const_, x]
#init coeff
B = np.random.rand(1, x.shape[1])
#define the logit function
def logit(x, B): #CHECKED
'''Returns a column vector Nx1'''
XB = np.matmul(B, np.transpose(x))
denom = np.add(1, np.exp(np.negative(XB)))
return np.transpose(np.add(np.divide(1, denom), exp_corr)) #TODO
def dlogLik_dB(x, y, B):
'''Returns a row vector 1xK+1 for each B'''
y_SXB = np.add(y, np.negative(logit(x, B)))
return np.matmul(np.transpose(y_SXB), x)
def d2logLik_dB2(x, y, B):
'''Returns a row vector 1xK+1 for each B'''
XtX = np.square(x)
_1_S = np.add(1, np.negative(logit(x, B)))
S_1_S = np.multiply(logit(x, B), _1_S)
return np.transpose(np.matmul(np.transpose(XtX), S_1_S))
def score(x, y, B):
'''Returns a row vector 1xK+1 for each B'''
return np.transpose(np.matmul(np.transpose(x), np.add(y, np.negative(logit(x, B)))))
def Hessian(x, y, B):
_1_S = np.add(1, np.negative(logit(x, B)))
S_1_S = np.multiply(logit(x, B), _1_S)
diag_S_1_S =np.diag(np.squeeze(np.transpose(S_1_S)))
Xt_S1S = np.matmul(np.negative(np.transpose(x)), diag_S_1_S)
return np.matmul(Xt_S1S, x)
def updateB(B, dLL, d2LL, n=n, method = "NR"):
if method == "NR":
''' ~~Newton Raphson method~~'''
dll_d2LL = np.multiply(n, np.divide(dLL, d2LL))
return np.add(B, dll_d2LL)
elif method == "GD":
''' ~~Gradient descent method~~'''
return np.add(B, np.multiply(n, dLL))
#SCORE METHOD
# dll_d2LL = np.matmul(score(x, y, B), np.linalg.inv(Hessian(x, y, B)))
# return np.add(B, dll_d2LL)
def learn(x, y, B, maxIterations = 500, min_grad = 0.00001):
'''
~~Newton Raphson method~~
is iteration > maxIterations?
if yes, quit and return B and error
get dLL, d2LL
check if dervive is above threshold
if yes update B
if not quit and return B
'''
iter = 0 #EDGE - avoids maxIterations = 0 from causing error
for iter in range(maxIterations):
dLL = dlogLik_dB(x, y, B)
d2LL = d2logLik_dB2(x, y, B)
if np.abs(np.mean(dLL)) <= min_grad or np.abs(np.mean(d2LL)) == 0:
print("-- Minimum gradient reached --")
break
else:
B = updateB(B, dLL, d2LL, n)
if (iter-1) % 10 == 0:
print("Iteration: {0}".format(iter))
print(" Fitted Values: {0}".format(B))
print(" Gradient: {0}".format(dLL))
print("____________________________________")
print("Number of Newton-Raphson iterations: {0}".format(iter))
print("Fitted Values: {0}".format(B))
print("____________________________________")
return B
B = learn(x, y, B)
y_ = logit(x, B)
x1 = dt["x1"]
x2 = dt["x2"]
x3 = dt["x3"]
plt.plot(x1,y, ".")
plt.plot(x1,y_, "b.")
# plt.plot(x3,y_, "r.")
# plt.plot(x2,y_, "g.")
plt.show()