In [2]:
import numpy as np
import numpy.matlib
import pandas as pd
#from sklearn import linear_model as lm
#import matplotlib.pyplot as plt
#%matplotlib inline

data = pd.read_csv('Score_Pass.csv')
X, y = np.array(data['Score']), np.array(data['Pass/N'])
x=X

$$\theta_j=\theta_j + \alpha\frac{1}{m}\sum_{i=1}^m\left[ y^{(i)}-h_\theta\left(x^{(i)}\right)\right]\,x_j^{(i)}$$

In [11]:
def sigmoid(x, Θ_1, Θ_2):                                                        
    z = (Θ_1*x + Θ_2).astype("float_")                                              
    return 1.0 / (1.0 + np.exp(-z)) 


def Cost(x, y, Θ_1, Θ_2):                                                                
    sigmoid_probs = sigmoid(x, Θ_1, Θ_2)                                        
    return np.sum(y * np.log(sigmoid_probs)
                  + (1 - y) * np.log(1 - sigmoid_probs)) 

def gradient(x, y, Θ_1, Θ_2):                                                         
    sigmoid_probs = sigmoid(x, Θ_1, Θ_2)                                        
    return np.array([[np.sum((y - sigmoid_probs) * x),                          
                     np.sum((y - sigmoid_probs) * 1)]])                         

def hessian(x, y, Θ_1, Θ_2):                                                          
    sigmoid_probs = sigmoid(x, Θ_1, Θ_2)                                        
    d1 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x * x)                  
    d2 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * x * 1)                  
    d3 = np.sum((sigmoid_probs * (1 - sigmoid_probs)) * 1 * 1)                  
    H = np.array([[d1, d2],[d2, d3]])                                           
    return H




In [15]:
def GradDe(X,y,Max_Loop=20, alpha=0.00001):
    #alpha = 0.00000001
    #Max_Loop = 200
    Θ_1 = 0.0001
    Θ_2 = -0.04
    
    for l in range(Max_Loop):
        Θ_1 = Θ_1 + alpha * np.sum((y-sigmoid(X, Θ_1, Θ_2)) * X)
        Θ_2 = Θ_2 + alpha * np.sum(y-sigmoid(X, Θ_1, Θ_2))
        
        print(Cost(X, y, Θ_1, Θ_2), gradient(X,y,Θ_1, Θ_2))
        
    print([Θ_1, Θ_2])
    return [Θ_1, Θ_2]

In [16]:
weights = GradDe(X,y,20,0.00000001)

-503.725836684 [[-964.73121299  -75.47359542]]
-503.719835005 [[-267.92804485  -74.39704665]]
-503.71932146 [[-74.34971992 -74.09796698]]
-503.71923136 [[-20.57238286 -74.01487521]]
-503.719173908 [[ -5.63262743 -73.99178614]]
-503.719118966 [[ -1.48224279 -73.98536628]]
-503.719064217 [[ -0.32923191 -73.98357725]]
-503.719009481 [[ -8.91599379e-03  -7.39830747e+01]]
-503.718954746 [[  0.0800704  -73.98292956]]
-503.718900011 [[  0.10479155 -73.9828837 ]]
-503.718845276 [[  0.11165928 -73.98286542]]
-503.718790541 [[  0.11356718 -73.98285481]]
-503.718735806 [[  0.1140972  -73.98284632]]
-503.718681072 [[  0.11424444 -73.98283843]]
-503.718626337 [[  0.11428533 -73.9828307 ]]
-503.718571602 [[  0.11429669 -73.98282301]]
-503.718516867 [[  0.11429983 -73.98281534]]
-503.718462132 [[  0.1143007  -73.98280767]]
-503.718407398 [[  0.11430093 -73.98280001]]
-503.718352663 [[  0.11430098 -73.98279234]]
[5.1940616299646171e-05, -0.040014817240837067]


In [None]:
5.1940616299646171e-05, -0.040014817240837067

In [19]:
def newtons_method(x, y):                                                             
    """
    """

    # Initialize Cost & parameters                                                                   
    Θ_1 = 0.001                                                                     
    Θ_2 = -0.4 # The intercept term                                                                 
    delta_l = np.Infinity                                                                
    l = Cost(x, y, Θ_1, Θ_2)                                                                 
    # Convergence Conditions                                                        
    δ = .0000000001                                                                 
    max_iterations = 15                                                            
    i = 0                                                                           
    while abs(delta_l) > δ and i < max_iterations:                                       
        i += 1                                                                      
        g = gradient(x, y, Θ_1, Θ_2)                                                      
        hess = hessian(x, y, Θ_1, Θ_2)                                                 
        H_inv = np.linalg.inv(hess)                                                 
        # @ is syntactic sugar for np.dot(H_inv, g.T)¹
        delta = H_inv @ g.T                                                             
        delta_Θ_1 = delta[0][0]                                                              
        delta_Θ_2 = delta[1][0]  
        print(Θ_1,Θ_2,l,g)
                                                                                    
        # Perform our update step                                                    
        Θ_1 += delta_Θ_1                                                                 
        Θ_2 += delta_Θ_2                                                                 
                                                                                    
        # Update the log-likelihood at each iteration                                     
        l_new = Cost(x, y, Θ_1, Θ_2)                                                      
        delta_l = l - l_new                                                           
        l = l_new                                                                
    return np.array([Θ_1, Θ_2])      

In [20]:
newtons_method(x, y)

0.001 -0.4 -483.268693545 [[-28024.3244961   -113.6228469]]
0.0112576600667 -7.27041636976 -194.098783032 [[-4751.72666163   -27.94893012]]
0.0196272786295 -12.7884192394 -120.225925481 [[-1303.99280455   -10.2602213 ]]
0.0297371807762 -19.4082678025 -84.7723651278 [[ 224.55427607   -2.97851801]]
0.0418724485742 -27.2808836967 -68.057215957 [[ 460.36914031   -0.54775886]]
0.0542514160377 -35.2634488806 -61.882910202 [[  2.75407730e+02   3.10212067e-02]]
0.0626937102006 -40.6883474956 -60.6575636785 [[  8.31196135e+01   5.16989581e-02]]
0.0652583036832 -42.3325318804 -60.5908898311 [[  6.74827902e+00   5.85997656e-03]]
0.0654319147359 -42.4436576359 -60.5906289243 [[  3.39533720e-02   3.37330329e-05]]
0.0654326355822 -42.4441184864 -60.5906289199 [[  6.64767924e-07   7.06533165e-10]]


array([  0.06543264, -42.44411849])