In [1]:
import numpy as np
from numpy import linalg as LA
import time, random
import matplotlib.pyplot as plt
import math

# <center>Cost Function and Gradient as Matrix Multiplication</center>

We defined our cost function as $J(\theta)=\frac{1}{m}\sum_{i=1}^{m}\bigl[-y_i\log{(g(x_{i,*}\theta))}-(1-y_i)\log{(1-g(x_{i,*}\theta))}\bigr]$, where $g(z)=\frac{1}{1+e^{-z}}$ is the sigmoid function. Rewriting the summand gives:

$$
-y_i\log{(g(x_{i,*}\theta))}-(1-y_i)\log{(1-g(x_{i,*}\theta))}=
y_i\log{(1+e^{-x_{i,*}\theta})}-(1-y_i)\log{(e^{-x_{i,*}\theta})}+(1-y_i)\log{(1+e^{-x_{i,*}\theta})}
$$

$$
=(1-y_i)(x_{i,*}\theta)+\log{(1+e^{-x_{i,*}\theta})}
$$

$$
=\log{(1+e^{-x_{i,*}\theta})}+(x_{i,*}\theta)-(y_i)(x_{i,*}\theta)
$$

$$
=\log{(1+e^{-x_{i,*}\theta})}-\log{(e^{-x_{i,*}\theta})}-(y_i)(x_{i,*}\theta)
$$

$$
=\log{(1+e^{x_{i,*}\theta})}-(y_i)(x_{i,*}\theta)
$$

Hence, a more compact way of expressing the cost function is: 

$$J(\theta)=\frac{1}{m}\sum_{i=1}^{m} \Bigl[\log{(1+e^{x_{i,*}\theta})}-(y_i)(x_{i,*}\theta)\Bigr]$$

$$J(\theta)=-\frac{1}{m}(\tilde{X}\theta)^tY+\frac{1}{m}\sum_{i=1}^{m}\log(1+e^{x_{i,*}\theta})$$


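We can now easily compute the partial derivatives (here $\tilde{X}$ is the matrix whose $i$-th row is $x_{i,*}$, and $Y$ is the vector of labels $y_i$):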

$$\frac{\partial J}{\partial\theta_k} = \frac{1}{m}\sum_{i=1}^{m} \biggl(\frac{x_{ik}e^{x_{i,*}\theta}}{1+e^{x_{i,*}\theta}} - x_{ik}y_i\biggr)$$

$$ = \frac{1}{m}\sum_{i=1}^{m} x_{ik} \biggl(\frac{1}{1+e^{-x_{i,*}\theta}} - y_i\biggr)$$

$$ = \frac{1}{m}\sum_{i=1}^{m} x_{ik} (g(x_{i,*}\theta) - y_i)$$

$$ = \frac{1}{m}(x_{*,k})^t[g(\tilde{X}\theta)-Y]$$

It follows that

$$\nabla J = \frac{1}{m}(\tilde{X})^t[g(\tilde{X}\theta)-Y]$$

In [2]:
def calculateCost(X, Y, theta):
    """Return the mean logistic-regression cost J(theta).

    Evaluates ``(sum_i log(1 + exp(z_i)) - z . Y) / m`` where
    ``z = X @ theta`` and ``m = len(X)``.

    Parameters
    ----------
    X : array-like
        Feature matrix; ``X @ theta`` must yield one value per sample.
    Y : array-like
        Label vector with one entry per sample.
    theta : array-like
        Parameter vector.

    Returns
    -------
    float
        The cost averaged over the ``len(X)`` samples.
    """
    z = X @ theta
    # log(1 + exp(z)) overflows to inf once z exceeds roughly 709.
    # logaddexp(0, z) is the same quantity computed without forming exp(z).
    return (np.logaddexp(0, z).sum() - z @ Y) / len(X)

In [3]:
def sigmoid(z):
    """Logistic function ``1 / (1 + exp(-z))``, applied elementwise.

    Parameters
    ----------
    z : scalar or array-like supporting unary minus

    Returns
    -------
    Values in [0, 1] with the same shape as ``z``.
    """
    # exp(-z) overflows for z below roughly -709. Writing the function as
    # exp(-log(1 + exp(-z))) and using logaddexp for the inner term keeps
    # every intermediate finite: the exponent is never positive.
    return np.exp(-np.logaddexp(0, -z))

def getNewTheta(X, Y, theta, alpha=1):
    """Return theta after one gradient-descent step of size ``alpha``.

    The step is ``alpha * X.T @ (sigmoid(X @ theta) - Y) / len(X)``.
    """
    residual = sigmoid(X @ theta) - Y
    # Evaluated left to right exactly as in the one-line form:
    # scale X.T by alpha, multiply by the residual, then divide by len(X).
    step = (alpha * X.T) @ residual / len(X)
    return theta - step

In [4]:
def runLogisticRegression(X, Y, theta, steps, alpha=1, log=False):
    """Apply ``steps`` gradient-descent updates and plot the cost after each.

    Starts from ``theta``, records the cost before any update and after every
    call to ``getNewTheta``, and shows a scatter plot of cost against step
    number. When ``log`` is true both axes use a logarithmic scale.

    Returns the parameter vector after the final update.
    """
    current = theta
    costs = [calculateCost(X, Y, current)]

    for _ in range(steps):
        current = getNewTheta(X, Y, current, alpha)
        costs.append(calculateCost(X, Y, current))

    # One x-coordinate per recorded cost: 0 for the starting theta, then 1..steps.
    step_numbers = list(range(len(costs)))

    plt.scatter(step_numbers, costs)
    plt.title('Cost vs Steps')
    plt.xlabel('Steps')
    plt.ylabel('Cost')

    if log:
        for set_scale in (plt.xscale, plt.yscale):
            set_scale('log')

    plt.show()

    return current

In [5]:
# TODO: the gradient-descent implementation above has not been tested yet; no sample data suited to logistic regression has been found so far.

In [6]:
import pandas as pd

In [7]:
# Import the os module
import os
# Change the current working directory to the dataset folder.
# Set the KNOT_DATA_DIR environment variable to use a different location;
# the original absolute path stays as the default so existing runs are unaffected.
os.chdir(os.environ.get(
    'KNOT_DATA_DIR',
    '/Users/alexchandler/Documents/GitHub/knot_machine_learning/dataset',
))

In [8]:
# Load the spreadsheet: row 0 supplies the column names and the row
# immediately after it (index 1) is skipped.
knot_info = pd.read_excel(
    'knotinfo_data_complete.xls',
    header=0,
    skiprows=[1],
    index_col=False,
    keep_default_na=True,
)

In [9]:
# Feature columns for the classifier.
# NOTE(review): this selection is made before 'unknotting_number' is converted
# to integers further down, so X appears to keep the raw values of that
# column - confirm this is intended.
X = knot_info[['determinant','signature','unknotting_number']]

In [10]:
def convert_list_to_int(unknotting):
    """Convert a raw cell value to ``int``, or return -1 if that is impossible.

    Parameters
    ----------
    unknotting : any
        Value passed straight to ``int()``.

    Returns
    -------
    int
        ``int(unknotting)`` on success, otherwise the sentinel ``-1``.
    """
    try:
        return int(unknotting)
    except (TypeError, ValueError, OverflowError):
        # Only the errors int() raises for unconvertible input are turned into
        # the sentinel; anything else (e.g. KeyboardInterrupt) propagates.
        return -1


# Elementwise version. Fixing otypes avoids inferring the dtype from the first
# element, which also lets the function accept empty input.
convert_list_to_int_vect = np.vectorize(convert_list_to_int, otypes=[int])
        

In [11]:
# Overwrite the column with its integer form; entries that int() cannot
# convert become the sentinel -1.
knot_info['unknotting_number'] = convert_list_to_int_vect(knot_info['unknotting_number'])

In [13]:
# Boolean row filter: True where the converted unknotting number is
# non-negative, which excludes the -1 sentinel used for failed conversions.
mask = knot_info['unknotting_number'] >= 0

In [14]:
# Build the filtered feature matrix directly from knot_info, whose
# 'unknotting_number' column now holds the converted integers. Selecting from
# knot_info instead of filtering X in place also keeps this cell re-runnable.
X = knot_info.loc[mask, ['determinant','signature','unknotting_number']]

In [15]:
# Target column: the raw 'alternating' values (still unencoded here).
y = knot_info['alternating']

In [16]:
def YN_to_int(let):
    """Encode a 'Y'/'N' flag as 1/0.

    Parameters
    ----------
    let : str
        The flag to encode.

    Returns
    -------
    int
        1 for 'Y', 0 for 'N'.

    Raises
    ------
    ValueError
        If ``let`` is neither 'Y' nor 'N'.
    """
    if let == 'Y':
        return 1
    if let == 'N':
        return 0
    # Falling through used to print the value and implicitly return None,
    # which cannot be stored in the integer label array. Fail here instead,
    # naming the offending value.
    raise ValueError(f"expected 'Y' or 'N', got {let!r}")


# Elementwise version; otypes is fixed so the result is always an integer
# array, including for empty input.
YN_to_int_vect = np.vectorize(YN_to_int, otypes=[int])

In [17]:
# Encode every label elementwise; the result is a NumPy array, not a Series.
yp = YN_to_int_vect(y)

In [18]:
# Recompute the filtered labels from y instead of filtering yp in place, so
# re-running this cell never indexes an already-shortened array with the
# full-length mask.
yp = YN_to_int_vect(y)[mask.to_numpy()]

In [24]:
# Sanity check: number of feature rows left after masking.
len(X)

2316

In [26]:
# Sanity check: number of labels left after masking; should equal len(X).
len(yp)

2316

In [22]:
from sklearn.linear_model import LogisticRegression

In [27]:
# Fit scikit-learn's logistic regression on all filtered rows (no train/test split here).
clf = LogisticRegression(random_state=3).fit(X, yp)

In [28]:
# Mean accuracy on the same data the model was fitted on (training accuracy),
# not a held-out estimate.
clf.score(X,yp)

0.8026770293609672

In [29]:
# Fraction of samples labelled 1, i.e. the accuracy of always predicting 1.
yp.mean()

0.6701208981001727

In [30]:
# Fitted coefficients, one per feature column of X
# (determinant, signature, unknotting_number).
clf.coef_

array([[0.03063451, 0.02157264, 0.24040202]])

In [34]:
from sklearn.model_selection import cross_val_score
# Run 3-fold cross-validation once and reuse the scores for both the per-fold
# printout and the mean, instead of fitting every fold twice.
# NOTE(review): the per-fold scores differ widely; presumably the rows are
# ordered and the folds are unshuffled - confirm before relying on the mean.
cv_scores = cross_val_score(clf, X, yp, cv=3)
print(cv_scores)
cv_scores.mean()


[0.57772021 0.8626943  0.78108808]


0.7405008635578584