In [1]:
import numpy as np
import time
import sys
from scipy import optimize
sys.path.insert(1,"../ai_implementations")
import custom_ai_utils

In [2]:
# Load in the testing data
data_path = "data/intro.csv"
data = np.loadtxt(data_path, delimiter = ",", usecols = (1,2,3,4,5,6))

# Partition training data and result data 
X = data[:,:5]
y = data[:,5]
m = y.size

# Add the intercept term
X = np.concatenate([np.ones((m, 1)), X], axis=1)

In [9]:
def costGradRegFunction(theta, X, y, lambda_, powers):
    """
        Perform one step of regularized gradient descent
        
        Raise X to powers and apply theta, then regularize with lambda_ and compare to y
        
        Parameters
        ----------
        theta : numpy array (6,)
            The weights to be applied to X
        X : numpy array (n,6)
            The feature vector
        y : numpy array (n,)
            The result vector, containing actual voted outcomes for each feature vector
        lambda_ : float
            The regularization parameter
        powers : array (6,)
            The exponents to raise each feature to in X, as part of the hypothesis function
            
        Returns
        -------
        J : float
            The cost of this given prediction
        grad : float
            The regularized gradient for this step of gradient descent
    """
    # Number of training examples
    m = y.size
    # Perform sigmoid on hypothesis function
    h = custom_ai_utils.sigmoid((np.power(X,powers).dot(theta.T)))    
    # Just the regularized cost function for logistic regression, nothing to see here
    J = (1/m)*np.sum((-y*np.log(h)) - (1-y)*(np.log(1-h))) + (lambda_/(2*m))*np.sum(np.square(theta))
    # Set bias weight of theta to 0 because we don't want it to be regularized 
    temp = theta
    temp[0] = 0
    # Compute regularized gradient update 
    grad = (1/m)*((h-y).dot(X)) + (lambda_/m)*temp

    return J, grad

def costGradFunction(theta, X, y, powers):
    """
        Same as above, but does not regularize
    """
   
    m = y.size
   
    h = custom_ai_utils.sigmoid(X.dot(theta.T))
    
    J = (1/m)*np.sum((-y*np.log(h)) - (1-y)*(np.log(1-h)))    
    
    grad = (1/m)*((h-y).dot(X)) 
    
    return J, grad

In [16]:
def oneVsAllReg(X, y, num_classes, lambda_, powers):
    """
        Perform regularized one vs. all classification 
        
        Determine the optimal weights for each class with the given data
        
        Parameters
        ----------
        X : numpy array (n,6)
            The feature vector, containing n examples and 6 features including the intercept term
        y : numpy array (n,)
            The output vector, containing actual votes for each of the examples
        num_classes : int
            The number of possible prediction classes
        lambda_ : float
            The regularization parameter
        powers : array (6,)
            The exponents to raise each feature to in X, as part of the hypothesis function
            
        Returns
        -------
        all_theta : numpy array (num_classes,n)
            The weight matrix, containing optimized weights for each class
    """
    # Number of training examples, number of features (including intercept term)
    m, n = X.shape
    # Initialize weight matrix 
    all_theta = np.zeros((num_classes, n))
    # Assign optimal theta for each class 
    for c in range(num_classes):
        initial_theta = np.zeros(n)
        options = {'maxiter' : 50} # Feel free to play around with this 
        res = optimize.minimize(costGradRegFunction, initial_theta, (X, (y==c)*1, lambda_, powers),
                               jac=True, method='TNC', options=options) #Change extra params mayhaps
        all_theta[c] = res.x
    
    return all_theta

def oneVsAll(X, y, num_classes, powers):
    """
        Same as above, but unregularized
    """
    m, n = X.shape
    
    all_theta = np.zeros((num_classes, n))
    for c in range(num_classes):
        initial_theta = np.zeros(n)
        options = {'maxiter' : 50}
        res = optimize.minimize(costGradFunction, initial_theta, (X, (y==c)*1, powers),
                               jac=True, method='TNC', options=options)
        all_theta[c] = res.x
    return all_theta

In [14]:
"""
    Given optimized theta for each class (all_theta) and training data (X), return all 
    classifications for each element in X. 
"""
def classifyOneVsAll(all_theta, X, powers):
    # Will always be 3 in this case but whatever. Nice to seem generalized. 
    num_labels = np.array(all_theta.shape[0])
    
    # Array of predictions to return
    p = np.zeros(X.shape[0])
    
    # Perform predictions with theta and X for each possible class 
    predictions = custom_ai_utils.sigmoid(np.power(X, powers).dot(all_theta.T))
    p = np.argmax(predictions, axis=1)
    
    return p

In [17]:
# Some random testing things
lambda_ = .1
num_classes = 3
theta = np.ones(6)
h = custom_ai_utils.sigmoid(X.dot(theta.T))
powers = np.ones(6)

#all_theta = oneVsAllReg(X, y, num_classes, lambda_, powers)
all_theta = oneVsAll(X, y, num_classes, powers)
predictions = classifyOneVsAll(all_theta, X, powers)

print('Training Set Accuracy: {:.2f}%'.format(np.mean(predictions == y) * 100))

Training Set Accuracy: 79.71%


In [21]:
# Trying to identify best hypothesis by evaluating every single possible combination
best_powers = ''
best_performance = 0
best_theta = ''
lambda_ = .1
num_classes = 3
now = time.time()
theta = np.ones(6)
for i in range(6):
    print("Main: " + str(i))
    for j in range(6):
        print("Sub: " + str(j))
        print("Current best_theta: " + str(best_theta))
        for k in range(6):
            for l in range(6):
                for m in range(6):
                    for n in range(6):
                        powers = np.array([i,j,k,l,m,n])
                        #all_theta = oneVsAllReg(X, y, num_classes, lambda_, powers)
                        all_theta = oneVsAll(X, y, num_classes, powers)
                        preds = classifyOneVsAll(all_theta, X, powers)
                        performance = np.mean(preds == y) * 100
                        if(performance > best_performance):
                            best_performance = performance
                            best_powers = powers
                            best_theta = all_theta
then = time.time()
print("Best hypothesis function has powers: " + str(best_powers))
print("It had the following performance : " + str(best_performance))
print("It had the following theta: " + str(best_theta))
print("Time took: " + str(then - now))

Main: 0
Sub: 0
Current best_theta: 
Sub: 1
Current best_theta: [[-0.94707612  0.0935797   0.21562864 -0.06362213 -0.0705111  -1.06748832]
 [ 0.37619398 -0.10531447 -0.06600369  0.07863234  0.0143614   1.69845091]
 [-2.51165427  0.11899932 -1.59840977 -0.13111087  0.21506235 -2.90024353]]
Sub: 2
Current best_theta: [[-0.94707612  0.0935797   0.21562864 -0.06362213 -0.0705111  -1.06748832]
 [ 0.37619398 -0.10531447 -0.06600369  0.07863234  0.0143614   1.69845091]
 [-2.51165427  0.11899932 -1.59840977 -0.13111087  0.21506235 -2.90024353]]
Sub: 3
Current best_theta: [[-0.94707612  0.0935797   0.21562864 -0.06362213 -0.0705111  -1.06748832]
 [ 0.37619398 -0.10531447 -0.06600369  0.07863234  0.0143614   1.69845091]
 [-2.51165427  0.11899932 -1.59840977 -0.13111087  0.21506235 -2.90024353]]
Sub: 4
Current best_theta: [[-0.94707612  0.0935797   0.21562864 -0.06362213 -0.0705111  -1.06748832]
 [ 0.37619398 -0.10531447 -0.06600369  0.07863234  0.0143614   1.69845091]
 [-2.51165427  0.11899932 -1

Sub: 4
Current best_theta: [[-0.94707612  0.0935797   0.21562864 -0.06362213 -0.0705111  -1.06748832]
 [ 0.37619398 -0.10531447 -0.06600369  0.07863234  0.0143614   1.69845091]
 [-2.51165427  0.11899932 -1.59840977 -0.13111087  0.21506235 -2.90024353]]
Sub: 5
Current best_theta: [[-0.94707612  0.0935797   0.21562864 -0.06362213 -0.0705111  -1.06748832]
 [ 0.37619398 -0.10531447 -0.06600369  0.07863234  0.0143614   1.69845091]
 [-2.51165427  0.11899932 -1.59840977 -0.13111087  0.21506235 -2.90024353]]
Best hypothesis function has powers: [0 2 4 4 0 0]
It had the following performance : 82.6086956522
It had the following theta: [[-0.94707612  0.0935797   0.21562864 -0.06362213 -0.0705111  -1.06748832]
 [ 0.37619398 -0.10531447 -0.06600369  0.07863234  0.0143614   1.69845091]
 [-2.51165427  0.11899932 -1.59840977 -0.13111087  0.21506235 -2.90024353]]
Time took: 300.63893485069275


check out [my website](https://dylanpjackson.com) please <3 and maybe hire me if you want