# Predict an applicant's outcome when they apply to CSH using past data. Employs logistic regression to identify potential outcome classes. Check out the README for a better explanation.  
        
# By : Dylan P. Jackson
check out [my website](https://dylanpjackson.com) please <3 and maybe hire me if you want

In [1]:
# Them imports yo (Will probably need more)
import numpy as np
import sys
from scipy import optimize
sys.path.insert(1,"../ai_implementations")
import custom_ai_utils

In [2]:
# Read the data set: columns 1 through 6 of the CSV are kept
data_path = "data/intro.csv"
data = np.loadtxt(data_path, delimiter=",", usecols=tuple(range(1, 7)))

# Split the loaded columns: the first five are the inputs, the sixth is the result
X = data[:, :5]
y = data[:, 5]
m = y.shape[0]

# Prepend a column of ones to X so theta[0] acts as the intercept term
X = np.concatenate((np.ones((m, 1)), X), axis=1)


In [3]:
# The optimization is delegated to scipy.optimize rather than hand-written gradient descent.
# To that end, we define a cost function that returns not only the cost
# but also the gradient at the given theta.

def costGradFunction(theta, X, y, lambda_):
    """Return the regularized logistic-regression cost and its gradient.

    The bias weight ``theta[0]`` is excluded from the regularization penalty
    in both the cost and the gradient, so the returned gradient is the
    gradient of the returned cost.

    Args:
        theta: Parameter vector; element 0 is the bias weight. It is not
            modified by this function.
        X: Training data, combined with theta as ``X.dot(theta.T)``.
        y: Result data (labels), one entry per training example.
        lambda_: Regularization parameter.

    Returns:
        A tuple ``(J, grad)``: the cost and the gradient with respect to
        theta.
    """
    # Number of training examples
    m = y.size

    # NOTE(review): assumes custom_ai_utils.sigmoid is the element-wise
    # logistic function -- confirm against that module.
    h = custom_ai_utils.sigmoid(X.dot(theta.T))

    # Copy theta before zeroing the bias weight. Assigning through a plain
    # alias (``temp = theta``) would overwrite the caller's parameter vector.
    reg_theta = theta.copy()
    reg_theta[0] = 0

    # Regularized cost. The penalty uses reg_theta so the bias is left out,
    # matching the gradient below.
    J = (1/m)*np.sum((-y*np.log(h)) - (1-y)*(np.log(1-h))) \
        + (lambda_/(2*m))*np.sum(np.square(reg_theta))

    # Regularized gradient
    grad = (1/m)*((h-y).dot(X)) + (lambda_/m)*reg_theta

    return J, grad

In [23]:
def oneVsAll(X, y, num_classes, lambda_):
    """Fit one regularized classifier per class and return all the thetas.

    Args:
        X: Training data, one row per example (intercept column included).
        y: Result data; class ``c`` is the set of entries equal to ``c``.
        num_classes: Number of classes; labels ``0 .. num_classes - 1`` are
            each fitted in turn.
        lambda_: Regularization parameter passed through to the cost function.

    Returns:
        Array of shape ``(num_classes, n)`` where row ``c`` is the optimized
        theta for class ``c`` and ``n`` is the number of columns of X.
    """
    # Only the column count is needed here
    _, n_features = X.shape

    theta_per_class = np.zeros((num_classes, n_features))

    for label in range(num_classes):
        # Recast as a binary problem: 1 where y equals this label, else 0
        binary_targets = (y == label) * 1

        start_theta = np.zeros(n_features)
        solver_options = {'maxiter': 50}  # Feel free to play around with this

        # jac=True tells scipy that costGradFunction returns (cost, gradient)
        result = optimize.minimize(
            costGradFunction,
            start_theta,
            args=(X, binary_targets, lambda_),
            jac=True,
            method='TNC',
            options=solver_options,
        )
        theta_per_class[label] = result.x

    return theta_per_class

In [5]:
def classifyOneVsAll(all_theta, X):
    """Return the predicted class index for every row of X.

    Args:
        all_theta: Optimized theta for each class, one row per class.
        X: Data to classify, one row per example, with the same columns the
            thetas were fitted on.

    Returns:
        1-D array with one entry per row of X: the index of the row of
        ``all_theta`` whose score is highest for that example.
    """
    # Score every example against every class at once; the result has one
    # column per class.
    # NOTE(review): assumes custom_ai_utils.sigmoid applies element-wise --
    # confirm against that module.
    scores = custom_ai_utils.sigmoid(X.dot(all_theta.T))

    # The winning class for each example is the column with the largest score
    return np.argmax(scores, axis=1)

In [34]:
# Settings for the one-vs-all run
lambda_ = 0.1
num_classes = 3

# Hypothesis values for an all-ones theta (not used by the training below)
theta = np.ones(6)
h = custom_ai_utils.sigmoid(np.dot(X, theta))

# Fit one classifier per class, then classify the same data it was trained on
all_theta = oneVsAll(X, y, num_classes, lambda_)
predictions = classifyOneVsAll(all_theta, X)

# Accuracy is the percentage of predictions equal to the recorded result
print('Training Set Accuracy: {:.2f}%'.format(100 * np.mean(predictions == y)))

Training Set Accuracy: 80.19%
