In [28]:
from matplotlib import pyplot as plt
import numpy as np 
import math
import matplotlib.pyplot as plt
%matplotlib inline

### Defined by me

In [278]:
def sigmoid(s):
    return 1 / (1 + np.exp(-s))

In [405]:
def logistic_regression(data, label, max_iter, learning_rate):
    '''
    The logistic regression classifier function.

    Args:
    data: train data with shape (1561, 3), which means 1561 samples and 
          each sample has 3 features.(1, symmetry, average internsity)
          
    label: train data's label with shape (1561,1). 
           1 for digit number 1 and -1 for digit number 5.
           
    max_iter: max iteration numbers
    
    learning_rate: learning rate for weight update

    Returns:
        w: the seperater with shape (3, 1). You must initilize it with w = np.zeros((d,1))
    '''
    N,m = data.shape
    w = np.zeros((m,1))
    # Compute the Gradient
    for t in range(max_iter):
        g = 0
        for n in range(N):
            g = (label[n]*data[n]*sigmoid(-label[0]*np.dot(data[0],w))) + g
        g = (-1/N)*g
        w = w.T + (learning_rate * -g)
        w = w.T
    print(w)    
    return w

In [412]:
def accuracy(x, y, w):
    '''
    This function is used to compute accuracy of a logsitic regression model.
    
    Args:
    x: input data with shape (n, d), where n represents total data samples and d represents
        total feature numbers of a certain data sample.
        
    y: corresponding label of x with shape(n, 1), where n represents total data samples.
    
    w: the seperator learnt from logistic regression function with shape (d, 1),
        where d represents total feature numbers of a certain data sample.

    Return 
        accuracy: total percents of correctly classified samples. Set the threshold as 0.5,
        which means, if the predicted probability > 0.5, classify as 1; Otherwise, classify as -1.
    '''

### Defined by Professor

In [406]:
#Use for testing the training and testing processes of a model
def train_test_a_model(modelname, train_data, train_label, test_data, test_label, max_iter, learning_rate):
    print(modelname+" testing...")
    
    # max iteration test cases 
    for i, m_iter in enumerate(max_iter):
        w = logistic_regression(train_data, train_label, m_iter, learning_rate[1])
        Ain, Aout = accuracy(train_data, train_label, w), accuracy(test_data, test_label, w)
        print("max iteration testcase%d: Train accuracy: %f, Test accuracy: %f"%(i, Ain, Aout))
    # learning rate test cases
    for i, l_rate in enumerate(learning_rate):
        w = logistic_regression(train_data, train_label, max_iter[3], l_rate)
        Ain, Aout = accuracy(train_data, train_label, w), accuracy(test_data, test_label, w)
        print("learning rate testcase%d: Train accuracy: %f, Test accuracy: %f"%(i, Ain, Aout))
    print(modelname+" test done.")

In [407]:
def extract_feature(image):
    image = np.reshape(image, (16, 16))
    flip_image = np.flip(image, 1)
    diff = abs(image-flip_image)
    sys  = -sum(sum(diff))/256
    intense = sum(sum(image))/256
    return sys, intense

In [408]:
def load_data(dataloc):
    data = np.loadtxt(dataloc, unpack='true')
    data = np.transpose(data, (1,0))
    return data

In [409]:
def load_features(dataloc):
    data = load_data(dataloc)
    n, _ = data.shape
    data_set = []
    for i in range(n):
        label = 1 if data[i, 0]==1 else -1
        image = data[i, 1:]
        sys, intense = extract_feature(image)
        data_set.append([label, 1, sys, intense])
    return np.array(data_set)[:,1:], np.array(data_set)[:,0]

In [410]:
def test_logistic_regression():
    max_iter = [100, 200, 500,1000]
    learning_rate = [0.1, 0.2, 0.5]
    traindataloc,testdataloc = "../data/train.txt", "../data/test.txt"
    train_data,train_label = load_features(traindataloc)
    test_data, test_label = load_features(testdataloc)
    try:
        train_test_a_model("logistic regression", train_data, train_label, test_data,test_label, max_iter, learning_rate)
    except:
        print("Please finish logistic_regression() and cross_entropy_error() functions \n\
        before you run the test_logistic_regression() function.\n")

In [411]:
test_logistic_regression()

logistic regression testing...
[[ 4.63164024]
 [ 1.94399222]
 [-4.86285452]]
Please finish logistic_regression() and cross_entropy_error() functions 
        before you run the test_logistic_regression() function.

