# Softmax Regression

### all necessary packages are listed below

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# KFold is imported solely to split the data in the cross-validation part.
from sklearn.model_selection import KFold
# Five shuffled folds; random_state=0 fixes the shuffle so the splits are reproducible.
kf = KFold(n_splits=5, shuffle=True, random_state=0)

### The functions defined below form the core implementation of the softmax regression model

In [None]:
# use the softmax function to approximate probability
def softmax(a):
    """Turn raw scores into probabilities with a numerically stable softmax.

    The normalisation runs along the last axis: a 1-D score vector yields a
    single distribution, and a 2-D ``(samples, classes)`` matrix yields one
    distribution per row, so every sample is normalised over its own classes.

    Parameters
    ----------
    a : array_like
        Raw scores; the last axis indexes the classes.  A scalar is treated
        as a single score.

    Returns
    -------
    numpy.ndarray or numpy.floating
        Values of the same shape as ``a`` that are non-negative and sum to 1
        along the last axis.
    """
    scores = np.asarray(a)
    # A 0-d input has no axis to reduce along, so fall back to a full
    # reduction for it; everything else is reduced over the class axis only.
    reduce_kw = {"axis": -1, "keepdims": True} if scores.ndim else {}

    # Subtracting the maximum does not change the result but keeps np.exp
    # from overflowing on large scores.
    exp_a = np.exp(scores - np.max(scores, **reduce_kw))
    return exp_a / np.sum(exp_a, **reduce_kw)

In [None]:
# use gradient descent to find the optimal w in softmax regression
def grad_desc(X, y, lambd, quiet=True, *, alpha=0.01, decay=0.0001,
              tol=0.000001, max_iter=1000000):
    """Fit softmax-regression weights with batch gradient descent.

    Every iteration computes per-sample class probabilities ``P``, forms the
    gradient ``X.T @ (P - y.T) / n_samples + lambd * W`` and moves the
    weights by ``step_size * gradient``.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Design matrix.
    y : array_like, shape (n_classes, n_samples)
        Target matrix with one row per class (e.g. one-hot indicators);
        ``y.shape[0]`` sets the number of weight columns.
    lambd : float
        Coefficient of the ``lambd * W`` penalty term added to the gradient.
    quiet : bool, default True
        When False, print a progress line every 10000 iterations.  The flag
        only controls printing and has no effect on the returned weights.
    alpha : float, default 0.01
        Initial step size.
    decay : float, default 0.0001
        Amount subtracted from the step size every 10000 iterations
        (iteration 0 included); the step size is never allowed below 0.
    tol : float, default 1e-6
        Stop once the squared norm of every column of the latest update is
        below this value.
    max_iter : int, default 1000000
        Upper bound on the number of iterations.

    Returns
    -------
    numpy.ndarray, shape (n_features, n_classes)
        The fitted weights (initialised to all ones).
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n_samples, n_features = X.shape
    n_classes = y.shape[0]
    weights = np.ones((n_features, n_classes))
    step_size = alpha

    for i in range(max_iter):
        if i % 10000 == 0:
            if not quiet:
                print("Gradient Descend Round", i)
            # The decay is part of the optimisation schedule, so it is applied
            # whether or not progress is printed; otherwise toggling `quiet`
            # would change the fitted weights.
            step_size = max(step_size - decay, 0.0)

        # Per-sample softmax: each row is normalised over its own classes,
        # shifted by the row maximum so np.exp cannot overflow.
        scores = X.dot(weights)
        scores = scores - scores.max(axis=1, keepdims=True)
        probs = np.exp(scores)
        probs = probs / probs.sum(axis=1, keepdims=True)

        # y is laid out (classes, samples), hence the transposes around the
        # subtraction.
        residual = (probs.T - y).T
        delta = X.T.dot(residual) / n_samples + lambd * weights
        update = step_size * delta
        weights = weights - update

        # Converged when the update of every class column is small enough.
        if np.all(np.sum(update * update, axis=0) < tol):
            break

    return weights

In [None]:
# use the derived w to predict the labels
def predict(X, w):
    """Predict a binary label for every row of ``X`` from weights ``w``.

    A sample is labelled 1 when its score for weight column 0 is at least
    its score for weight column 1, and 0 otherwise.  Softmax is strictly
    increasing in each score, so comparing the raw scores gives the same
    ordering as comparing the softmax probabilities, without the rounding
    or underflow that exponentiation can introduce and without a Python
    loop over the samples.

    Parameters
    ----------
    X : array_like, shape (n_samples, n_features)
        Samples to classify.
    w : array_like, shape (n_features, n_classes)
        Weight matrix with at least two columns.

    Returns
    -------
    numpy.ndarray, shape (n_samples,)
        Float array holding 1.0 or 0.0 per sample.
    """
    scores = np.asarray(np.dot(X, w))
    # Ties go to label 1 because the comparison is >=.
    return (scores[:, 0] >= scores[:, 1]).astype(float)

### the main training and testing procedures are as follows

In [None]:
# Sweep the regularisation coefficient and report train/test metrics for each value.
# The "evaluate" function is defined in the metrics part of the project;
# X_train, y_train, X_test and y_test are not defined in this section and must
# already exist when this cell runs.
# NOTE(review): grad_desc indexes y as (classes, samples) while predict returns
# one 0/1 label per sample -- confirm y_train / y_test are in the layout that
# both grad_desc and evaluate expect.
l_list = [0, 0.001, 0.01, 0.1, 1, 10]
for lambd in l_list:
    print("++++++++++++++++++++++++++ At lambda =",lambd, "+++++++++++++++++++++++++++++++")
    w = grad_desc(X_train, y_train, lambd, quiet=True)
    print("==========Training Result==========")
    y_pred = predict(X_train, w)
    evaluate(y_pred, y_train)
    print("==========Testing Result==========")
    y_pred = predict(X_test, w)
    evaluate(y_pred, y_test)