In [2]:
import numpy as np
from scipy.special import softmax

In [4]:
def generate_data(data):
    """Split a raw data matrix into design matrix, labels, initial weights and class counts.

    Every column of ``data`` except the last is treated as a feature and the
    last column as the class label.

    Returns:
        X: float64 feature matrix with a column of ones prepended (bias term).
        Y: labels cast to int and shifted down by one.
        W: float64 zero matrix with one row per column of X and one column
           per distinct label value.
        counts: number of occurrences of each distinct label, in sorted
           label order (as returned by ``np.unique``).
    """
    features, labels = data[:, :-1], data[:, -1]

    # Prepend the bias column, then force a float64 design matrix.
    bias = np.ones((features.shape[0], 1))
    X = np.hstack((bias, features)).astype(np.float64)

    # Shift the labels down by one so they can be used as column indices.
    Y = labels.astype(int) - 1

    classes, counts = np.unique(Y, return_counts=True)
    W = np.zeros((X.shape[1], classes.size), dtype=np.float64)
    return X, Y, W, counts

def g(W, x, j):
    """Return entry ``j`` of the softmax of ``x . W``.

    The softmax is taken with scipy's default axis, i.e. over every element
    of the product, so ``x`` is presumably a single sample (1-D) -- confirm
    against callers.
    """
    return softmax(np.dot(x, W))[j]

def loss(X,Y,W,counts):
    """Class-count-weighted negative log-likelihood of a softmax model.

    Computes ``-mean(log p[i, Y[i]] / counts[Y[i]]) / 2`` where ``p`` is the
    row-wise softmax of ``X @ W``.

    Args:
        X: 2-D design matrix, one sample per row.
        Y: integer class index for each row of X (used to index columns of
           the probability matrix and entries of ``counts``).
        W: weight matrix; ``X @ W`` yields one column of logits per class.
        counts: array indexed by class index; each sample's log-probability
           is divided by the entry for its class.

    Returns:
        The scalar loss value.
    """
    n = X.shape[0]
    Z = X @ W

    # Log-softmax computed directly from the logits. Taking np.log of the
    # softmax output gives log(0) = -inf as soon as a probability underflows
    # (a confident wrong prediction), which turns the whole loss into inf.
    # Subtracting the row maximum first keeps every exponent <= 0.
    shifted = Z - Z.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))

    correct_class_log_probs = log_probs[np.arange(n), Y]
    scaled_log_probs = correct_class_log_probs / counts[Y]
    return -np.mean(scaled_log_probs) / 2

def compute_gradient(X, Y, W, counts):
    """Gradient with respect to ``W`` of the count-weighted softmax loss.

    Evaluates ``X.T @ ((P - onehot(Y)) / counts[Y][:, None]) / (2 * n)``,
    where ``P`` is the row-wise softmax of ``X @ W`` and ``n`` is the number
    of rows of ``X``. The result has the same shape as ``W``.
    """
    rows, _ = X.shape

    # Start from the predicted probabilities and subtract 1 at each sample's
    # true class; this equals P minus the one-hot encoding of Y.
    residual = softmax(X @ W, axis=1)
    residual[np.arange(rows), Y] -= 1

    # Per-sample weight: the count of the sample's own class, as a column.
    per_sample_count = counts[Y].reshape(-1, 1)

    return X.T @ (residual / per_sample_count) / (2 * rows)

In [14]:
def compute_n_partb(X, Y, W, gradient, n0, counts):
    """Line search for a step size along ``-gradient`` using ternary search.

    Starting from ``n0``, the upper bound is doubled while a step of that
    size still lowers the loss, halved while it raises the loss, and then
    doubled once more. Twenty rounds of ternary search then shrink the
    interval ``[0, upper]``.

    Args:
        X, Y, counts: forwarded unchanged to ``loss``.
        W: current weights (not modified here; trial weights are new arrays).
        gradient: search direction; trial weights are ``W - step * gradient``.
        n0: initial guess for the upper bound of the step size.

    Returns:
        A tuple ``(width, step)`` where ``width`` is the length of the final
        search interval and ``step`` is its midpoint.
    """
    def loss_at(step):
        # Loss after moving `step` along the negative gradient.
        return loss(X, Y, W - step * gradient, counts)

    # The loss at the current weights does not change during the search, so
    # evaluate it once instead of on every iteration of the loops below.
    base_loss = loss(X, Y, W, counts)

    nl = 0.0
    nh = n0

    # Grow the upper bound while stepping that far still improves the loss.
    while base_loss > loss_at(nh):
        nh *= 2
    # Shrink it while stepping that far makes the loss worse.
    while base_loss < loss_at(nh):
        nh /= 2
    # Step back out once so the bracket extends past the last accepted bound.
    nh *= 2

    # Ternary search: drop the third of the interval with the worse probe.
    for _ in range(20):
        n1 = (2*nl + nh)/3
        n2 = (nl + 2*nh)/3
        if loss_at(n1) > loss_at(n2):
            nl = n1
        else:
            nh = n2
    return nh-nl,(nl+nh)/2

def gradient_descent3_partb(X, Y, W, counts, n0, epochs, batch_size, verbose=True):
    """Mini-batch gradient descent with a per-batch line search for the step.

    ``W`` is updated IN PLACE; nothing is returned. Batches are consecutive
    row slices of ``X``/``Y`` of length ``batch_size`` (the last one may be
    shorter) and are visited in the same order every epoch.

    Each batch position keeps its own starting step for the line search. It
    is initialised to ``n0`` and, after every update, replaced by the first
    value returned from ``compute_n_partb``.
    NOTE(review): that first value is the width of the final search bracket,
    not the chosen step -- confirm this is the intended warm start.

    Args:
        X, Y: full training design matrix and integer labels.
        W: weight matrix, modified in place (must support ``-=`` with floats).
        counts: per-class counts forwarded to the gradient and line search.
        n0: initial line-search step for every batch.
        epochs: number of passes over the data.
        batch_size: number of rows per batch.
        verbose: when True (the default, matching the previous behaviour)
            print the batch count once and the per-batch starting steps at
            the start of every epoch.
    """
    n = X.shape[0]
    num_batches = np.ceil(n / batch_size)
    if verbose:
        print(num_batches)

    # One line-search starting step per batch position.
    n_with_epochs = [n0] * int(num_batches)

    for _ in range(epochs):
        if verbose:
            print(n_with_epochs)
        for batch_num, start in enumerate(range(0, n, batch_size)):
            end = min(start + batch_size, n)
            X_batch = X[start:end]
            Y_batch = Y[start:end]

            gradient = compute_gradient(X_batch, Y_batch, W, counts)
            next_n0, learning_rate = compute_n_partb(
                X_batch, Y_batch, W, gradient, n_with_epochs[batch_num], counts
            )
            n_with_epochs[batch_num] = next_n0
            W -= learning_rate * gradient

3.0
[1e-09, 1e-09, 1e-09]
[4.811658557148091e-12, 4.811658557147264e-12, 4.811658557148091e-12]
[2.9634634330326673e-12, 2.9634634330326673e-12, 5.926926866066989e-12]
[3.650348591704111e-12, 3.650348591704111e-12, 3.650348591704938e-12]
[4.49644314568763e-12, 4.4964431456884575e-12, 4.4964431456884575e-12]


In [6]:
# Read the three CSV files, skipping the first line of each (presumably a header row).
train = np.loadtxt("Assignment1.2/train1.csv", delimiter=",", skiprows=1)
test = np.loadtxt("Assignment1.2/test1.csv", delimiter=",", skiprows=1)
actual_pred = np.loadtxt("Assignment1.2/test_pred1.csv", delimiter=",", skiprows=1)

# Training design matrix, labels, zero-initialised weights and class counts.
X, Y, W, counts = generate_data(train)

# Test design matrix: all columns of `test` with a leading column of ones.
one = np.ones((test.shape[0], 1))
X_test = np.hstack((one, test)).astype(np.float64)

print(X.shape)

(87595, 1184)


In [10]:
# Rebuild the training arrays so W is reset to zeros, then train; W is updated in place.
X, Y, W, counts = generate_data(train)
gradient_descent3_partb(X, Y, W, counts, n0=1e-9, epochs=5, batch_size=100)

3.0
[3.0000000000000004e-09]


IndexError: list index out of range

In [15]:
# Show the shape of the weight matrix followed by its first five rows.
for shown in (W.shape, W[:5]):
    print(shown)

(1184, 4)
[[-2.26730609e-14 -1.46447428e-14 -2.89498998e-14  6.62677034e-14]
 [-1.04586825e-14 -2.71537298e-15 -1.06120846e-14  2.37861401e-14]
 [-9.54991900e-15  1.99264410e-14 -1.94893588e-14  9.11283682e-15]
 [-1.51548296e-14  1.80077189e-14 -1.33333440e-14  1.04804547e-14]
 [-4.46943354e-15 -3.29986926e-14 -3.12370314e-15  4.05918292e-14]]


In [12]:
# Class probabilities for every test row; the predicted label is the most
# probable column, shifted back to 1-based numbering.
Z = np.matmul(X_test, W)
softmax_probs = softmax(Z, axis=1)
output_model_pred = softmax_probs.argmax(axis=1) + 1

(58397, 4)
[[0.18446566 0.3616535  0.27125652 0.18262433]
 [0.16118022 0.40561638 0.27451333 0.15869007]
 [0.16176885 0.40483075 0.27392487 0.15947553]
 [0.17694939 0.37690955 0.27044707 0.17569399]
 [0.16216458 0.40432129 0.27349919 0.16001495]]
(58397,)
[2 2 2 2 2]


In [13]:
# Write softmax_probs to a CSV file: one row per test sample, with the
# per-class probabilities separated by commas.
np.savetxt(fname="modelpredictionsb.csv", X=softmax_probs, delimiter=",")

In [16]:
def loss_test_set(test, actual_pred, W):
    """Sum of class-count-weighted log-probabilities of the true test labels.

    A column of ones is prepended to ``test``, the row-wise log-softmax of
    ``X_test @ W`` is evaluated, and for every row the log-probability of its
    true class is divided by the number of test samples of that class. The
    per-class counts and the first ten predicted labels (1-based) are
    printed as a side effect.

    NOTE(review): the value returned is the plain sum (so it is <= 0); unlike
    ``loss`` it is neither negated nor divided by ``2 * n``. Confirm which
    convention is wanted before comparing the two numbers.

    Args:
        test: 2-D array of test features, without a bias column.
        actual_pred: true labels, numbered from 1; one per row of ``test``.
        W: weight matrix with one column per class.

    Returns:
        ``sum_i log p[i, y_i] / count[y_i]``.
    """
    X_test = np.hstack((np.ones((test.shape[0], 1)), test)).astype(np.float64)

    # Zero-based integer class index for every test sample.
    labels = np.asarray(actual_pred).ravel().astype(int) - 1

    # bincount keeps counts aligned with the class index even when a class
    # does not occur in the test labels; np.unique would return a shorter
    # array whose positions no longer match the class ids.
    counts = np.bincount(labels, minlength=W.shape[1])
    print(counts)

    Z = X_test @ W
    # Log-softmax from the logits directly, so that a probability that
    # underflows to zero does not produce log(0) = -inf.
    shifted = Z - Z.max(axis=1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))

    predicted = np.argmax(log_probs, axis=1) + 1
    print(predicted[:10])

    true_class_log_probs = log_probs[np.arange(labels.size), labels]
    return np.sum(true_class_log_probs / counts[labels])

# Evaluate the trained weights on the test set and show the resulting value.
print(loss_test_set(test=test, actual_pred=actual_pred, W=W))
    

[11018   677 16685 30017]
[3 3 3 3 3 3 3 3 3 3]
-5.527642266228281


: 