In [217]:
import numpy as np
from scipy.special import softmax

In [218]:
def generate_data(data):
    X = data[:, :-1]
    # one = np.ones((X.shape[0], 1))
    # X= np.hstack((one,X))
    X= X.astype(np.float64)
    Y = data[:, -1]
    Y = Y.astype(int)-1
    unique, counts = np.unique(Y, return_counts=True)
    W =np.zeros((X.shape[1]+1, len(unique)), dtype=np.float64)
    return X,Y,W,counts

def g(W,x,j):
    z = np.dot(x,W)
    sm= softmax(z)
    return sm[j]

def loss(X,Y,W,counts):
    n = X.shape[0]
    Z = X @ W
    softmax_probs = softmax(Z, axis=1)
    indices = (np.arange(n), Y)
    correct_class_probs = softmax_probs[indices]
    scaled_probs = np.log(correct_class_probs) / counts[Y]
    loss_value = -np.mean(scaled_probs) / 2
    return loss_value

def compute_gradient(X, Y, W, counts):
    n, m = X.shape
    k = W.shape[1]
    z = X @ W  
    softmax_probs = softmax(z, axis=1) 
    indices = (np.arange(n), Y)
    Y_one_hot = np.zeros((n, k))
    Y_one_hot[indices] = 1
    grad_W = X.T @ ((softmax_probs - Y_one_hot) / counts[Y][:, np.newaxis]) / (2 * n) 
    return grad_W

In [219]:
# def compute_n_partb(X, Y, W, gradient, n0, counts):
#     nl = 0.0
#     nh = n0
#     while loss(X, Y, W, counts) > loss(X, Y, W - nh*gradient, counts):
#         nh *= 2
#     while loss(X, Y, W, counts) < loss(X, Y, W - nh*gradient, counts):
#         nh /= 2
#     nh *= 2
#     for _ in range(20):
#         n1 = (2*nl + nh)/3
#         n2 = (nl + 2*nh)/3
#         if loss(X, Y, W - n1*gradient, counts) > loss(X, Y, W - n2*gradient, counts):
#             nl = n1
#         else:
#             nh = n2
#     return nh-nl,(nl+nh)/2

# import matplotlib.pyplot as plt
# import numpy as np

# def compute_n_partb(X, Y, W, gradient, n0, counts):
#     nl = 0.0
#     nh = n0

#     # Arrays to store n values and corresponding loss values
#     n_values = []
#     loss_values = []

#     # Store the initial loss for W
#     initial_loss = loss(X, Y, W, counts)

#     # Find the upper bound for n (nh)
#     while loss(X, Y, W, counts) > loss(X, Y, W - nh * gradient, counts):
#         nh *= 2
#         n_values.append(nh)
#         loss_values.append(loss(X, Y, W - nh * gradient, counts))

#     # Find the correct nh value
#     while loss(X, Y, W, counts) < loss(X, Y, W - nh * gradient, counts):
#         nh /= 2
#         n_values.append(nh)
#         loss_values.append(loss(X, Y, W - nh * gradient, counts))

#     nh *= 2
#     n_values.append(nh)
#     loss_values.append(loss(X, Y, W - nh * gradient, counts))

#     # Use ternary search to refine nl and nh
#     for _ in range(5):
#         n1 = (2 * nl + nh) / 3
#         n2 = (nl + 2 * nh) / 3

#         loss_n1 = loss(X, Y, W - n1 * gradient, counts)
#         loss_n2 = loss(X, Y, W - n2 * gradient, counts)

#         if loss_n1 > loss_n2:
#             nl = n1
#         else:
#             nh = n2

#         n_values.extend([n1, n2])
#         loss_values.extend([loss_n1, loss_n2])

#     # Finally, append the midpoint loss
#     midpoint = (nl + nh) / 2
#     final_loss = loss(X, Y, W - midpoint * gradient, counts)
#     n_values.append(midpoint)
#     loss_values.append(final_loss)

#     # Plotting n vs Loss with color variation
#     plot_n_vs_loss(n_values, loss_values)

#     return midpoint

# def plot_n_vs_loss(n_values, loss_values):
#     # Normalize the color range based on the index of n_values
#     colors = np.linspace(0, 1, len(n_values))

#     # Create a scatter plot with color variation
#     scatter = plt.scatter(n_values, loss_values, c=colors, cmap='viridis', edgecolor='k')

#     # Plot lines connecting the points
#     plt.plot(n_values, loss_values, color='gray', linestyle='--')

#     # Add color bar to show the mapping of iteration index to color
#     cbar = plt.colorbar(scatter)
#     cbar.set_label('Iteration Index')

#     plt.xlabel("n values")
#     plt.ylabel("Loss values")
#     plt.title("n vs Loss with Color Varying by Iteration Index")
#     plt.show()

# # Usage example: You will need to pass your own X, Y, W, gradient, n0, and counts values
# # compute_n_partb(X, Y, W, gradient, n0, counts)



In [220]:
def compute_n_partb(X, Y, W, gradient, n0, counts):
    nl = 0.0
    nh = n0
    prev_loss= loss(X, Y, W, counts)
    while prev_loss > loss(X, Y, W - nh*gradient, counts):
        nh *= 2
        prev_loss= loss(X, Y, W - nh*gradient, counts)
    if nh>n0:
        nl= nh/2
    else:
        while loss(X, Y, W, counts) < loss(X, Y, W - nh*gradient, counts):
            nh /= 2
        nh *= 2
    for _ in range(5):
        n1 = (2*nl + nh)/3
        n2 = (nl + 2*nh)/3
        if loss(X, Y, W - n1*gradient, counts) > loss(X, Y, W - n2*gradient, counts):
            nl = n1
        else:
            nh = n2
    return (nl+nh)/2

In [221]:


def gradient_descent3_partb(X, Y, W, counts, n0, epochs, batch_size):
    n = X.shape[0]
    for _ in range(epochs):
        batch_num = 0
        for start in range(0, n, batch_size):
            end = min(start + batch_size, n)
            X_batch = X[start:end]
            Y_batch = Y[start:end]
            batch_loss = loss(X_batch, Y_batch, W, counts)
            print(f"Epoch{_+1}, Batch{1+int(start/batch_size)}, Loss{batch_loss}")

            gradient = compute_gradient(X_batch, Y_batch, W, counts)
            learning_rate= compute_n_partb(X_batch, Y_batch, W, gradient, n0, counts)
            W -= learning_rate * gradient
            batch_num += 1
    return W

In [222]:
from sklearn.preprocessing import StandardScaler
train = np.loadtxt("Assignment1.2/train1.csv", delimiter=",", skiprows=1)
test = np.loadtxt("Assignment1.2/test1.csv", delimiter=",", skiprows=1)
actual_pred = np.loadtxt("Assignment1.2/test_pred1.csv", delimiter=",", skiprows=1)
X,Y,W,counts = generate_data(train)
X_test = test
one = np.ones((X_test.shape[0], 1))
X_test= X_test.astype(np.float64)
print(X.shape)
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
X_test = scaler.transform(X_test)
X_test= np.hstack((one,X_test))
one = np.ones((X.shape[0],1))
X= np.hstack((one,X))

(87595, 1183)


In [223]:
W= gradient_descent3_partb(X,Y,W,counts,1e4,25,87595)

Epoch1, Batch1, Loss3.165236283166598e-05
Epoch2, Batch1, Loss2.67407516218577e-05
Epoch3, Batch1, Loss2.5051909632517856e-05
Epoch4, Batch1, Loss2.4098271710054052e-05
Epoch5, Batch1, Loss2.3469161983370397e-05
Epoch6, Batch1, Loss2.301881617296162e-05
Epoch7, Batch1, Loss2.267904937818342e-05
Epoch8, Batch1, Loss2.241282676487056e-05
Epoch9, Batch1, Loss2.219809538875155e-05
Epoch10, Batch1, Loss2.202088318279678e-05
Epoch11, Batch1, Loss2.1871902881557348e-05
Epoch12, Batch1, Loss2.1744728723831587e-05
Epoch13, Batch1, Loss2.1634764328217606e-05
Epoch14, Batch1, Loss2.1538630385672535e-05
Epoch15, Batch1, Loss2.1453784715634292e-05
Epoch16, Batch1, Loss2.137827713004682e-05
Epoch17, Batch1, Loss2.1310586045850514e-05
Epoch18, Batch1, Loss2.124950655771541e-05
Epoch19, Batch1, Loss2.1194071932197612e-05
Epoch20, Batch1, Loss2.114349739485046e-05
Epoch21, Batch1, Loss2.109713913810034e-05
Epoch22, Batch1, Loss2.105446393836321e-05
Epoch23, Batch1, Loss2.1015026306116578e-05
Epoch24, B

In [224]:
print(W.shape)
print(W[:5])

(1184, 4)
[[ 0.01737195 -0.25185358  0.03091129  0.20357035]
 [-0.03893799  0.00627864 -0.01302418  0.04568354]
 [-0.02027115  0.04835526 -0.05143434  0.02335024]
 [-0.00451498  0.0011484  -0.00345959  0.00682616]
 [ 0.0128235  -0.05190228  0.01503519  0.0240436 ]]


In [225]:
Z = X_test @ W
softmax_probs = softmax(Z, axis=1)
print(Z[:5])
output_model_pred = np.argmax(Z, axis=1) + 1

[[-0.06555898  0.03300401 -0.64807061  0.68062558]
 [-0.4652627   0.64420059  1.39725368 -1.57619158]
 [ 0.54387605 -0.66451501 -0.88963929  1.01027825]
 [ 0.80719547 -0.76054999 -0.19910945  0.15246397]
 [-0.7802772  -0.33592207  1.79829465 -0.68209538]]


In [226]:
# write the softmax_probs in a csv file, with each row as comma separated value as probability of each class
np.savetxt("modelpredictionsb.csv", softmax_probs, delimiter=",")
np.savetxt("temp.csv", output_model_pred, delimiter=",")

In [227]:
actual_pred= actual_pred.astype(int)-1
test_unique, test_counts= np.unique(actual_pred, return_counts=True)
print(loss(X_test, actual_pred, W, test_counts))
    

3.465886286518208e-05


In [228]:
print(counts)

[16131  1032 25406 45026]
