In [None]:
import numpy as np
from matplotlib import pyplot as plt
import pandas

In [None]:
def generate_one_hot(value):
    '''
    Build a one-hot encoded list with 26 float entries.

    ### Input
    value : integer position to switch on; it is used directly as a list index.

    #### Output
    A list of 26 floats that is 0.0 everywhere except for a 1.0 at index `value`.
    '''
    encoding = [0.0 for _ in range(26)]
    encoding[value] = 1.0
    return encoding

In [None]:
def prepare_dataset(filename):
    '''
    Load the letter-recognition CSV and turn it into the arrays used to train
    the SOFTMAX classification model.

    ### Input
    filename : path of a CSV file (anything accepted by pandas.read_csv) that
        contains a 'LETTER' column with an uppercase letter 'A'-'Z' and the
        columns 'INPUT1' ... 'INPUT16'.

    #### Output
    X : array of shape (n_rows, 17). Column 0 is a constant 1 (bias term) and
        columns 1-16 hold INPUT1 ... INPUT16, in that order.
    Y_hot : float array of shape (n_rows, 26). Row i is the one-hot encoding of
        the letter of row i (A -> index 0, ..., Z -> index 25).

    #### Raises
    KeyError if a required column is missing or if a LETTER value is not one
    of the 26 uppercase letters.
    '''
    alphabet = {letter: index for index, letter in enumerate("ABCDEFGHIJKLMNOPQRSTUVWXYZ")}
    feature_columns = ['INPUT%d' % k for k in range(1, 17)]

    dataframe = pandas.read_csv(filename)

    # Pull all 16 feature columns at once instead of walking the frame row by row.
    features = dataframe[feature_columns].to_numpy()
    # Leading column of ones so the bias is learned as part of the weight matrix.
    bias = np.ones((len(dataframe), 1), dtype=features.dtype)
    X = np.hstack([bias, features])

    # Plain dict lookup on purpose: an unexpected label raises KeyError rather
    # than silently producing an all-zero target row.
    letter_indices = np.array([alphabet[letter] for letter in dataframe['LETTER']], dtype=int)
    Y_hot = np.zeros((len(letter_indices), len(alphabet)))
    Y_hot[np.arange(len(letter_indices)), letter_indices] = 1.0

    return X, Y_hot

# Load the letter-recognition CSV once; X and Y are reused by the training cell below.
X, Y = prepare_dataset('letter-recognition.csv')


In [None]:
def softmax(X):
    '''
    Row-wise softmax of a 2-D array of scores.

    ### Input
    X : 2-D array-like of shape (n_rows, n_classes) holding raw scores (logits).

    #### Output
    Float array of the same shape in which every row is non-negative and sums to 1.
    '''
    scores = np.asarray(X, dtype=float)
    # exp(x - m) / sum(exp(x - m)) equals exp(x) / sum(exp(x)) for any constant m;
    # using the row maximum keeps every exponent <= 0 so np.exp cannot overflow
    # to inf (which would turn the whole row into nan).
    shifted = scores - scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=1, keepdims=True)

In [None]:
def cce_loss(y_pred, y_true, eps=1e-15):
    '''
    Categorical cross-entropy, averaged over the rows of `y_pred`.

    ### Input
    y_pred : array-like of predicted class probabilities, one row per sample.
    y_true : array-like of the same shape holding the target distribution
        (one-hot rows in this notebook).
    eps : lower bound applied to `y_pred` before taking the logarithm.

    #### Output
    The scalar -sum(y_true * log(y_pred)) / len(y_pred).
    '''
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)
    # log(0) is -inf, and 0 * -inf is nan; flooring the probabilities keeps the
    # loss finite when a predicted probability underflows to exactly 0.
    safe_pred = np.clip(y_pred, eps, None)
    loss = y_true*np.log(safe_pred)
    return -np.sum(loss)/len(y_pred)

In [None]:
np.random.seed(100)

# Train on the first 10000 rows only.
X = X[0:10000]
Y = Y[0:10000]


EPOCHS = 100
LR = 1e-2

BATCH_SIZE = X.shape[0]  # one batch spans every training row (full-batch gradient descent)
OUTPUT_SIZE = 26
PRINT_FREQUENCY = 50

# One weight column per output class, one weight row per input column of X.
theta = np.random.randn(X.shape[1],OUTPUT_SIZE)

loss = []
# theta is updated in place below, so the best weights must be kept as a copy;
# a plain assignment would only alias theta and always end up equal to the final weights.
best_theta = theta.copy()
best_loss = np.inf

for i in range(EPOCHS):
    epoch_loss = 0.0
    for b in range(0,len(X), BATCH_SIZE):
        X_batch = X[b:b+BATCH_SIZE]
        Y_batch = Y[b:b+BATCH_SIZE]

        model_output = softmax(X_batch@theta)

        # NOTE(review): this is the gradient summed over the batch, whereas cce_loss
        # reports a per-row mean; the effective step therefore scales with the batch
        # size. Left unchanged here - confirm whether LR was tuned with that in mind.
        d_theta = X_batch.T@(model_output - Y_batch)
        theta -= LR*d_theta  # gradient descent step

        # The loss is measured on the outputs computed before this batch's update.
        epoch_loss += cce_loss(model_output, Y_batch)
    if epoch_loss < best_loss:
        best_loss = epoch_loss
        best_theta = theta.copy()
    if i%PRINT_FREQUENCY == 0:
        print('Epoch number = '+str(i)+'----> Loss = '+str(epoch_loss))

    loss.append(epoch_loss)

print('The loss at the end of training is ==', loss[-1])
print('The best loss is ==', best_loss)

plt.plot(range(1,EPOCHS+1),loss)
plt.title('Training loss per epoch')
plt.ylabel('CCE')
plt.xlabel('Number of epochs')
plt.show()
print(theta)
