In [1]:
import numpy as np
import scipy.io as io
import pandas as pd
import time

In [2]:
mat = io.loadmat('data_doc_classif')

x_training = mat['Xts'].T.toarray()
y_training = mat['yts']
x_test = mat['Xvr'].T.toarray()

df_x_train = pd.DataFrame(x_training)
df_y_train = pd.DataFrame(y_training)
df_y_dummy = pd.get_dummies(df_y_train[0]).to_numpy()
df_x_test = pd.DataFrame(x_test)

## Utility Functions

In [3]:
# objective 1: build classifier of document using one layer NN, based on non linear activation function
def loss_function(prediction, truth):
#     loss = 0.5* (np.sum((prediction - truth)**2))
    loss = 0.5 * ((prediction - truth)**2)
    return loss

def activation_function(x):
    return 1 / (1+ np.exp(-x))

def softmax(x):
    e = np.exp(x)
    return e / e.sum(axis=1, keepdims=True)

def prediction(s, w, b):
    pro = np.dot(s, w) + b
    predict = np.argmax(softmax(pro))
    return predict

def full_prediction(test_data, label, weight_file, bias_file):
    pred_weight = np.load(weight_file)
    pred_bias = np.load(bias_file)
    couunt = 0
    for i in range(test_data.shape[0]):
        pred = prediction(test_data[i].reshape(1, -1), pred_weight, pred_bias)
        true_value = np.argmax(label[i])
        if pred == true_value:
            couunt += 1
    return couunt

def save_prediction(s, w1, b1):
    all_prediction = []
    for i in range(s.shape[0]):
        pro = np.dot(s[i], w1) + b1
#         print(pro.shape)
        predict = np.argmax(softmax(pro))
        predict = (predict+1)*100 + 1
        all_prediction.append(predict)
    return all_prediction

## Three different gradient algorithms

In [4]:
def Gradient(input_data, output_data, epoch, typ):
    # samples = 13960, features = 43586
    n_samples, n_features = input_data.shape  
    _, num_classes = output_data.shape
    weights = np.random.randn(n_features, num_classes)
    bias = np.random.randn(1, num_classes)
    min_loss = np.inf
    lr = 1e-3
    for i in range(epoch):
        
        if typ == 'GD':
            trainX_data = input_data
            trainY_data = output_data
            
        if typ == 'SGD':
            # select one sample randomly
            s = np.random.randint(0, n_samples, 1)
            trainX_data = input_data[s[0]].reshape(1, -1)
            trainY_data = output_data[s[0]]
            
        if typ == 'mini':
            # select b samples randomly from the whole training sample
            b = 10
            s = np.random.randint(0, n_samples, b)
            trainX_data = []
            trainY_data = []
            for idx in s:
                trainX_data.append(input_data[idx])
                trainY_data.append(output_data[idx])
            trainX_data = np.array(trainX_data)
            trainY_data = np.array(trainY_data)
        
        z = np.dot(trainX_data, weights) + bias
        f = softmax(z)
        loss = loss_function(f, trainY_data)
        
        weights = weights - lr * np.dot(trainX_data.T, loss)
        
        if loss.shape != bias.shape:
            bias = bias - lr * np.sum(loss, axis=0) / loss.mean()
        else:
            bias = bias - lr * loss
        
        epoch_loss = np.sum(loss) / trainX_data.shape[0]
        if min_loss > epoch_loss:
            np.save(f'{typ}_weight.pkl', weights)
            np.save(f'{typ}_bias.pkl', bias)
            min_loss = epoch_loss
        if (i+1) % 10 == 0:
            print(f'Epoch {i+1} loss: ', epoch_loss) 
    return weights, bias

## Training

In [7]:
gr = ['GD', 'SGD', 'mini']
epoch = 100
now = gr[0]
st = time.time()
w, b = Gradient(x_training, df_y_dummy, epoch, now)
en = time.time()
print("Time elpased: ", (en-st)/60, ' minutes')

Epoch 10 loss:  4.063684531010625
Epoch 20 loss:  5.222874010069847
Epoch 30 loss:  5.827993892870155
Epoch 40 loss:  4.898879943860212
Epoch 50 loss:  4.879181376976336
Epoch 60 loss:  5.560849396209576
Epoch 70 loss:  3.5320499518702833
Epoch 80 loss:  4.694903604939075
Epoch 90 loss:  4.988730324502268
Epoch 100 loss:  5.188927975294052
Time elpased:  0.03088204065958659  minutes


## Load weight and bias, then make prediction and save

In [187]:
w1 = np.load(f'{now}_weight.pkl.npy')
b1 = np.load(f'{now}_bias.pkl.npy')
al = save_prediction(x_test, w1, b1)
df_prediction = pd.DataFrame(al)
df_prediction.to_csv(f'{now}.csv')

  e = np.exp(x)
  return e / e.sum(axis=1, keepdims=True)


In [121]:
grad = pd.read_csv("mini.csv")
# grad['Unnamed: 0'] = 'ID'
# grad = grad.rename(columns={'Unnamed: 0' : 'ID', '0':'Class'})
# grad['ID'] = grad['ID'] + 1
# grad.to_csv('mini.csv')
# grad = grad.drop(columns = ['Unnamed: 0'])
# grad['ID'] = grad['ID'] - 1
grad.to_csv('mini.csv', index=False)
grad

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,ID,Class
0,0,0,1,601
1,1,1,2,401
2,2,2,3,101
3,3,3,4,901
4,4,4,5,1101
...,...,...,...,...
5984,5984,5984,5985,1501
5985,5985,5985,5986,601
5986,5986,5986,5987,401
5987,5987,5987,5988,301


In [135]:
grad1 = pd.read_csv("mini_final.csv")
# grad1 = grad1.reset_index(drop=True)
# grad1 = grad1.drop(columns=['Unnamed: 0'])
grad1

Unnamed: 0.1,Unnamed: 0,ID,Class
0,0,1,1701
1,1,2,401
2,2,3,201
3,3,4,1701
4,4,5,1001
...,...,...,...
5984,5984,5985,201
5985,5985,5986,101
5986,5986,5987,101
5987,5987,5988,1601


In [136]:
data = {"ID" : grad1["ID"], "Class" : grad1['Class']}
df = pd.DataFrame(data)
df.to_csv('mini_final.csv', index=False)

In [137]:
p = pd.read_csv('mini_final.csv')
p

Unnamed: 0,ID,Class
0,1,1701
1,2,401
2,3,201
3,4,1701
4,5,1001
...,...,...
5984,5985,201
5985,5986,101
5986,5987,101
5987,5988,1601
