In [1]:
import numpy as np
import pandas as pd

num_classes = 5
num_epochs = 100
num_gram = 2
lrs = [1e3, 1e2, 1e1, 1, 1e-1, 1e-2, 1e-3]
batch_size = 32

In [2]:
df = pd.read_csv('train.tsv', sep='\t')[:10000]
df = df.reindex(np.random.permutation(df.shape[0]))
df_train = df[:int(df.shape[0] * 0.8)]
df_test = df[int(df.shape[0] * 0.8):]

# Bag-of-Words

In [3]:
bag = dict()
for phrase in df_train['Phrase']:
    words = phrase.split(' ')
    for word in words:
        if word not in bag:
            bag[word] = len(bag)

In [4]:
X_train = np.zeros((df_train.shape[0], len(bag)))
for idx, phrase in enumerate(df_train['Phrase']):
    words = phrase.split(' ')
    for word in words:
        X_train[idx, bag[word]] = 1

In [5]:
Y_train = np.zeros((df_train.shape[0], num_classes))
for idx, sentiment in enumerate(df_train['Sentiment']):
    Y_train[idx, sentiment] = 1

In [6]:
X_test = np.zeros((df_test.shape[0], len(bag)))
for idx, phrase in enumerate(df_test['Phrase']):
    words = phrase.split(' ')
    for word in words:
        if word in bag:
            X_test[idx, bag[word]] = 1

In [7]:
W_init = 1e-4 * np.random.randn(len(bag), num_classes)

In [8]:
for lr in lrs:
    W = W_init.copy()
    for epoch in range(num_epochs):
        out = np.exp(np.dot(X_train, W))
        out = out / np.sum(out, axis=1, keepdims=True)
        dW = -(1 / df_train.shape[0]) * np.dot(X_train.T, out - Y_train)
        W += lr * dW

    out = np.exp(np.dot(X_test, W))
    out = out / np.sum(out, axis=1, keepdims=True)
    result = np.argmax(out, axis=1)
    print("Accuracy of batch when lr={}: {}".format(lr, np.mean(result == np.array(df_test['Sentiment']))))

Accuracy of batch when lr=1000.0: 0.4705
Accuracy of batch when lr=100.0: 0.5025
Accuracy of batch when lr=10.0: 0.495
Accuracy of batch when lr=1: 0.441
Accuracy of batch when lr=0.1: 0.4685
Accuracy of batch when lr=0.01: 0.49
Accuracy of batch when lr=0.001: 0.4305


In [9]:
for lr in lrs:
    W = W_init.copy()
    for epoch in range(num_epochs):
        for batch in range(df_train.shape[0] // batch_size):
            X = X_train[batch * batch_size:(batch + 1) * batch_size]
            Y = Y_train[batch * batch_size:(batch + 1) * batch_size]
            out = np.exp(np.dot(X, W))
            out = out / np.sum(out, axis=1, keepdims=True)
            dW = -(1 / batch_size) * np.dot(X.T, out - Y)
            W += lr * dW

    out = np.exp(np.dot(X_test, W))
    out = out / np.sum(out, axis=1, keepdims=True)
    result = np.argmax(out, axis=1)
    print("Accuracy of mini-batch when lr={}: {}".format(lr, np.mean(result == np.array(df_test['Sentiment']))))

  out = np.exp(np.dot(X, W))
  out = out / np.sum(out, axis=1, keepdims=True)


Accuracy of mini-batch when lr=1000.0: 0.0365
Accuracy of mini-batch when lr=100.0: 0.4665
Accuracy of mini-batch when lr=10.0: 0.4935
Accuracy of mini-batch when lr=1: 0.5235
Accuracy of mini-batch when lr=0.1: 0.509
Accuracy of mini-batch when lr=0.01: 0.472
Accuracy of mini-batch when lr=0.001: 0.441


In [10]:
for lr in lrs:
    W = W_init.copy()
    for epoch in range(num_epochs):
        order = np.random.permutation(df_train.shape[0])
        X_train_shuffle = X_train[order]
        Y_train_shuffle = Y_train[order]
        for batch in range(df_train.shape[0] // batch_size):
            X = X_train_shuffle[batch * batch_size:(batch + 1) * batch_size]
            Y = Y_train_shuffle[batch * batch_size:(batch + 1) * batch_size]
            out = np.exp(np.dot(X, W))
            out = out / np.sum(out, axis=1, keepdims=True)
            dW = -(1 / batch_size) * np.dot(X.T, out - Y)
            W += lr * dW

    out = np.exp(np.dot(X_test, W))
    out = out / np.sum(out, axis=1, keepdims=True)
    result = np.argmax(out, axis=1)
    print("Accuracy of shuffle when lr={}: {}".format(lr, np.mean(result == np.array(df_test['Sentiment']))))

  out = np.exp(np.dot(X, W))
  out = out / np.sum(out, axis=1, keepdims=True)


Accuracy of shuffle when lr=1000.0: 0.0365
Accuracy of shuffle when lr=100.0: 0.501
Accuracy of shuffle when lr=10.0: 0.5025
Accuracy of shuffle when lr=1: 0.5305
Accuracy of shuffle when lr=0.1: 0.5085
Accuracy of shuffle when lr=0.01: 0.471
Accuracy of shuffle when lr=0.001: 0.441


# N-gram

In [11]:
gram = dict()
for phrase in df_train['Phrase']:
    words = phrase.split(' ')
    for g in range(num_gram):
        for idx in range(len(words) - g):
            word = "".join(words[idx:idx + g + 1])
            if word not in gram:
               gram[word] = len(gram)

In [12]:
X_train = np.zeros((df_train.shape[0], len(gram)))
for idx, phrase in enumerate(df_train['Phrase']):
    words = phrase.split(' ')
    for g in range(num_gram):
        for pos in range(len(words) - g):
            word = "".join(words[pos:pos + g + 1])
            if word in gram:
                X_train[idx, gram[word]] = 1

In [13]:
Y_train = np.zeros((df_train.shape[0], num_classes))
for idx, sentiment in enumerate(df_train['Sentiment']):
    Y_train[idx, sentiment] = 1

In [14]:
X_test = np.zeros((df_test.shape[0], len(gram)))
for idx, phrase in enumerate(df_test['Phrase']):
    words = phrase.split(' ')
    for g in range(num_gram):
        for pos in range(len(words) - g):
            word = "".join(words[pos:pos + g + 1])
            if word in gram:
                X_test[idx, gram[word]] = 1

In [15]:
W_init = 1e-4 * np.random.randn(len(gram), num_classes)

In [16]:
for lr in lrs:
    W = W_init.copy()
    for epoch in range(num_epochs):
        out = np.exp(np.dot(X_train, W))
        out = out / np.sum(out, axis=1, keepdims=True)
        dW = -(1 / df_train.shape[0]) * np.dot(X_train.T, out - Y_train)
        W += lr * dW

    out = np.exp(np.dot(X_test, W))
    out = out / np.sum(out, axis=1, keepdims=True)
    result = np.argmax(out, axis=1)
    print("Accuracy of batch when lr={}: {}".format(lr, np.mean(result == np.array(df_test['Sentiment']))))

Accuracy of batch when lr=1000.0: 0.531
Accuracy of batch when lr=100.0: 0.5635
Accuracy of batch when lr=10.0: 0.532
Accuracy of batch when lr=1: 0.4845
Accuracy of batch when lr=0.1: 0.4665
Accuracy of batch when lr=0.01: 0.4815
Accuracy of batch when lr=0.001: 0.413


In [17]:
for lr in lrs:
    W = W_init.copy()
    for epoch in range(num_epochs):
        for batch in range(df_train.shape[0] // batch_size):
            X = X_train[batch * batch_size:(batch + 1) * batch_size]
            Y = Y_train[batch * batch_size:(batch + 1) * batch_size]
            out = np.exp(np.dot(X, W))
            out = out / np.sum(out, axis=1, keepdims=True)
            dW = -(1 / batch_size) * np.dot(X.T, out - Y)
            W += lr * dW

    out = np.exp(np.dot(X_test, W))
    out = out / np.sum(out, axis=1, keepdims=True)
    result = np.argmax(out, axis=1)
    print("Accuracy of mini-batch when lr={}: {}".format(lr, np.mean(result == np.array(df_test['Sentiment']))))

  out = np.exp(np.dot(X, W))
  out = out / np.sum(out, axis=1, keepdims=True)


Accuracy of mini-batch when lr=1000.0: 0.0365
Accuracy of mini-batch when lr=100.0: 0.5285
Accuracy of mini-batch when lr=10.0: 0.553
Accuracy of mini-batch when lr=1: 0.5695
Accuracy of mini-batch when lr=0.1: 0.562
Accuracy of mini-batch when lr=0.01: 0.5055
Accuracy of mini-batch when lr=0.001: 0.457


In [18]:
for lr in lrs:
    W = W_init.copy()
    for epoch in range(num_epochs):
        order = np.random.permutation(df_train.shape[0])
        X_train_shuffle = X_train[order]
        Y_train_shuffle = Y_train[order]
        for batch in range(df_train.shape[0] // batch_size):
            X = X_train_shuffle[batch * batch_size:(batch + 1) * batch_size]
            Y = Y_train_shuffle[batch * batch_size:(batch + 1) * batch_size]
            out = np.exp(np.dot(X, W))
            out = out / np.sum(out, axis=1, keepdims=True)
            dW = -(1 / batch_size) * np.dot(X.T, out - Y)
            W += lr * dW

    out = np.exp(np.dot(X_test, W))
    out = out / np.sum(out, axis=1, keepdims=True)
    result = np.argmax(out, axis=1)
    print("Accuracy of shuffle when lr={}: {}".format(lr, np.mean(result == np.array(df_test['Sentiment']))))

  out = np.exp(np.dot(X, W))
  out = out / np.sum(out, axis=1, keepdims=True)


Accuracy of shuffle when lr=1000.0: 0.0365
Accuracy of shuffle when lr=100.0: 0.5515
Accuracy of shuffle when lr=10.0: 0.5615
Accuracy of shuffle when lr=1: 0.573
Accuracy of shuffle when lr=0.1: 0.565
Accuracy of shuffle when lr=0.01: 0.505
Accuracy of shuffle when lr=0.001: 0.457
