In [19]:
import pandas as pd
import torch
from torch.autograd import Variable
import torch.nn.functional as F
pd.options.display.max_rows = 100
pd.options.display.max_columns =100
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import sys
sys.path.insert(0, '..')
from Common import preprocessing,evaluation,CosineClassifier as cos
classes_map = {'DOC':0, 'ENTER':1, 'ORG':2, 'PRIV':3, 'RANG':4, 'HOST':5}

In [2]:
df = pd.read_csv('..//Data//data.txt', delimiter=';', engine='python',encoding='utf8')

In [3]:
questions = np.array(df.question)
questions = preprocessing.preprocess_list(questions)

In [4]:
tokenized_corpus = []
for q in questions:
    tokenized_corpus.append(q.split())

In [5]:
vocabulary = []
for sentence in tokenized_corpus:
    for token in sentence:
        if token not in vocabulary:
            vocabulary.append(token)

word2idx = {w: idx for (idx, w) in enumerate(vocabulary)}
idx2word = {idx: w for (idx, w) in enumerate(vocabulary)}

vocabulary_size = len(vocabulary)

In [9]:
window_size = 2
idx_pairs = []
# for each sentence
for sentence in tokenized_corpus:
    indices = [word2idx[word] for word in sentence]
    # for each word, threated as center word
    for center_word_pos in range(len(indices)):
        # for each window position
        for w in range(-window_size, window_size + 1):
            context_word_pos = center_word_pos + w
            # make soure not jump out sentence
            if context_word_pos < 0 or context_word_pos >= len(indices) or center_word_pos == context_word_pos:
                continue
            context_word_idx = indices[context_word_pos]
            idx_pairs.append((indices[center_word_pos], context_word_idx))

idx_pairs = np.array(idx_pairs) # it will be useful to have this as numpy array

In [14]:
def get_input_layer(word_idx):
    x = torch.zeros(vocabulary_size).float()
    x[word_idx] = 1.0
    return x

In [44]:
embedding_dims = 300
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(), requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(), requires_grad=True)
num_epochs = 100
learning_rate = 0.01

for epo in range(num_epochs):
    loss_val = 0
    for data, target in idx_pairs:
        x = Variable(get_input_layer(data)).float()
        y_true = Variable(torch.from_numpy(np.array([target])).long())

        z1 = torch.matmul(W1, x)
        z2 = torch.matmul(W2, z1)
        print(z2)
        log_softmax = F.log_softmax(z2, dim=0)
        
        loss = F.nll_loss(log_softmax.view(1,-1), y_true)
        loss_val += loss.data[0]
        loss.backward()
        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data

        W1.grad.data.zero_()
        W2.grad.data.zero_()
    if epo % 10 == 0:
        print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')

tensor([ -0.9143, -17.5634,  -7.5035,  ...,  -7.1813, -27.7733,  33.1529],
       grad_fn=<MvBackward>)
tensor([ -0.7191, -11.5696,  -7.3219,  ...,  -7.1713, -27.5877,  32.8819],
       grad_fn=<MvBackward>)
tensor([ 32.2153, -11.8862, -15.5778,  ...,   6.7869, -11.3282,  26.3684],
       grad_fn=<MvBackward>)
tensor([ 37.7385, -11.2825, -15.6497,  ...,   6.6124, -11.1931,  26.1632],
       grad_fn=<MvBackward>)
tensor([ 37.6568, -11.0457,  -9.8261,  ...,   6.8058, -11.2043,  26.1125],
       grad_fn=<MvBackward>)
tensor([-29.4367,  -7.1796, -24.0452,  ...,  -9.9848,  14.3188, -19.9247],
       grad_fn=<MvBackward>)
tensor([-23.8617,  -7.2792, -24.1379,  ...,  -9.7173,  14.3023, -20.3252],
       grad_fn=<MvBackward>)
tensor([-23.8389,  -1.4576, -24.1525,  ...,  -9.6678,  14.2578, -20.5930],
       grad_fn=<MvBackward>)
tensor([-23.9585,  -1.4717, -24.0826,  ...,  -9.6451,  14.2560, -21.0505],
       grad_fn=<MvBackward>)
tensor([ -0.1639,   7.1561, -24.5879,  ...,  15.1725, -37.6663, 

tensor([-16.1460,   6.7382, -14.1854,  ...,   1.1751,   5.4973,  27.8813],
       grad_fn=<MvBackward>)
tensor([-15.8634,   7.1324, -14.6115,  ...,   1.2761,   5.5964,  27.5321],
       grad_fn=<MvBackward>)
tensor([-15.8786,   7.2960, -15.1432,  ...,   1.1234,   5.7541,  27.4925],
       grad_fn=<MvBackward>)
tensor([-15.8396,   7.2006, -14.8250,  ...,   0.7525,   5.9936,  27.2158],
       grad_fn=<MvBackward>)
tensor([  0.0104, -20.5553, -38.8966,  ..., -28.9827,   2.6709, -26.9312],
       grad_fn=<MvBackward>)
tensor([ -0.1266, -20.3717, -39.2153,  ..., -29.0587,   2.9022, -26.7672],
       grad_fn=<MvBackward>)
tensor([ -0.0569, -20.2303, -39.1597,  ..., -28.9921,   2.8477, -26.8576],
       grad_fn=<MvBackward>)
tensor([ -0.2924, -19.8061, -39.2160,  ..., -29.0992,   2.7930, -26.6816],
       grad_fn=<MvBackward>)
tensor([ 29.2521, -11.5064,  27.2044,  ..., -15.5035,  -9.5871, -19.6580],
       grad_fn=<MvBackward>)
tensor([ 29.1596, -11.4640,  27.1108,  ..., -15.3886,  -9.5792, 

tensor([-15.9147,   7.5382, -15.4239,  ...,   0.4653,   6.6467,  27.6766],
       grad_fn=<MvBackward>)
tensor([-15.6845,   7.6094, -15.3535,  ...,   0.3628,   6.6783,  27.4670],
       grad_fn=<MvBackward>)
tensor([-15.6397,   7.4544, -15.1158,  ...,   0.1704,   6.7509,  27.4046],
       grad_fn=<MvBackward>)
tensor([ -9.9613, -20.7902,  -0.5185,  ...,   9.1127, -12.1601,  -6.1660],
       grad_fn=<MvBackward>)
tensor([ -9.9726, -20.7162,  -0.5265,  ...,   8.8544, -12.4307,  -6.0836],
       grad_fn=<MvBackward>)
tensor([-10.0442, -20.7543,  -0.5029,  ...,   8.7713, -12.5165,  -5.9344],
       grad_fn=<MvBackward>)
tensor([ -9.8277, -20.7340,  -0.4305,  ...,   9.1261, -12.4194,  -6.0614],
       grad_fn=<MvBackward>)
tensor([-15.4880,   7.4206, -15.0159,  ...,   0.2371,   6.9633,  27.2649],
       grad_fn=<MvBackward>)
tensor([-15.3547,   7.3761, -14.8966,  ...,   0.3165,   7.1723,  27.1299],
       grad_fn=<MvBackward>)
tensor([-15.3369,   7.2097, -14.6298,  ...,   0.1323,   7.2402, 

KeyboardInterrupt: 

In [29]:
Variable(torch.from_numpy(np.array([1])).long())

tensor([1])

In [59]:
W1.grad.data

tensor([[0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])