In [13]:
import torch
from torch.autograd import Variable
import numpy as np
import torch.functional as F
import torch.nn.functional as F

In [14]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
device = get_default_device()
print(device)

def to_device(data, device):
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

cuda


In [15]:
# corpus = [
#     'he is a king',
#     'she is a queen',
#     'he is a man',
#     'she is a woman',
#     'warsaw is poland capital',
#     'berlin is germany capital',
#     'paris is france capital',   
# ]

import re

al_regex = re.compile(r"[^a-zA-Z]")


class MBCorpus:
    """An iterator that yields sentences (lists of str)."""

    def __iter__(self):
        corpus_path = "data/mahabharat_gutenberg_lemmatized_sents.txt"
        with open(corpus_path) as fp:
            for line in fp.readlines():
                tokens = line.split()
                tokens = [al_regex.sub('', token) for token in tokens]
                yield tokens



In [16]:

mb_sents = MBCorpus()
vocabulary = []
for sentence in mb_sents:
    for token in sentence:
        if token not in vocabulary:
            vocabulary.append(token)

word2idx = {w: idx for (idx, w) in enumerate(vocabulary)}
idx2word = {idx: w for (idx, w) in enumerate(vocabulary)}

vocabulary_size = len(vocabulary)
print(f"vocabulary_size:{vocabulary_size}")

vocabulary_size:16456


In [None]:
def get_input_layer(word_idx):
    x = torch.zeros(vocabulary_size).type(torch.cuda.FloatTensor)
    x[word_idx] = 1.0
    
    return to_device(x, device)

In [35]:
# Example of target with class indices
input = torch.randn(3, 5, requires_grad=True)
target = torch.randint(5, (3,), dtype=torch.int64)
loss = F.cross_entropy(input, target)
loss.backward()
# Example of target with class probabilities
input = torch.randn(3, 5, requires_grad=True)
print(input)
print(target)
target = torch.randn(3, 5).softmax(dim=1)
loss = F.cross_entropy(input, target)
loss.backward()

tensor([[ 1.3342, -0.1637,  1.7855,  1.5707,  0.9831],
        [-1.7907,  0.5738, -1.0215, -0.5571, -0.4025],
        [-1.9868,  0.1662,  1.4823,  0.5362,  1.5519]], requires_grad=True)
tensor([1, 3, 0])


In [None]:
embedding_dims = 100
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).type(torch.cuda.FloatTensor), requires_grad=True)
# to_device(W1, device)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).type(torch.cuda.FloatTensor), requires_grad=True)
# to_device(W2, device)
num_epochs = 101
learning_rate = 0.1
window_size = 2

for epo in range(num_epochs):
    loss_val = 0
    # for each sentence
    for sent_idx, sentence in enumerate(mb_sents):
        if sent_idx!=0 and sent_idx%100000==0:            
            print(f"processing {sent_idx}th sentence")
            # break
            
        for idx, target_word in enumerate(sentence):
            target_idx = word2idx[target_word]
            context = [sentence[idx] for idx in range(max(0, target_idx - window_size), min(target_idx + window_size + 1, len(sentence))) if idx != target_idx]
                # print(f"target_word:{target_word}, context:{context}")            
            if not context:
                continue
            context_indices = [word2idx[word] for word in context]
            
            for context_idx in context_indices:                
                x = Variable(get_input_layer(target_idx))
                # y_true = Variable(torch.from_numpy(np.array([context_idx])).long())
                y_true = Variable(get_input_layer(context_idx))
                # print(y_true)
                # print(f"W1*x: {W1.shape}*{x.shape}")
                z1 = torch.matmul(W1, x)
                # print(f"W2*z1: {W2.shape}*{z1.shape}")
                z2 = torch.matmul(W2, z1)
                # make z2 look like one-hot via softmax and then setting max probability to 1
                # z2_soft_max = F.softmax(z2, dim=0)
                # z2_one_hot = torch.zeros_like(z2_soft_max).type(torch.cuda.FloatTensor)
                # z2_one_hot[torch.argmax(z2_soft_max)] = 1
                
                
                # calculate softmax of z2
                sf_z2 = F.softmax(z2, dim=0)
                # print(f"sf_z2.shape: {sf_z2.shape}")
                # print(f"y_true.shape: {y_true.shape}")
                # apply cross entropy loss
                # print()
                loss = F.cross_entropy(sf_z2, y_true) #
                
                
                # print(f"z2: {z2}")
                # print(f"z2.shape: {z2.shape}")
                # print(f"z2: {z2}")
                # log_softmax = F.log_softmax(z2, dim=0)
                # print(f"log_softmax: {log_softmax}")
                # loss = F.nll_loss(log_softmax.view(1,-1), y_true)
                loss_val += loss.item()
                loss.backward()
                W1.data -= learning_rate * W1.grad.data
                W2.data -= learning_rate * W2.grad.data

                W1.grad.data.zero_()
                W2.grad.data.zero_()
    print(f'Loss at epo {epo}: {loss_val}')
        # get corresponding context embeddings
        # for each target, context pair train
    

processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 0: 1531889.403954506
processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 1: 1531878.3976774216
processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 2: 1531815.7917280197
processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 3: 1531801.3615150452
processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 4: 1531763.4347925186
processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 5: 1531759.412569046
processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 6: 1531759.330965042
processing 100000th sentence
processing 200000th sentence
processing 300000th sentence
Loss at epo 7: 1531759.262793541
processing 100000th sentence
process

In [8]:
embedding_dims = 5
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(), requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(), requires_grad=True)
num_epochs = 101
learning_rate = 0.001

for epo in range(num_epochs):
    loss_val = 0
    for data, target in idx_pairs:
        x = Variable(get_input_layer(data)).float()
        y_true = Variable(torch.from_numpy(np.array([target])).long())

        z1 = torch.matmul(W1, x)
        z2 = torch.matmul(W2, z1)
    
        log_softmax = F.log_softmax(z2, dim=0)

        loss = F.nll_loss(log_softmax.view(1,-1), y_true)
        loss_val += loss.item()
        loss.backward()
        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data

        W1.grad.data.zero_()
        W2.grad.data.zero_()
    if epo % 10 == 0:    
        print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')

Loss at epo 0: 4.04387846807284
Loss at epo 10: 3.6861324227814163
Loss at epo 20: 3.4341715413544858
Loss at epo 30: 3.2438723781279157
Loss at epo 40: 3.098003941135747
Loss at epo 50: 2.984993450130735
Loss at epo 60: 2.895948580120291
Loss at epo 70: 2.8241932072809766
Loss at epo 80: 2.7649008263434682
Loss at epo 90: 2.714670770721776
Loss at epo 100: 2.6711281963757108
