In [88]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib as mpl
# import matplotlib and numpy together 
import matplotlib.pylab as pylab
import numpy as np
%matplotlib inline

In [89]:
# Data preparation
# The `re` module provides regular-expression operations such as re.compile(), re.search(), re.match(), and re.findall().
import re

In [90]:
# Toy training corpus: a short passage of English prose held in one multi-line string.
# Later cells clean it, split it into words and build the vocabulary from it.
sentences = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells."""

Clean Data

In [91]:
# Replace every run of characters that are not ASCII letters or digits
# (punctuation, newlines, ...) with a single space.
sentences = re.sub('[^A-Za-z0-9]+', ' ', sentences)

# Remove one-character words. The word-boundary pattern does not consume the
# neighbouring spaces, so every one-character word is removed even when several
# follow each other ("a b c"); a pattern anchored on the surrounding spaces
# would skip every second one because the shared space is already consumed.
sentences = re.sub(r'\b\w\b', ' ', sentences)

# Collapse the whitespace runs left behind by the removals and trim both ends.
sentences = ' '.join(sentences.split())

# Lower-case everything so that e.g. "Computational" and "computational"
# become the same vocabulary entry.
sentences = sentences.lower()

Vocabulary

In [92]:
# Split the cleaned text on whitespace into a list of words (duplicates kept, corpus order preserved).
words = sentences.split()
# Convert the list into a set, which keeps each distinct word exactly once.
vocab = set(words)

In [93]:
# Number of distinct words in the vocabulary.
vocab_size = len(vocab)
# Dimensionality of each word embedding vector.
embed_dim = 10
# Context size; the weight matrix below is sized for 2 * context_size context words per example.
context_size = 2

Implementation 

In [94]:
# Assign an integer index to every vocabulary word.
# A set of strings has no stable iteration order across interpreter runs
# (string hashing is randomised per process), so the vocabulary is sorted first
# to give each word the same index on every run.
word_to_ix = {word: i for i, word in enumerate(sorted(vocab))}
# Reverse lookup (index -> word), derived from word_to_ix so the two mappings
# can never disagree.
ix_to_word = {i: word for word, i in word_to_ix.items()}

Data bags

In [95]:
# Build the training pairs: ([context words], target word).
# Each target word takes `context_size` words on either side as its context, so
# the window width stays in step with the weight matrix, whose input width is
# 2 * context_size * embed_dim. Words closer than `context_size` positions to
# either end of the corpus have no full window and are skipped.

data = []
for i in range(context_size, len(words) - context_size):
    context = words[i - context_size:i] + words[i + 1:i + context_size + 1]
    target = words[i]
    data.append((context, target))
print(data[:5])

[(['we', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'computational'], 'idea')]


Embeddings

In [96]:
# Embedding table: a 2-D array with one row per vocabulary word (vocab_size rows)
# and embed_dim columns, filled with uniform samples from [0, 1).
# A dedicated, seeded generator makes the table identical on every
# Restart & Run All without touching NumPy's global random state.
embeddings = np.random.RandomState(0).random_sample((vocab_size, embed_dim))

Linear Model

In [97]:
def linear(m, theta):
    """Apply a bias-free linear layer.

    Args:
        m: input array; must provide a ``dot`` method (e.g. a NumPy array).
        theta: weight matrix to multiply ``m`` by.

    Returns:
        The matrix product ``m . theta``.
    """
    return m.dot(theta)

Log softmax + NLLloss = Cross Entropy

In [98]:
def log_softmax(x):
    """Return the log-softmax of ``x`` along its last axis.

    The result is computed as ``shifted - log(sum(exp(shifted)))`` with
    ``shifted = x - max(x)``. Working in log space avoids taking ``log`` of a
    probability that has underflowed to zero, which would give ``-inf`` for
    logits far below the maximum. Normalisation is done per row (last axis),
    matching the row-wise indexing used by ``NLLLoss`` and the row-wise softmax
    in the gradient function; for a 1-D input or a single-row 2-D input this is
    the same as normalising over the whole array.

    Args:
        x: array-like of logits with at least one dimension.

    Returns:
        Array of the same shape as ``x`` holding log-probabilities; along the
        last axis their exponentials sum to 1.
    """
    x = np.asarray(x)
    # Subtracting the row maximum keeps exp() from overflowing.
    shifted = x - np.max(x, axis=-1, keepdims=True)
    log_norm = np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    return shifted - log_norm

In [99]:
def NLLLoss(logs, targets):
    """Mean negative log-likelihood of the target classes.

    Args:
        logs: 2-D array of log-probabilities, indexed ``[row, class]``.
        targets: sequence of class indices, one per row of ``logs``.

    Returns:
        The negated average of ``logs[i, targets[i]]`` over all rows ``i``.
    """
    n_rows = len(targets)
    # Pick, for every row, the log-probability assigned to its target class.
    picked = logs[np.arange(n_rows), targets]
    total = picked.sum()
    return -total / len(picked)

In [100]:
def log_softmax_crossentropy_with_logits(logits,target):
    """Gradient of the mean cross-entropy loss with respect to the logits.

    For cross-entropy built from log-softmax followed by negative
    log-likelihood, the gradient with respect to the logits is
    ``softmax(logits) - one_hot(target)``, divided here by the number of rows
    so that it corresponds to the mean loss.

    The softmax is computed on logits shifted by their row maximum. Without the
    shift ``exp`` overflows to ``inf`` for large logits and the division yields
    ``nan``; the shift leaves the softmax value unchanged.

    Args:
        logits: 2-D array of raw scores, indexed ``[row, class]``.
        target: class index (or indices) for each row of ``logits``.

    Returns:
        Array with the same shape as ``logits`` holding the gradient.
    """
    one_hot = np.zeros_like(logits)
    one_hot[np.arange(len(logits)), target] = 1

    # Numerically stable softmax along the class axis; exp() is evaluated once.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exps = np.exp(shifted)
    softmax = exps / exps.sum(axis=-1, keepdims=True)

    return (softmax - one_hot) / logits.shape[0]

Forward function

In [101]:

def forward(context_idxs, theta):
    """Run the forward pass for one training example.

    The embedding rows selected by ``context_idxs`` are read from the
    module-level ``embeddings`` array and flattened into a single row vector,
    which is passed through the linear layer and then through log-softmax.

    Args:
        context_idxs: indices into ``embeddings`` for the context words.
        theta: weight matrix of the linear layer.

    Returns:
        A 3-tuple ``(flattened context embeddings, logits, log-probabilities)``.
    """
    flat_context = embeddings[context_idxs].reshape(1, -1)
    logits = linear(flat_context, theta)
    log_probs = log_softmax(logits)
    return flat_context, logits, log_probs

Backward function

In [102]:
def backward(preds, theta, target_idxs):
    """Compute the gradient of the loss with respect to the weight matrix.

    Args:
        preds: the 3-tuple returned by ``forward``
            (flattened context embeddings, logits, log-probabilities).
        theta: weight matrix; accepted for symmetry with ``forward`` but not
            read by this function.
        target_idxs: index (or indices) of the target word(s).

    Returns:
        The gradient with respect to the weights: the transposed input row
        multiplied by the gradient with respect to the logits.
    """
    flat_context, logits, _ = preds
    grad_logits = log_softmax_crossentropy_with_logits(logits, target_idxs)
    return flat_context.T.dot(grad_logits)

Optimize function

In [103]:
def optimize(theta, grad, lr=0.03):
    """Take one gradient-descent step.

    Args:
        theta: parameters to update; updated with ``-=``, so a NumPy array is
            modified in place.
        grad: gradient of the loss with respect to ``theta``.
        lr: learning rate (step size).

    Returns:
        ``theta`` after the step.
    """
    step = lr * grad
    theta -= step
    return theta

Training 

In [104]:
# Initialise the weight matrix of the linear layer with uniform samples from [-1, 1).
# Rows: one per input feature (2 * context_size context words, embed_dim values each).
# Columns: one per vocabulary word.
# A dedicated, seeded generator makes the initial weights identical on every
# Restart & Run All without touching NumPy's global random state.
theta = np.random.RandomState(1).uniform(-1, 1, (2 * context_size * embed_dim, vocab_size))

In [105]:
# Train the linear layer with per-example gradient descent (one update per training pair).
# epoch_losses maps epoch number -> list of the per-example losses seen during that epoch.
# Only theta is updated here; the embeddings array is read by forward() but never modified.
epoch_losses = {}

# 80 passes over the training pairs.
for epoch in range(80):

    losses =  []

    for context, target in data:
        # Forward pass on the indices of the context words.
        context_idxs = np.array([word_to_ix[w] for w in context])
        preds = forward(context_idxs, theta)

        # preds[-1] holds the log-probabilities; the target is wrapped in a
        # one-element array because NLLLoss indexes one target per row.
        target_idxs = np.array([word_to_ix[target]])
        loss = NLLLoss(preds[-1], target_idxs)

        # The loss is recorded before this example's weight update is applied.
        losses.append(loss)

        # Backward pass and gradient-descent step on theta.
        grad = backward(preds, theta, target_idxs)
        theta = optimize(theta, grad, lr=0.03)
        
     
    epoch_losses[epoch] = losses

Analyze 

Plot loss/epoch

In [106]:
# ix = np.arange(0,80)

# fig = plt.figure()
# fig.suptitle('Epoch/Losses', fontsize=20)
# plt.plot(ix,[epoch_losses[i][0] for i in ix])
# plt.xlabel('Epochs', fontsize=12)
# plt.ylabel('Losses', fontsize=12)

Predict function

In [107]:
def predict(words):
    """Predict the target word for a list of context words.

    Looks each word up in the module-level ``word_to_ix`` mapping, runs
    ``forward`` with the module-level ``theta`` and returns the word whose
    log-probability is highest.

    Args:
        words: context words; each must be a key of ``word_to_ix``.

    Returns:
        The predicted word, looked up in ``ix_to_word``.
    """
    context_idxs = np.array([word_to_ix[token] for token in words])
    log_probs = forward(context_idxs, theta)[-1]
    best_ix = np.argmax(log_probs)
    return ix_to_word[best_ix]

In [108]:
# Sanity check on the first training pair, (['we', 'are', 'to', 'study'], 'about'):
# the expected prediction is 'about'.
predict(['we', 'are', 'to', 'study'])

'about'

Accuracy

In [109]:
def accuracy():
    """Return the fraction of pairs in ``data`` that ``predict`` gets right.

    Every (context, target) pair of the module-level ``data`` list is run
    through ``predict``; the result is 1 minus the share of mismatches.
    """
    wrong = sum(1 for context, target in data if predict(context) != target)
    return 1 - wrong / len(data)

In [110]:
# Accuracy measured on the same (context, target) pairs the model was trained on.
accuracy()

1.0

In [111]:
# A context that is not one of the training windows, although all four words are in the vocabulary.
predict(['processes', 'manipulate', 'things', 'study'])

'computational'