In [1]:
sentences = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells."""

In [17]:
import re
import numpy as np

In [4]:
sentences = re.sub('[^A-Za-z0-9]+', ' ', sentences)

# remove 1 letter words
sentences = re.sub(r'(?:^| )\w(?:$| )', ' ', sentences).strip()

# lower all characters
sentences = sentences.lower()

In [5]:
sentences

'we are about to study the idea of computational process computational processes are abstract beings that inhabit computers as they evolve processes manipulate other abstract things called data the evolution of process is directed by pattern of rules called program people create programs to direct processes in effect we conjure the spirits of the computer with our spells'

In [7]:
words = sentences.split()
vocab = set(words)

In [8]:
words

['we',
 'are',
 'about',
 'to',
 'study',
 'the',
 'idea',
 'of',
 'computational',
 'process',
 'computational',
 'processes',
 'are',
 'abstract',
 'beings',
 'that',
 'inhabit',
 'computers',
 'as',
 'they',
 'evolve',
 'processes',
 'manipulate',
 'other',
 'abstract',
 'things',
 'called',
 'data',
 'the',
 'evolution',
 'of',
 'process',
 'is',
 'directed',
 'by',
 'pattern',
 'of',
 'rules',
 'called',
 'program',
 'people',
 'create',
 'programs',
 'to',
 'direct',
 'processes',
 'in',
 'effect',
 'we',
 'conjure',
 'the',
 'spirits',
 'of',
 'the',
 'computer',
 'with',
 'our',
 'spells']

In [9]:
vocab

{'about',
 'abstract',
 'are',
 'as',
 'beings',
 'by',
 'called',
 'computational',
 'computer',
 'computers',
 'conjure',
 'create',
 'data',
 'direct',
 'directed',
 'effect',
 'evolution',
 'evolve',
 'idea',
 'in',
 'inhabit',
 'is',
 'manipulate',
 'of',
 'other',
 'our',
 'pattern',
 'people',
 'process',
 'processes',
 'program',
 'programs',
 'rules',
 'spells',
 'spirits',
 'study',
 'that',
 'the',
 'they',
 'things',
 'to',
 'we',
 'with'}

In [10]:
vocab_size = len(vocab)
embed_dim = 10
context_size = 2

In [11]:
vocab_size

43

In [12]:
word_to_ix = {word: i for i, word in enumerate(vocab)}
ix_to_word = {i: word for i, word in enumerate(vocab)}

In [13]:
word_to_ix

{'is': 0,
 'other': 1,
 'with': 2,
 'study': 3,
 'of': 4,
 'idea': 5,
 'data': 6,
 'direct': 7,
 'abstract': 8,
 'that': 9,
 'spells': 10,
 'spirits': 11,
 'evolution': 12,
 'to': 13,
 'they': 14,
 'by': 15,
 'manipulate': 16,
 'program': 17,
 'called': 18,
 'the': 19,
 'our': 20,
 'effect': 21,
 'things': 22,
 'computer': 23,
 'beings': 24,
 'evolve': 25,
 'pattern': 26,
 'processes': 27,
 'are': 28,
 'rules': 29,
 'people': 30,
 'conjure': 31,
 'about': 32,
 'programs': 33,
 'process': 34,
 'as': 35,
 'we': 36,
 'computational': 37,
 'directed': 38,
 'inhabit': 39,
 'create': 40,
 'in': 41,
 'computers': 42}

In [14]:
ix_to_word

{0: 'is',
 1: 'other',
 2: 'with',
 3: 'study',
 4: 'of',
 5: 'idea',
 6: 'data',
 7: 'direct',
 8: 'abstract',
 9: 'that',
 10: 'spells',
 11: 'spirits',
 12: 'evolution',
 13: 'to',
 14: 'they',
 15: 'by',
 16: 'manipulate',
 17: 'program',
 18: 'called',
 19: 'the',
 20: 'our',
 21: 'effect',
 22: 'things',
 23: 'computer',
 24: 'beings',
 25: 'evolve',
 26: 'pattern',
 27: 'processes',
 28: 'are',
 29: 'rules',
 30: 'people',
 31: 'conjure',
 32: 'about',
 33: 'programs',
 34: 'process',
 35: 'as',
 36: 'we',
 37: 'computational',
 38: 'directed',
 39: 'inhabit',
 40: 'create',
 41: 'in',
 42: 'computers'}

In [15]:
data = []
for i in range(2, len(words) - 2):
    context = [words[i - 2], words[i - 1], words[i + 1], words[i + 2]]
    target = words[i]
    data.append((context, target))
print(data[:5])

[(['we', 'are', 'to', 'study'], 'about'), (['are', 'about', 'study', 'the'], 'to'), (['about', 'to', 'the', 'idea'], 'study'), (['to', 'study', 'idea', 'of'], 'the'), (['study', 'the', 'of', 'computational'], 'idea')]


In [73]:
embeddings =  np.random.random_sample((vocab_size, embed_dim))
embeddings.shape

(43, 10)

In [20]:
def linear(m, theta):
    w = theta
    return m.dot(w)

In [23]:
np.max()

2.5

In [121]:
def NLLLoss(logs, targets):
    print("Soft?",logs[0][32])
    print("targets",targets)
    print("len_targets",len(targets))
    out = logs[range(len(targets)), targets]
    print("out",out)
    return -out.sum()/len(out)

In [35]:
def log_softmax(x):
    #print("Max(x)",np.max(x))
    e_x = np.exp(x - np.max(x))
    return np.log(e_x / e_x.sum())

In [34]:
def log_softmax_crossentropy_with_logits(logits,target):

    out = np.zeros_like(logits)
    #print("Output",out)
    out[np.arange(len(logits)),target] = 1
    softmax = np.exp(logits) / np.exp(logits).sum(axis=-1,keepdims=True)
    
    return (- out + softmax) / logits.shape[0]

In [126]:
def forward(context_idxs, theta):
    #print("Embeddings of context ids",embeddings[context_idxs])
    m = embeddings[context_idxs].reshape(1, -1)
   # print("Feedforward",m)
   # print("Feedforward shape",m.shape)
    n = linear(m, theta)
    #print("Linear",n)
    o = log_softmax(n)
   # print("Output",o.shape)
    
    return m, n, o

In [29]:
def backward(preds, theta, target_idxs):
    m, n, o = preds
    
    dlog = log_softmax_crossentropy_with_logits(n, target_idxs)
    dw = m.T.dot(dlog)
    
    return dw

In [106]:
a=[1,2,3]

In [107]:
a[-1]

3

In [88]:
theta = np.random.uniform(-1, 1, (2* context_size * embed_dim, vocab_size))
theta

array([[ 0.19398642,  0.47488071, -0.19330967, ...,  0.90056799,
         0.59291525,  0.89472014],
       [-0.86479823, -0.98798674,  0.21539986, ..., -0.70982109,
         0.91966191, -0.23248075],
       [ 0.87902656, -0.7906484 ,  0.3084989 , ...,  0.14760591,
        -0.23308727, -0.97761376],
       ...,
       [ 0.6608078 , -0.99179163,  0.79492688, ..., -0.42627039,
        -0.63880795,  0.51782057],
       [-0.69988148, -0.35806668, -0.11639432, ..., -0.52873693,
        -0.54576869, -0.43067009],
       [ 0.22729056, -0.39434002, -0.4226128 , ..., -0.13348044,
         0.20108782, -0.55456833]])

In [90]:
theta.shape

(40, 43)

In [31]:
def optimize(theta, grad, lr=0.03):
    theta -= grad * lr
    return theta

In [124]:
epoch_losses = {}

for epoch in range(1):

    losses =  []

    for context, target in data:
        context_idxs = np.array([word_to_ix[w] for w in context])
        #print("Context ID",context_idxs)
        preds = forward(context_idxs, theta)
        #print("Softmax",preds[-1])
        target_idxs = np.array([word_to_ix[target]])
        #print("Target_idxs",target_idxs)
        loss = NLLLoss(preds[-1], target_idxs)
        print("loss",loss)
        losses.append(loss)

        grad = backward(preds, theta, target_idxs)
        theta = optimize(theta, grad, lr=0.03)
        
     
    epoch_losses[epoch] = losses

Linear (1, 43)
Soft? -2.9634560174310294
targets [32]
len_targets 1
out [-2.96345602]
loss 2.9634560174310294
Linear (1, 43)
Soft? -4.963509677005591
targets [13]
len_targets 1
out [-2.98587773]
loss 2.985877725419211
Linear (1, 43)
Soft? -3.5945281187827276
targets [3]
len_targets 1
out [-2.84226983]
loss 2.84226982919546
Linear (1, 43)
Soft? -3.6759763195782127
targets [19]
len_targets 1
out [-1.82058734]
loss 1.8205873366364453
Linear (1, 43)
Soft? -4.714172557083768
targets [5]
len_targets 1
out [-3.17008978]
loss 3.1700897805627353
Linear (1, 43)
Soft? -4.168596962607323
targets [4]
len_targets 1
out [-1.86535196]
loss 1.8653519616907914
Linear (1, 43)
Soft? -4.438583656890541
targets [37]
len_targets 1
out [-2.23145146]
loss 2.2314514552503613
Linear (1, 43)
Soft? -4.556822906816872
targets [34]
len_targets 1
out [-2.25260214]
loss 2.2526021366160736
Linear (1, 43)
Soft? -4.239736759356406
targets [37]
len_targets 1
out [-2.24517368]
loss 2.2451736809256757
Linear (1, 43)
Soft? -

In [45]:
def predict(words):
    context_idxs = np.array([word_to_ix[w] for w in words])
    preds = forward(context_idxs, theta)
    word = ix_to_word[np.argmax(preds[-1])]
    
    return word

In [46]:
predict(['we', 'are', 'to', 'study'])

'about'

In [47]:
def accuracy():
    wrong = 0

    for context, target in data:
        if(predict(context) != target):
            wrong += 1
            
    return (1 - (wrong / len(data)))

In [48]:
accuracy()

1.0

SyntaxError: invalid syntax (<ipython-input-1-e4fb996c124e>, line 1)