In [96]:
import nltk
from nltk.tokenize import punkt
#import emoji
import re
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

In [72]:
# Toy corpus that the following cells tokenize and encode.
corpus = 'I am happy because I am learning'

In [73]:
def preprocess(corpus):
    """Turn raw text into a list of lowercase tokens.

    Every run of the punctuation characters , ! ? ; - is first replaced by a
    single '.', the result is tokenized with ``nltk.word_tokenize``, and only
    purely alphabetic tokens and the '.' token are kept (lowercased).

    Args:
        corpus: The raw text as a single string.

    Returns:
        list[str]: The retained tokens in their original order.
    """
    normalized = re.sub(r'[,!?;-]+','.', corpus)
    tokens = nltk.word_tokenize(normalized)

    kept = []
    for token in tokens:
        # Numbers, mixed alphanumerics and any other punctuation are dropped.
        if token == '.' or token.isalpha():
            kept.append(token.lower())
    return kept

# Tokenize the toy corpus; the resulting token list is reused by later cells.
words = preprocess(corpus)
print(words)

['i', 'am', 'happy', 'because', 'i', 'am', 'learning']


In [74]:
def get_windows(words, C):
    """Yield (context_words, center_word) pairs for a sliding window.

    Only positions that have ``C`` neighbours on both sides are used as
    centers, so the first and last ``C`` words never appear as a center word.
    If ``len(words) < 2 * C + 1`` nothing is yielded.

    Args:
        words: Sequence of tokens (must support slicing and ``+``, e.g. a list).
        C: Non-negative context half-size (number of words taken on each side).

    Yields:
        tuple: ``(context_words, center_word)`` where ``context_words`` holds
        the ``C`` words before and the ``C`` words after the center word.

    Raises:
        ValueError: If ``C`` is negative. Because this is a generator, the
            error surfaces when iteration starts.
    """
    if C < 0:
        # A negative half-size would index with wrapped-around positions and
        # eventually run past the end of `words`; fail with a clear message.
        raise ValueError(f"C must be non-negative, got {C}")

    for i in range(C, len(words) - C):
        center_word = words[i]
        context_words = words[(i - C):i] + words[(i + 1):(i + C + 1)]
        yield context_words, center_word

# NOTE(review): get_windows is a generator function, so this prints the
# generator object's repr rather than the windows themselves; wrap the call in
# list(...) to inspect the (context, center) pairs.
print(get_windows(words,2))

<generator object get_windows at 0x7fcfc8fbd9a0>


In [75]:
def build_vocab(corpus):
    """Return the distinct words of ``corpus`` in sorted order.

    Args:
        corpus: Iterable of hashable, mutually comparable tokens.

    Returns:
        list: The unique tokens, sorted ascending. A token's position in this
        list can serve as its vocabulary index.
    """
    # Only the set of distinct tokens is needed; per-word counts are not used.
    return sorted(set(corpus))

In [76]:
# Sorted vocabulary of the toy corpus; the encoding helpers look words up in this list.
vocab = build_vocab(words)

In [77]:
def one_hot_vector(sorted_vocab, word):
    """Encode ``word`` as a one-hot vector over ``sorted_vocab``.

    Args:
        sorted_vocab: List of vocabulary words; a word's list position is its
            index in the encoding.
        word: The word to encode. Must be present in ``sorted_vocab``.

    Returns:
        numpy.ndarray: Float vector of length ``len(sorted_vocab)`` that is 1
        at the word's position and 0 elsewhere.

    Raises:
        ValueError: If ``word`` is not in ``sorted_vocab``.
    """
    position = sorted_vocab.index(word)
    encoding = np.zeros(len(sorted_vocab))
    encoding[position] = 1
    return encoding

In [78]:
# Sanity check: the single 1 should sit at the position of "because" in vocab.
print(one_hot_vector(vocab, "because"))

[0. 1. 0. 0. 0.]


In [79]:
def one_hot_context_words(context_words, vocab):
    """Average the one-hot encodings of the given context words.

    Args:
        context_words: Iterable of words, each of which must be in ``vocab``.
        vocab: Sorted vocabulary list passed through to ``one_hot_vector``.

    Returns:
        numpy.ndarray: Vector of length ``len(vocab)`` whose entries are the
        relative frequency of each vocabulary word within ``context_words``.
        For an empty context the result is the all-zeros vector.

    Raises:
        ValueError: If a context word is not in ``vocab`` (raised by
            ``one_hot_vector``).
    """
    vectors = [one_hot_vector(vocab, word) for word in context_words]
    if not vectors:
        # np.mean of an empty list would return a NaN scalar and emit a
        # RuntimeWarning; an empty context carries no mass on any word.
        return np.zeros(len(vocab))
    return np.mean(vectors, axis=0)

In [80]:
# Example context of four words; "i" occurs twice, so its entry is 2/4 = 0.5.
print(one_hot_context_words(["i","am","because","i"], vocab))

[0.25 0.25 0.   0.5  0.  ]


In [81]:
# For each window with 2 words on either side, print: the context words, their
# averaged one-hot encoding, the center word, and the center word's one-hot encoding.
for x,y in get_windows(words,2):
    print(f'{x},{one_hot_context_words(x, vocab)},{y}, {one_hot_vector(vocab,y)}')

['i', 'am', 'because', 'i'],[0.25 0.25 0.   0.5  0.  ],happy, [0. 0. 1. 0. 0.]
['am', 'happy', 'i', 'am'],[0.5  0.   0.25 0.25 0.  ],because, [0. 1. 0. 0. 0.]
['happy', 'because', 'am', 'learning'],[0.25 0.25 0.25 0.   0.25],i, [0. 0. 0. 1. 0.]


In [82]:
def reLU(input):
    """Rectified linear unit: element-wise ``max(input, 0)``.

    Works for scalars as well as array-likes of any shape. Negative values,
    including ``-inf``, map to 0; NaN propagates.

    Args:
        input: A number or array-like of numbers. (The name shadows the
            builtin ``input`` but is kept so keyword callers keep working.)

    Returns:
        A NumPy scalar for scalar input, otherwise a ``numpy.ndarray`` of the
        same shape as ``input``.
    """
    # np.maximum broadcasts over arrays, whereas a Python-level int(input > 0)
    # fails on multi-element arrays and turns -inf into NaN via 0 * -inf.
    return np.maximum(input, 0)

In [90]:
def softmax(z_vec):
    """Compute the softmax of a score vector in a numerically stable way.

    The exponentials are normalized by their sum over *all* elements of
    ``z_vec``, so the result sums to 1.

    Args:
        z_vec: Array-like of real-valued scores.

    Returns:
        numpy.ndarray: Array of the same shape as ``z_vec`` with non-negative
        entries summing to 1. An empty input yields an empty array.
    """
    z = np.asarray(z_vec)
    if z.dtype.kind in "biu":
        # Integer/bool scores: move to float before shifting so the
        # subtraction below is well-defined and cannot overflow.
        z = z.astype(np.float64)
    if z.size == 0:
        # np.max is undefined for empty input; return an empty float array.
        return np.exp(z)

    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps every exponent <= 0, so np.exp cannot overflow to inf.
    exp = np.exp(z - np.max(z))
    return exp / np.sum(exp)


In [94]:
# Softmax probabilities for an example 5-element score vector; reused by the loss check.
soft = softmax(np.array([9,8,11,10,8.5]))

In [92]:
def cross_entropy_loss(actual, predicted):
    """Cross-entropy ``-sum(actual * log(predicted))`` between two distributions.

    Terms whose target weight is exactly 0 contribute nothing, following the
    convention ``0 * log(0) = 0``. This keeps the loss finite when a class
    with no target mass is predicted with probability 0.

    Args:
        actual: Array-like of target weights (e.g. a one-hot vector).
        predicted: Array-like of predicted probabilities, broadcastable
            against ``actual``.

    Returns:
        NumPy float: The loss. It is ``inf`` if a class with non-zero target
        weight has predicted probability 0.
    """
    actual, predicted = np.broadcast_arrays(np.asarray(actual), np.asarray(predicted))
    # Restrict to entries that carry target mass so log(0) on irrelevant
    # classes cannot poison the sum with NaN.
    support = actual != 0
    return -np.sum(actual[support] * np.log(predicted[support]))


In [95]:
# Cross-entropy between the one-hot target for "happy" and the example softmax output.
print(cross_entropy_loss(one_hot_vector(vocab,"happy"), soft))

0.49169588636530653


In [None]:
class Model(nn.Module):
    """Fully connected network with two ReLU hidden layers and a linear output layer."""

    def __init__(self, in_features=4, h1=8, h2=9, out_features=3) -> None:
        """Create the three linear layers.

        Args:
            in_features: Size of each input sample.
            h1: Width of the first hidden layer.
            h2: Width of the second hidden layer.
            out_features: Size of each output sample.
        """
        super().__init__()
        # Attribute names determine the state_dict keys; keep them stable.
        self.fc1 = nn.Linear(in_features, h1)
        self.fc2 = nn.Linear(h1, h2)
        self.out = nn.Linear(h2, out_features)

    def forward(self, x):
        """Run ``x`` through both hidden layers (with ReLU) and the output layer.

        Args:
            x: Input tensor whose last dimension equals ``in_features``.

        Returns:
            Tensor whose last dimension equals ``out_features``; no activation
            is applied to the output layer.
        """
        activations = x
        for hidden_layer in (self.fc1, self.fc2):
            activations = F.relu(hidden_layer(activations))
        return self.out(activations)