In [1]:
# Toy training corpus: lowercase, punctuation-free sentences joined into one string.
# Every physical line ends with a backslash, so the literal contains no newline
# characters; the indentation of the continuation lines stays inside the string as
# runs of spaces.
# NOTE(review): "twelve lack gray white" and "twelve months ake year" look like
# truncated "black" / "make" -- confirm against the original text before correcting,
# because fixing them changes the vocabulary.
data = """a man and a woman received mostly positive reviews gender identity refers to an individual personal sense of identity as man or \
          woman man and woman arrested after male attacked a man and a woman both aged in their thirty in nature man and woman i examine the \
          representation of men and women by focussing on the grammatical behaviour of the noun lemmas man and woman examined the frequency with \
          the words man men and woman women bachelor and spinster extend to basic terms for male and female human beings she looked at man woman \
          and boy girl and found that negative words are used more frequently with woman girl than with man boy the four directions are the \
          directions north east south and west appears to rotate around an axis passing through the north and south poles of the earth in these \
          locations whether the sun is moving from east to west through north or south by watching its movements the sun rises either north or \
          south of true east and sets north or south of true west for all locations the sun is seen to rise north of east and set north of west \
          analogue watch can be used to locate north and south this axis intersects the sphere at the north and south poles which appear to the \
          observer to lie directly above due north and south respectively on the horizon morning precedes afternoon evening and night in the \
          sequence of a day also true for evening and night night or nighttime is the period of darkness from sunset to sunrise unlike good \
          morning good afternoon and good evening good night is not used as a greeting what are the time ranges for morning afternoon evening \
          night and do they change with location or the seasons empire was ruled by an emperor king an emperor was also king grand prince was \
          reserved for an emperor or king grand prince in some languages is title before emperor and king start with the king or queen despite \
          they were an emperor or empress a queen regnant is a female monarch equivalent in rank to a king at the top of the hierarchy of royal \
          titles is the king and queen the relatives of the king and queen known as prince and princess only last royal titles work what the king \
          and queen actually do prince became a king a prince the male child of the king limited palette consisting of red yellow black and white \
          a mixture of yellow and black will appear as a variety of green basic colors include black white red green blue and yellow set of twelve \
          lack gray white pink red orange yellow green blue nonspectral colors are the colors gray black and white the antonym of white is black \
          black and white also appealed to architects tied with black and white black and white often represent the contrast between light and \
          darkness day and night male and female calendar are generally recognized spring summer autumn or fall and winter four seasons winter \
          spring summer autumn fall in the middle of summer and winter number of seasons between summer and winter can number from one to three \
          universally translated as spring summer autumn and winter but actually begin much earlier autumn is the season between summer and winter \
          lasting from march to june or from september to december summer is the season between spring and autumn from june to september or from \
          december to march winter spring summer and autumn are the seasons of the year december january and february are the winter months march \
          april and may are the spring months june july and august are the summer months day to learn about days weeks months and years twelve months \
          ake year seven days make week days fit into a week into a month and into a year each new day starts at midnight days in some months to make \
          them all add up to one year"""

In [None]:
import re
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.autograd import Variable
import matplotlib.pyplot as plt

def clean_text(
    string: str,
    punctuations=r'''!()-[]{};:'"\,<>./?@#$%^&*_~''',
    stop_words=('the', 'a', 'and', 'is', 'be', 'will', 'or', 'by', 'to', 'of', 'an')) -> str:
    """
    Normalize raw text into a lowercase string of space-separated tokens.

    Steps, in order: lowercase, strip URLs, strip HTML-like tags, drop
    punctuation characters, drop stop words, collapse whitespace.

    Args:
        string: The raw text to clean.
        punctuations: Container of single characters to delete from the text.
        stop_words: Container of lowercase words to drop after tokenizing on
            whitespace.

    Returns:
        The cleaned text with tokens separated by single spaces and no
        leading or trailing whitespace ('' if nothing survives).
    """
    # Lowercase first: the URL pattern below is case-sensitive, so doing this
    # later would leave e.g. "HTTP://..." or "WWW...." in the text, where the
    # punctuation step would then squash it into one junk token.
    string = string.lower()

    # Cleaning the urls
    string = re.sub(r'https?://\S+|www\.\S+', '', string)

    # Cleaning the html elements
    string = re.sub(r'<.*?>', '', string)

    # Removing the punctuations in a single pass (characters are deleted, not
    # replaced by a space, so "well-known" becomes "wellknown").
    string = ''.join(ch for ch in string if ch not in punctuations)

    # Removing stop words; split() + join() also collapses every run of
    # whitespace to a single space and trims both ends.
    return ' '.join(word for word in string.split() if word not in stop_words)



# Tokenize the cleaned corpus on whitespace.
tokenized_corpus = clean_text(data).split()

# sorted() pins the word -> index mapping. Iterating a bare set of strings can
# yield a different order in every interpreter session (string hashes are
# randomized per process), which would make indices, trained weights and any
# saved embeddings impossible to compare between runs.
vocabulary = sorted(set(tokenized_corpus))

# Lookup tables in both directions between a word and its integer index.
word2idx = {w: idx for idx, w in enumerate(vocabulary)}
idx2word = dict(enumerate(vocabulary))

vocabulary_size = len(vocabulary)

print(vocabulary_size)



# Build (center, context) index pairs: every token is paired with each
# neighbour at most `window_size` positions to its left or right.
window_size = 2
indices = [word2idx[token] for token in tokenized_corpus]
corpus_len = len(indices)

idx_pairs = []
for center_pos, center_idx in enumerate(indices):
    # Clamp the window to the corpus boundaries instead of testing each offset.
    first = max(center_pos - window_size, 0)
    last = min(center_pos + window_size, corpus_len - 1)
    for context_pos in range(first, last + 1):
        if context_pos != center_pos:  # a word is never its own context
            idx_pairs.append((center_idx, indices[context_pos]))

# One row per pair: column 0 is the center word index, column 1 the context index.
idx_pairs = np.array(idx_pairs)

def get_input_layer(word_idx, size=None):
    """
    Build a one-hot float32 vector for a word index.

    Args:
        word_idx: Position to set to 1.0.
        size: Length of the vector. Defaults to the module-level
            ``vocabulary_size`` when omitted, which is what existing
            single-argument calls get.

    Returns:
        A 1-D ``torch.float32`` tensor of length ``size`` that is 0.0
        everywhere except at ``word_idx``.
    """
    if size is None:
        size = vocabulary_size
    one_hot = torch.zeros(size, dtype=torch.float32)
    one_hot[word_idx] = 1.0
    return one_hot


# Seed the RNG so the random initial weights, and therefore the loss curve,
# are the same on every Restart & Run All.
torch.manual_seed(0)

embedding_dims = 5
# Leaf tensors created with requires_grad=True replace the deprecated
# Variable wrapper; autograd tracks them in exactly the same way.
# W1 maps a one-hot word vector to its embedding; W2 maps an embedding to
# one score per vocabulary word.
W1 = torch.randn(embedding_dims, vocabulary_size, dtype=torch.float32, requires_grad=True)
W2 = torch.randn(vocabulary_size, embedding_dims, dtype=torch.float32, requires_grad=True)
num_epochs = 150
learning_rate = 0.01

# Plain SGD over every (center, context) pair, one pair per update step.
for epo in range(num_epochs):
    loss_val = 0
    # The loop variables are deliberately NOT called `data`/`target`: `data`
    # is the corpus string defined in the first cell, and rebinding it here
    # would break any later re-run of the tokenization code.
    for center_idx, context_idx in idx_pairs:
        y_true = torch.tensor([int(context_idx)], dtype=torch.long)

        # Multiplying W1 by a one-hot vector just selects one column, so take
        # that column directly instead of allocating a vocabulary-sized
        # vector for every pair. Values and gradients are the same.
        z1 = W1[:, int(center_idx)]
        z2 = torch.matmul(W2, z1)

        log_softmax = F.log_softmax(z2, dim=0)

        # nll_loss expects a (batch, classes) input, hence the view to 1 x V.
        loss = F.nll_loss(log_softmax.view(1, -1), y_true)
        loss_val += loss.item()
        loss.backward()

        # Update the weights in place without recording the update in the
        # autograd graph, then reset the accumulated gradients.
        with torch.no_grad():
            W1 -= learning_rate * W1.grad
            W2 -= learning_rate * W2.grad
            W1.grad.zero_()
            W2.grad.zero_()

    # Report the mean per-pair loss every 10th epoch.
    if epo % 10 == 0:
        print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')


# Map every vocabulary word to its learned embedding (a NumPy vector of
# length `embedding_dims`), obtained by projecting the word's one-hot
# vector through W1.
embedding_dict = {}

for word in vocabulary:
    one_hot = get_input_layer(word2idx[word])
    embedding_dict[word] = torch.matmul(W1, one_hot).detach().numpy()

245


  Variable._execution_engine.run_backward(


Loss at epo 0: 7.408570813540958
Loss at epo 10: 5.392810451608409
Loss at epo 20: 4.815252462134243
Loss at epo 30: 4.409559749151583
Loss at epo 40: 4.127143623879432
Loss at epo 50: 3.931317388518273
Loss at epo 60: 3.7861982633927
Loss at epo 70: 3.6722242269114105
