In [1]:
import torch
import numpy as np
from datasets import load_dataset
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')
batch_size = 64

# Creating vocabulory

In [3]:
n = 3
embedding_dims = 10 # how many dimensional vector should represent each word in the vocabulory

In [4]:
ds = load_dataset("MuskumPillerum/General-Knowledge")
df = ds['train'][:10]

In [5]:
def tokenize(sentence):
    data = sentence
    split_terms = [',', '.', '!', '?', '(', ')', '&', '$', '+', '-', '/', '*', ';', ':']
    for split_term in split_terms:
        if split_term in sentence:
            data = data.replace(split_term, f' {split_term} ')
    data = data.split()
    return data

tokenize('what is your !! ! name ?    ')

['what', 'is', 'your', '!', '!', '!', 'name', '?']

In [6]:
vocab_list = set()
X = []
for x,y in zip(df['Question'], df['Answer']):
    data = f'Question: {x} Answer: {y}'
    data = data.lower().replace('\\n', '')
    vocab_list.update(tokenize(data))
    X.append(data)

In [7]:
vocab_list.add('<UNK>')

In [8]:
vocab = {v:i for v,i in zip(vocab_list, range(0, len(vocab_list)+1))}
len(vocab)

141

In [9]:
vocab

{'enabling': 0,
 'tasks': 1,
 '<UNK>': 2,
 'make': 3,
 'perform': 4,
 'ability': 5,
 'its': 6,
 'would': 7,
 'and': 8,
 'based': 9,
 'layers': 10,
 'generate': 11,
 'take': 12,
 'networks': 13,
 'from': 14,
 'reinforcement': 15,
 'various': 16,
 'function': 17,
 'specific': 18,
 'being': 19,
 'it': 20,
 'in': 21,
 'analyze': 22,
 'aims': 23,
 'time': 24,
 'structure': 25,
 'task': 26,
 'ai': 27,
 'system': 28,
 'examples': 29,
 'neural': 30,
 'designed': 31,
 'where': 32,
 'machine': 33,
 'decisions': 34,
 'reward': 35,
 'signal': 36,
 'feedback': 37,
 'complex': 38,
 'artificial': 39,
 'while': 40,
 'receives': 41,
 'learning': 42,
 'require': 43,
 'answer': 44,
 'relationships': 45,
 'world': 46,
 'type': 47,
 'perception': 48,
 'natural': 49,
 'network': 50,
 'improve': 51,
 'subset': 52,
 'on': 53,
 'learn': 54,
 'algorithms': 55,
 'intelligence': 56,
 '-': 57,
 'main': 58,
 'form': 59,
 'way': 60,
 'of': 61,
 'environment': 62,
 'patterns': 63,
 'maximize': 64,
 'refers': 65,
 ','

In [10]:
embedding_matrix = torch.randn(len(vocab),embedding_dims)
embedding_matrix.shape

torch.Size([141, 10])

In [11]:
embedding_matrix[[1,2,3]]

tensor([[-1.0020, -1.2304,  0.8351, -0.9963, -1.8611, -1.8046,  0.5524, -0.1958,
          0.7299,  1.4512],
        [-0.8595,  1.1284,  0.6939, -1.1053, -0.2888,  1.9407,  0.3709,  0.6215,
         -0.1729, -0.8204],
        [ 1.4746, -0.0752,  0.0389, -0.8732, -1.4866, -0.5669, -0.4529, -1.2934,
         -0.9763, -0.3258]])

In [12]:
def get_word_embedding(word, vocab=vocab, embedding_matrix=embedding_matrix):
    if word not in vocab:
        word = '<UNK>'
    embedding = embedding_matrix[vocab[word]]
    return embedding

get_word_embedding('as')

tensor([-1.3235,  0.4795,  2.3164,  0.8038, -0.3986, -0.2895,  0.2415,  0.8055,
        -0.1371, -0.2991])

# Creating the model

In [13]:
from print_color import print

In [14]:
class NeuralNetwork(nn.Module):
    def __init__(self, n=n, num_hidden_layer=1024, vocab_len = len(vocab_list), dim_embedding=10):
        super(NeuralNetwork, self).__init__()
        self.n = n
        self.dim_embedding = dim_embedding
        # self.embedding = torch.rand(vocab_len, dim_embedding).to(device)
        self.embedding = nn.Embedding(vocab_len, dim_embedding).to(device)

        self.hidden_layer = nn.Linear((n-1)*dim_embedding, num_hidden_layer)
        self.relu = nn.ReLU()
        self.output = nn.Linear(num_hidden_layer, vocab_len)
        # self.softmax = nn.Softmax(dim=1)

    def forward(self,x):
        # x will be the indeices of embedding representing the input words
        # print(self.embedding(x), color='purple')
        x_embeddings = self.embedding(x).view(-1,(self.n-1)*self.dim_embedding).to(device)

        out = self.hidden_layer(x_embeddings)
        out = self.relu(out)
        out = self.output(out)
        # out = self.softmax(out)
        return out

model = NeuralNetwork(n=3).to(device)

# Creating dataset

In [15]:
dataset = []
for x,y in zip(df['Question'], df['Answer']):
    data = f'Question: {x} Answer: {y}'.lower()
    tokenized_data = tokenize(data)
    for i in range(len(tokenized_data)-n):
        # print(i)
        data_i = tokenized_data[i:i+n]
        dataset.append([vocab[i] if i in vocab else vocab['<UNK>'] for i in data_i])
    # print()

dataset_np = np.array(dataset)
dataset_np.shape

(419, 3)

In [16]:
class QuestionAnswerDataset(Dataset):
    def __init__(self, dataset):
        self.dataset = dataset
        self.x = dataset[:,[i for i in range(n-1)]]
        self.y = dataset[:,-1]
        self.m, self.n = self.x.shape
    
    def __getitem__(self, index):
        return self.x[index], self.y[index]
    
    def __len__(self):
        return self.m
    
dataset = QuestionAnswerDataset(dataset=dataset_np)
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True, num_workers=0)

# Training

In [23]:
num_epochs = 100
lr = 0.01
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
model = model.to(device)

cpu[0m


In [18]:
# model = torch.compile(model)

In [25]:
lossCategory = nn.CrossEntropyLoss()
optimiser = torch.optim.SGD(model.parameters(), lr=lr)

In [26]:
for epoch in range(num_epochs):
    for i, (input, label) in enumerate(dataloader):
        x = input.to(device)
        y = label.to(device)
        y_pred = model(x)
        loss = lossCategory(y_pred, y)
        print(loss.item())
        loss.backward()
        optimiser.step()
        optimiser.zero_grad()


4.204731464385986[0m
4.2329816818237305[0m
4.296705722808838[0m
4.092865467071533[0m
4.115166664123535[0m
4.220170974731445[0m
4.070713520050049[0m
4.1469902992248535[0m
4.198991298675537[0m
4.118839263916016[0m
4.223509311676025[0m
3.9694974422454834[0m
4.153100967407227[0m
4.031463623046875[0m
4.184702396392822[0m
3.953085422515869[0m
4.2832818031311035[0m
4.07787561416626[0m
3.7766098976135254[0m
4.080444812774658[0m
4.212955951690674[0m
4.15690279006958[0m
3.8221802711486816[0m
4.144639015197754[0m
3.9296045303344727[0m
4.111403942108154[0m
3.9527649879455566[0m
4.041698455810547[0m
4.188329696655273[0m
4.0074872970581055[0m
3.8784728050231934[0m
3.7009410858154297[0m
4.2055864334106445[0m
3.7735376358032227[0m
4.143100261688232[0m
3.900527238845825[0m
3.965883255004883[0m
3.865996837615967[0m
3.8945984840393066[0m
3.9219560623168945[0m
3.982775926589966[0m
4.001570701599121[0m
3.8734524250030518[0m
4.090763568878174[0m
3.744917154312134

In [21]:
n, d, m = 3, 5, 7
embedding = nn.Embedding(n, d, max_norm=True)
W = torch.randn((m, d), requires_grad=True)
idx = torch.tensor([1, 2])
a = embedding.weight.clone() @ W.t()  # weight must be cloned for this to be differentiable
b = embedding(idx) @ W.t()  # modifies weight in-place
out = (a.unsqueeze(0) + b.unsqueeze(1))
loss = out.sigmoid().prod()
loss.backward()

In [22]:
embedding(torch.tensor(1))

tensor([ 0.4364,  0.0234, -0.4983,  0.7483,  0.0269],
       grad_fn=<EmbeddingBackward0>)