# **Bigram Character-Level Neural Network Language Model**

In [43]:
import torch
import numpy as np
import matplotlib.pyplot as plt

In [44]:
# load dataset: every line of names.txt becomes one entry of `words`
# (line terminators are stripped by splitlines); the context manager
# closes the file handle as soon as the content has been read
with open('names.txt', 'r') as names_file:
    words = names_file.read().splitlines()

In [45]:
# vocabulary for the training set
# special markers for the beginning and the end of a word
startToken, endToken = '<S>', '<E>'

# every distinct character occurring in the dataset, in sorted order
chars = sorted(set(''.join(words)))

# string -> index lookup: the characters take the indices 0 .. len(chars) - 1
stoi = dict(zip(chars, range(len(chars))))

# the two markers do not occur in the dataset text, so they are registered
# by hand right after the characters (start token first, then end token)
stoi.update({startToken: len(chars), endToken: len(chars) + 1})

# index -> string lookup, the inverse of stoi
itos = {index: symbol for symbol, index in stoi.items()}

### Prepare the training set of bigrams

In [46]:
# inputs and the targets (labels) the model should predict for them
xs, ys = [], []

# note: the [:1] slice restricts the loop to the first word of the dataset
for w in words[:1]:
    # wrap the word in the start/end tokens and map every symbol to its index
    chs = [startToken, *w, endToken]
    indices = [stoi[ch] for ch in chs]
    # consecutive symbols form the bigrams: each index except the last is an input ...
    xs.extend(indices[:-1])
    # ... and the index that follows it is the matching target
    ys.extend(indices[1:])

# convert to tensors (torch.tensor keeps integer data as int64, whereas torch.Tensor would produce float32)
xs = torch.tensor(xs)
ys = torch.tensor(ys)

### Encode integers with one-hot encoding
One-hot encoding converts categorical data into a binary vector in which exactly one element is set to 1 and all other elements are 0. This is useful for feeding categorical data to a neural network.

For example, to encode three kinds of fruit:
- "apple" -> [1, 0, 0]
- "banana" -> [0, 1, 0]
- "orange" -> [0, 0, 1]

In [47]:
import torch.nn.functional as F

# one-hot encode inputs and targets alike: every index turns into a row with
# len(stoi) entries, which is then cast to float
xencoded, yencoded = (
    F.one_hot(indices, num_classes=len(stoi)).float()
    for indices in (xs, ys)
)

### Create first layer of the network

In [48]:
# weights: a (len(stoi), 1) column filled with random values drawn from a normal distribution.
# A dedicated generator with a fixed seed is used so that a fresh kernel
# (Restart & Run All) reproduces exactly the same initial weights.
weight_gen = torch.Generator().manual_seed(2147483647)
W = torch.randn((len(stoi), 1), generator=weight_gen)

In [52]:
# show the shape of the encoded inputs and of the weight matrix, one per line
print(xencoded.shape, W.shape, sep='\n')

torch.Size([5, 28])
torch.Size([28, 1])


## Remember the rules of matrix multiplication:
**A (m x n) * B (n x p) = C (m x p)**

**C[i][j] = sum(A[i][k] * B[k][j]) for k = 1 to n**