## Търсене и извличане на информация. Приложение на дълбоко машинно обучение
> ### Стоян Михов
> #### Зимен семестър 2021/2022

### Упражнение 11

 За да работи програмата трябва корпуса от публицистични текстове за Югоизточна Европа,
 да се намира разархивиран в директорията, в която е програмата (виж упражнение 2).

 Преди да се стартира програмата е необходимо да се активира съответното обкръжение с командата: `conda activate tii`

In [25]:
import sys
import nltk
nltk.download("punkt")
from nltk.corpus import PlaintextCorpusReader
import numpy as np
import torch

[nltk_data] Downloading package punkt to /home/psarlov/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [2]:
X = torch.tensor([[1.2,2,3],[4,5,6]]);X

tensor([[1.2000, 2.0000, 3.0000],
        [4.0000, 5.0000, 6.0000]])

In [3]:
Y = torch.tensor([[3,2,1],[2,3,4.1]], requires_grad=True);Y

tensor([[3.0000, 2.0000, 1.0000],
        [2.0000, 3.0000, 4.1000]], requires_grad=True)

In [4]:
A = torch.rand(3,4, requires_grad=True);A

tensor([[0.8554, 0.9563, 0.1901, 0.0984],
        [0.2970, 0.6576, 0.7569, 0.2961],
        [0.2996, 0.8576, 0.0432, 0.2611]], requires_grad=True)

In [5]:
B = torch.matmul(X+Y,A);B

tensor([[ 5.9788, 10.0771,  3.9991,  2.6420],
        [10.5339, 19.6603,  7.6325,  5.5964]], grad_fn=<MmBackward>)

In [6]:
C = torch.sum(-2 * B);C

tensor(-132.2402, grad_fn=<SumBackward0>)

In [7]:
A.grad

In [8]:
C.backward()

In [9]:
A.grad

tensor([[-20.4000, -20.4000, -20.4000, -20.4000],
        [-24.0000, -24.0000, -24.0000, -24.0000],
        [-28.2000, -28.2000, -28.2000, -28.2000]])

In [10]:
print(A.grad)
print(Y.grad)

tensor([[-20.4000, -20.4000, -20.4000, -20.4000],
        [-24.0000, -24.0000, -24.0000, -24.0000],
        [-28.2000, -28.2000, -28.2000, -28.2000]])
tensor([[-4.2002, -4.0153, -2.9230],
        [-4.2002, -4.0153, -2.9230]])


In [11]:
A

tensor([[0.8554, 0.9563, 0.1901, 0.0984],
        [0.2970, 0.6576, 0.7569, 0.2961],
        [0.2996, 0.8576, 0.0432, 0.2611]], requires_grad=True)

In [None]:
S = torch.mean(A);S

In [None]:
S.backward()

In [None]:
print(A.grad)
print(Y.grad)

In [None]:
A.grad = None
Y.grad = None

In [None]:
S = torch.mean(torch.matmul(Y,A));S

In [None]:
S.backward()

In [13]:
print(A.grad)
print(Y.grad)

None
None


In [14]:
(A,Y)

(tensor([[25.3354, 25.4363, 24.6701, 24.5784],
         [29.0970, 29.4576, 29.5569, 29.0961],
         [34.1396, 34.6976, 33.8832, 34.1011]], requires_grad=True),
 tensor([[8.0403, 6.8183, 4.5076],
         [7.0403, 7.8183, 7.6076]], requires_grad=True))

In [12]:
with torch.no_grad():
    A -= 1.2 * A.grad
    Y -= 1.2 * Y.grad
    A.grad = None
    Y.grad = None

print(Y)

tensor([[8.0403, 6.8183, 4.5076],
        [7.0403, 7.8183, 7.6076]], requires_grad=True)


#####  Визуализация на прогреса

In [16]:
class progressBar:
    def __init__(self ,barWidth = 50):
        self.barWidth = barWidth
        self.period = None
    def start(self, count):
        self.item=0
        self.period = int(count / self.barWidth)
        sys.stdout.write("["+(" " * self.barWidth)+"]")
        sys.stdout.flush()
        sys.stdout.write("\b" * (self.barWidth+1))
    def tick(self):
        if self.item>0 and self.item % self.period == 0:
            sys.stdout.write("-")
            sys.stdout.flush()
        self.item += 1
    def stop(self):
        sys.stdout.write("]\n")

In [17]:
def extractDictionary(corpus, limit=20000):
    pb = progressBar()
    pb.start(len(corpus))
    dictionary = {}
    for doc in corpus:
        pb.tick()
        for w in doc:
            if w not in dictionary: dictionary[w] = 0
        dictionary[w] += 1
    L = sorted([(w,dictionary[w]) for w in dictionary], key = lambda x: x[1] , reverse=True)
    if limit > len(L): limit = len(L)
    words = [ w for w,_ in L[:limit] ] + [unkToken]
    word2ind = { w:i for i,w in enumerate(words)}
    pb.stop()
    return words, word2ind

In [18]:
def extractData(corpus, order, word2ind):
    pb = progressBar()
    pb.start(len(corpus))
    unk = word2ind[unkToken]
    start = word2ind[startToken]

    points = sum(len(s)-1 for s in corpus)
    
    target = np.empty(points, dtype='int32')
    context = np.empty((points,order-1), dtype='int32')
    p = 0
    for doc in corpus:
        pb.tick()
        for wi in range(1,len(doc)):
            i = word2ind.get(doc[wi], unk)
            target[p] = i
            sample = []
            for k in range(1,order):
                if wi-k < 0:
                    j = start
                else:
                    j = word2ind.get(doc[wi-k], unk)
                context[p,k-1] = j
            p += 1
    pb.stop()
    return target, context

#####   Зареждане на корпуса

In [26]:
corpus_root = '../JOURNALISM.BG/C-MassMedia'
myCorpus = PlaintextCorpusReader(corpus_root, '.*\.txt')
startToken = '<START>'
endToken = '<END>'
unkToken = '<UNK>'

corpus = [ [startToken] + [w.lower() for w in sent] + [endToken] for sent in myCorpus.sents()]

words, word2ind = extractDictionary(corpus)

[                                                  --------------------------------------------------]


In [27]:
order = 4
target, context = extractData(corpus, order, word2ind)

[                                                  --------------------------------------------------]


In [28]:
emb_size = 50
hid_size = 100

L = len(words)

In [35]:
batchSize = 1000
idx = np.arange(len(target), dtype='int32')
np.random.shuffle(idx)
learning_rate = 1.

#####   Първи вариант

In [36]:
E = torch.empty(L, emb_size, requires_grad = True)
W1 = torch.empty((order-1)*emb_size, hid_size, requires_grad = True)
b1 = torch.empty(hid_size, requires_grad = True)
W2 = torch.empty(hid_size, L, requires_grad = True)
b2 = torch.empty(L, requires_grad = True)

In [37]:
torch.nn.init.normal_(E)
torch.nn.init.normal_(W1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(W2)
torch.nn.init.normal_(b2)

tensor([ 1.6055, -1.7630,  0.2008,  ..., -0.9813,  0.6026, -0.4261],
       requires_grad=True)

In [38]:
sigmoid_fn = torch.nn.Sigmoid()

In [39]:
with torch.no_grad():
    E -= learning_rate * E.grad
    W1 -= learning_rate * W1.grad
    b1 -= learning_rate * b1.grad
    W2 -= learning_rate * W2.grad
    b2 -= learning_rate * b2.grad
    # Manually zero the gradients
    E.grad = None
    W1.grad = None
    b1.grad = None
    W2.grad = None
    b2.grad = None

if b % 10000 == 0:
    print(b, '/', len(idx), H.item())

TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'

In [41]:
for b in range(0,len(idx),batchSize):

    batchIdx = idx[b:min(b+batchSize,len(idx))]
    S = len(batchIdx)
    batchTarget = torch.tensor(target[batchIdx], dtype=torch.long)
    batchContext = context[batchIdx]

    X = E[batchContext].view(S,(order-1) * emb_size)
    h = sigmoid_fn(torch.matmul(X,W1) + b1)
    z = torch.matmul(h,W2) + b2
    
    t = torch.exp(z)
    s = torch.sum(t,axis=1)
    z = t/s.unsqueeze(1)
    p = z[torch.arange(S),batchTarget]
    H = -torch.mean(torch.log(p))

    H.backward()

    with torch.no_grad():
        E -= learning_rate * E.grad
        W1 -= learning_rate * W1.grad
        b1 -= learning_rate * b1.grad
        W2 -= learning_rate * W2.grad
        b2 -= learning_rate * b2.grad
        # Manually zero the gradients
        E.grad = None
        W1.grad = None
        b1.grad = None
        W2.grad = None
        b2.grad = None

    if b % 10000 == 0:
        print(b, '/', len(idx), H.item())

0 / 9992433 9.774436950683594


KeyboardInterrupt: 

#####   Втори вариант

In [42]:
torch.nn.init.normal_(E)
torch.nn.init.normal_(W1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(W2)
torch.nn.init.normal_(b2)

tensor([-0.8930, -0.5340, -0.6765,  ...,  1.3285, -0.3465,  1.5441],
       requires_grad=True)

In [43]:
loss_fn = torch.nn.CrossEntropyLoss()

In [44]:
for b in range(0,len(idx),batchSize):
    
    batchIdx = idx[b:min(b+batchSize,len(idx))]
    S = len(batchIdx)
    batchTarget = torch.tensor(target[batchIdx], dtype=torch.long)
    batchContext = context[batchIdx]
    
    X = E[batchContext].view(S,(order-1) * emb_size)
    h = sigmoid_fn(torch.matmul(X,W1) + b1)
    z = torch.matmul(h,W2) + b2
    H = loss_fn(z,batchTarget)
    
    H.backward()
    
    with torch.no_grad():
        E -= learning_rate * E.grad
        W1 -= learning_rate * W1.grad
        b1 -= learning_rate * b1.grad
        W2 -= learning_rate * W2.grad
        b2 -= learning_rate * b2.grad
        # Manually zero the gradients
        E.grad = None
        W1.grad = None
        b1.grad = None
        W2.grad = None
        b2.grad = None
    
    if b % 10000 == 0:
        print(b, '/', len(idx), H.item())

0 / 9992433 29.535554885864258
10000 / 9992433 21.66497802734375
20000 / 9992433 18.567216873168945
30000 / 9992433 16.70700454711914


KeyboardInterrupt: 

#####   Трети вариант

In [48]:
device = torch.device("cuda:0")
torch.version.cuda()

TypeError: 'NoneType' object is not callable

In [46]:
E = torch.empty(L, emb_size, requires_grad = True, device = device)
W1 = torch.empty((order-1)*emb_size, hid_size, requires_grad = True, device = device)
b1 = torch.empty(hid_size, requires_grad = True, device = device)
W2 = torch.empty(hid_size, L, requires_grad = True, device = device)
b2 = torch.empty(L, requires_grad = True, device = device)

AssertionError: Torch not compiled with CUDA enabled

In [None]:
torch.nn.init.normal_(E)
torch.nn.init.normal_(W1)
torch.nn.init.normal_(b1)
torch.nn.init.normal_(W2)
torch.nn.init.normal_(b2)

In [None]:
for b in range(0,len(idx),batchSize):
    
    batchIdx = idx[b:min(b+batchSize,len(idx))]
    S = len(batchIdx)
    batchTarget = torch.tensor(target[batchIdx], dtype=torch.long, device = device)
    batchContext = context[batchIdx]
    
    X = E[batchContext].view(S,(order-1) * emb_size)
    h = sigmoid_fn(torch.matmul(X,W1) + b1)
    z = torch.matmul(h,W2) + b2
    H = loss_fn(z,batchTarget)
    
    H.backward()
    
    with torch.no_grad():
        E -= learning_rate * E.grad
        W1 -= learning_rate * W1.grad
        b1 -= learning_rate * b1.grad
        W2 -= learning_rate * W2.grad
        b2 -= learning_rate * b2.grad
        # Manually zero the gradients
        E.grad = None
        W1.grad = None
        b1.grad = None
        W2.grad = None
        b2.grad = None
    
    if b % 10000 == 0:
        print(b, '/', len(idx), H.item())