In [44]:
import torch
import torch.nn as nn
import random
import numpy as np
import Tokens as tk
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
VOCAB_SIZE = 1201

разбивка текста и создание словаря

In [45]:
import os

samples = []
for sample in os.listdir('samples'):
    with open("samples/" + sample, encoding="utf-8") as text:
        samples.append(text.read().lower())


class QAPair:
    def __init__(self, question, answer):
        self.question = question
        self.answer = answer

dataset = []

for sample in samples:
    lines = sample.splitlines()
    questions = lines[::2]
    answers = lines[1::2]
    for q, a in zip(questions, answers):
        dataset.append(QAPair(q, a))


In [46]:
tdg = tk.TokenDictionaryGenerator(vocabulary_size = VOCAB_SIZE-1)
tokens = tdg.generate_tokens(samples)
tokenizer = tk.Tokenizer(tokens)
tokens.save("tokens.json")

создание модели 
предложение -> hidden
последние слово + hidden -> слово(1)...слово(n)

In [47]:
class RnnTextGen(nn.Module):

    def __init__(self,input_size,inp_lstm_size,hid_size,n_layers,out_size,dropout=0.2) -> None:
        super(RnnTextGen,self).__init__()
        self.input_size = input_size
        self.out_size = out_size
        self.n_layers = n_layers
        self.hidden_size=hid_size
        self.Encoder = nn.Embedding(input_size,inp_lstm_size)
        self.lstm = nn.LSTM(inp_lstm_size,hid_size,n_layers)
        self.dropout = nn.Dropout(dropout)
        self.l1 = nn.Linear(hid_size,out_size)
        self.l2 = nn.Linear(inp_lstm_size,out_size)
        self.attention = nn.MultiheadAttention(out_size,1)
        
    def forward(self,x,hidden=None):
        x = self.Encoder(x)
        p = self.l2(x)
        aw,_ = self.attention(p.view(-1,self.out_size),p.view(-1,self.out_size),p.view(-1,self.out_size))#a - attn output b - attn_wheights)
        x,hidden = self.lstm(x)
        x = self.dropout(x)
        x = self.l1(x)
        x = torch.cat((aw,x))
        return x,hidden
    
    def init_hidden(self,batch_size=1):
        return (torch.zeros(self.n_layers, batch_size, self.hidden_size, requires_grad=True).to(device),
               torch.zeros(self.n_layers, batch_size, self.hidden_size, requires_grad=True).to(device))

In [48]:
model=RnnTextGen(VOCAB_SIZE-1,1000,500,2,VOCAB_SIZE).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, amsgrad=True)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, 
    patience=5, 
    verbose=True, 
    factor=0.5
)

In [49]:
model(torch.LongTensor([1,2]).to(device))[0].shape

torch.Size([4, 1201])

In [50]:
def evaluate(model:RnnTextGen,text:str,prediction_lim:int=15):
    text_idx = torch.LongTensor(list(tokenizer.tokenize(text))).to(device)
    hidden = model.init_hidden()
    inp = text_idx
    predicted_text=""
    for i in range(prediction_lim):
        next_w , hidden = model(inp.to(device),hidden)
        inp = torch.cat([inp,next_w[-1].argmax().view(-1)])
        if next_w[-1].argmax() == torch.LongTensor([VOCAB_SIZE-1]).to(device):
            break
        word = tokens.decode(int(next_w[-1].argmax()))
        predicted_text +=word
    return predicted_text

In [51]:
def get_batch(dataset:list):
    for qa in dataset:
        question_idx = list(tokenizer.tokenize(qa.question))
        target = list(tokenizer.tokenize(qa.answer))+[tokens.count()]
        test = question_idx+target[:-1]

        target =torch.LongTensor(target).to(device)
        test = torch.LongTensor(test).to(device)
        yield target,test

In [52]:
def train(epoches:int,model:RnnTextGen,batch_size:int)->None:
    """epoches - number of epoches through all dataset
    model - model required to teach
    batch_size - n/a"""
    loss_avg =[]
    for epoch in range(epoches):
        for target,train in get_batch(dataset):
            model.train()

            hidden = model.init_hidden(batch_size)

            output,hidden = model(train,hidden)
            target_len = len(target)
            loss = criterion(output[-target_len:],target)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            loss_avg.append(loss.item())
            if len(loss_avg) >= 50:
                mean_loss = np.mean(loss_avg)
                print(f'Loss: {mean_loss}')
                scheduler.step(mean_loss)
                model.eval()
                question = random.choice(dataset).question
                answer = evaluate(model,question)
                print(f"Question: {question} \n Answer: {answer}")
                loss_avg = []

обучение модели

In [53]:
train(30, model, 1)

Loss: 6.634598226547241
Question: какой твой любимый домашний питомец? 
 Answer: я дум.
Loss: 6.571789798736572
Question: я буду использовать базу даных наших собщений 
 Answer: , это , это , это , это , это , это , это ,
Loss: 5.8139488697052
Question: знаешь, в последнее время я интересуюсь фотографией. мне нравится фиксировать моменты и сохранять их на фото. 
 Answer: 
Loss: 6.056747531890869
Question: и как мы можем знать, что предопределено, а что нет? 
 Answer: 
Loss: 5.445373315811157
Question: какую роль играет учитель в образовании? 
 Answer: мы можем сделать,,ей ей ей знамой,, но .
Loss: 5.521321187019348
Question: но как мы можем получить практический опыт? 
 Answer: 
Loss: 5.168804602622986
Question: но как мы можем получить практический опыт? 
 Answer: мы можем олучисти,ну,нф призв,ходяакже 
Loss: 4.643971447944641
Question: какой твой любимый музыкальный жанр? 
 Answer: 
Loss: 4.664632339477539
Question: привет как я могу тебя звать? 
 Answer: мы можем получить информациц

KeyboardInterrupt: 

In [57]:
quest = input().lower()
evaluate(model,quest,35)

'звездные войны". а у тебя?'

In [None]:
torch.save(model,"data.pkl")

In [None]:
model = torch.load("data.pkl").to(device)

In [19]:
import torch 
import torch.nn as nn

class abracadabra(nn.Module):
    def __init__(self,size):
        super(abracadabra,self).__init__()
        self.size = size

        self.embeding = nn.Embedding(size,10)
        self.l1 = nn.Linear(10,size)
        self.attention = nn.MultiheadAttention(size,2)
        self.lstm = nn.LSTM(size,10,2)
        self.l2 = nn.Linear(10,size)
        
        
    def forward(self,x,k,v,hidden=None):
        x = self.embeding(x)
        x = self.l1(x)
        a,b = self.attention(x.view(-1,self.size),k.view(-1,self.size),v.view(-1,self.size))#a - attn output b - attn_wheights
        out,hidden = self.lstm(x,hidden)
        out = self.l2(out)
        sumt = a*out
        return a,out,sumt
    
model = abracadabra(6)
k = torch.Tensor(6)
v = torch.Tensor(6)
x=torch.LongTensor([1,2,3,4,5])
model(x,k,v)

(tensor([[ 4.0638e-44,  8.4078e-45,  8.4078e-45, -1.4013e-45, -8.4078e-45,
           7.0065e-45],
         [ 4.0638e-44,  8.4078e-45,  8.4078e-45, -1.4013e-45, -8.4078e-45,
           7.0065e-45],
         [ 4.0638e-44,  8.4078e-45,  8.4078e-45, -1.4013e-45, -8.4078e-45,
           7.0065e-45],
         [ 4.0638e-44,  8.4078e-45,  8.4078e-45, -1.4013e-45, -8.4078e-45,
           7.0065e-45],
         [ 4.0638e-44,  8.4078e-45,  8.4078e-45, -1.4013e-45, -8.4078e-45,
           7.0065e-45]], grad_fn=<SqueezeBackward1>),
 tensor([[-0.0321,  0.0983,  0.1123, -0.2716,  0.2598, -0.0007],
         [-0.0276,  0.0937,  0.0941, -0.2640,  0.2666, -0.0003],
         [-0.0249,  0.0907,  0.0846, -0.2574,  0.2705,  0.0070],
         [-0.0251,  0.0860,  0.0800, -0.2522,  0.2768,  0.0137],
         [-0.0288,  0.0798,  0.0762, -0.2562,  0.2790,  0.0122]],
        grad_fn=<AddmmBackward0>),
 tensor([[-1.4013e-45,  1.4013e-45,  1.4013e-45,  0.0000e+00, -2.8026e-45,
          -0.0000e+00],
         [-1.40