In [7]:
from src.eval_transformer_pipeline import run_transformer_tests
from src.lstm_train import load_training_model, train_model, save_training_model, save_training_curves
from src.lstm_model import LSTMModel
from src.next_token_dataset import tokenizer, train_loader, val_loader, test_loader
from src.eval_lstm import calculate_rouge_and_test_results

In [None]:
# Build the LSTM model; the vocabulary size is taken from the tokenizer length.
vocab_size = len(tokenizer)
model = LSTMModel(
    vocab_size=vocab_size,
    hidden_dim=128,
    num_layers=3,
    dropout=0.5,
)

# Start the training process on the train/validation loaders.
train_results = train_model(
    model,
    train_loader,
    val_loader,
    tokenizer,
    num_epochs=10,
    learning_rate=0.001,
)

# Save the final results: the trained model first, then the training curves
# (train losses and validation ROUGE scores).
save_dir = save_training_model(train_results['model'])
save_training_curves(
    train_results['train_losses'],
    train_results['val_rouge_scores'],
)

Результаты обучения:

<img src="./model/training_plots.png" width="900" height="400" alt="График ROUGE">

- avg_train_loss: 5.11
- avg_rouge_1: 0.04
- avg_rouge_2: 0.01

Пример генерации 1:
- Исходный текст: "this nice weather is making me really want to get my tattoo sleeve but gotta pay for the lsat"
- Переданный в обработку: "this nice weather is making me really want to get my tattoo sleeve but gotta"
- Результат обработки: "this nice weather is making me really want to get my tattoo sleeve but gotta get out of the same time i dont want to go"

Пример генерации 2:
- Исходный текст: "this nice weather is making me really want to get my tattoo sleeve but gotta pay for the lsat"
- Переданный в обработку: "this nice weather is making me really want to get my tattoo sleeve but gotta"
- Результат обработки: "this nice weather is making me really want to get my tattoo sleeve but gotta go to bed nows and tomorrow"

Пример генерации 3:
- Исходный текст: "this nice weather is making me really want to get my tattoo sleeve but gotta pay for the lsat"
- Переданный в обработку: "this nice weather is making me really want to get my tattoo sleeve but gotta"
- Результат обработки: "this nice weather is making me really want to get my tattoo sleeve but gotta go to bed i miss"


In [None]:
# Load the model from the checkpoint file.
model = load_training_model("./model/full_model.pth")

# Test the LSTM using test_loader and print whatever the evaluation returns.
res = calculate_rouge_and_test_results(
    model,
    test_loader,
    tokenizer,
    num_examples=1000,
)
print(res)

Результаты тестирования LSTM:

- avg_rouge_1: 0.04
- avg_rouge_2: 0.01 

- Исходный текст: "omg i cant believe i forgot to look for u next yr well definitely have to seek each other out"
- Переданный в обработку: "omg i cant believe i forgot to look for u next yr well definitely"
- Результат обработки: "omg i cant believe i forgot to look for u next yr well definitely follow meoquot"

In [None]:
# Test the ready-made distilgpt2 model.
run_transformer_tests()

Результаты тестирования distilgpt2:

<img src="./transformer_tests_results/training_plots.png" width="300" height="400" alt="График ROUGE">

- avg_rouge_1: 0.07
- avg_rouge_2: 0.01

Пример генерации 1:
- Исходный текст: "haha my dad signed in to my twitter some how lol i dont care about david cameron no more mod studs woop woop"
- Переданный в обработку: "haha my dad signed in to my twitter some how lol i dont care about david cameron"
- Результат обработки: "haha my dad signed in to my twitter some how lol i dont care about david cameron i dont care about david camer"

Пример генерации 2:
- Исходный текст: "im interested in getting hooked up with spymaster"
- Переданный в обработку: "im interested in getting hooked up"
- Результат обработки: "im interested in getting hooked up to the"

Пример генерации 3:
- Исходный текст: "i crashed and cant get back in"
- Переданный в обработку: "i crashed and cant get"
- Результат обработки: "i crashed and cant get on"

Вывод:

1. Обе модели генерируют в целом связный текст, хотя встречаются артефакты (например, «meoquot» у LSTM и повтор фразы у distilgpt2).
2. Метрики ROUGE в обоих случаях низкие (ROUGE-1: 0.04 у LSTM и 0.07 у distilgpt2; ROUGE-2: 0.01 у обеих моделей), поскольку точно предсказать окончание фразы сложно.
3. Поскольку модель LSTM более легковесна, её использование может быть предпочтительнее, несмотря на несколько более низкий ROUGE-1 (0.04 против 0.07 у distilgpt2).