In [1]:
import json
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments
from transformers import EarlyStoppingCallback
from datasets import load_dataset, Dataset
import numpy as np
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Загрузка данных из JSON файла
def load_poems(file_path):
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    return data

In [3]:
# Подготовка данных для обучения
def prepare_dataset(poems):
    texts = []
    for poem in poems.values():
        texts.append("\n".join(poem))  # Объединяем строки стихотворения в один текст
    return Dataset.from_dict({"text": texts})

In [4]:
# Генерация стихотворения
def generate_poem(model, tokenizer, prompt):
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_length=100)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [5]:
import sacrebleu

def calculate_chrf(reference_texts, generated_texts):
    # reference_texts - список строк с эталонными текстами
    # generated_texts - список строк с сгенерированными текстами
    chrf_score = sacrebleu.corpus_chrf(generated_texts, [reference_texts], char_order=6)
    return chrf_score.score

def calculate_perplexity(model, tokenizer, texts):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for text in texts:
            inputs = tokenizer(text, return_tensors="pt")
            outputs = model(**inputs, labels=inputs["input_ids"])
            total_loss += outputs.loss.item()
    
    avg_loss = total_loss / len(texts)
    perplexity = torch.exp(torch.tensor(avg_loss))
    return perplexity.item()

def calculate_novelty(reference_texts, generated_texts):
    reference_set = set(reference_texts)
    unique_generated = set(generated_texts)
    
    novelty_score = len(unique_generated - reference_set) / len(unique_generated) if unique_generated else 0
    return novelty_score

def calculate_distinct_n(generated_texts, n=2):
    ngrams = set()
    for text in generated_texts:
        tokens = text.split()  # Разделение на слова
        for i in range(len(tokens) - n + 1):
            ngram = tuple(tokens[i:i+n])
            ngrams.add(ngram)
    
    distinct_n_score = len(ngrams) / sum(len(text.split()) for text in generated_texts)
    return distinct_n_score

def llm_as_judge(model, tokenizer, texts):
    scores = []
    
    for text in texts:
        inputs = tokenizer(f"Оцените качество следующего стихотворения от 1 до 10:\n{text}", return_tensors="pt")
        outputs = model.generate(**inputs)
        score_str = tokenizer.decode(outputs[0], skip_special_tokens=True)
        
        # Предполагаем, что оценка будет в формате "Оценка: X"
        score = int(score_str.split(":")[-1].strip())
        scores.append(score)

    average_score = sum(scores) / len(scores) if scores else 0
    return average_score


def evaluate_model(model, tokenizer, reference_texts, generated_texts):
    chrf_score = calculate_chrf(reference_texts, generated_texts)
    perplexity_score = calculate_perplexity(model, tokenizer, generated_texts)
    distinct_n_score = calculate_distinct_n(generated_texts)
    novelty_score = calculate_novelty(reference_texts, generated_texts)
    llm_judge_score = llm_as_judge(model, tokenizer, generated_texts)

    print(f"chrF++: {chrf_score:.4f}")
    print(f"Perplexity: {perplexity_score:.4f}")
    print(f"Distinct-n: {distinct_n_score:.4f}")
    print(f"Novelty: {novelty_score:.4f}")
    print(f"LLM-as-judge score: {llm_judge_score:.4f}")

# Пример вызова функции оценки после обучения модели:
# evaluate_model(model, tokenizer, reference_poems_list, generated_poems_list)

In [6]:
def plot_metrics(metrics):
    plt.plot(metrics['train_loss'], label='Train Loss')
    plt.plot(metrics['eval_loss'], label='Eval Loss')
    
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    
    plt.title('Training and Evaluation Loss')
    plt.legend()
    
    plt.show()

In [7]:
# Загрузка данных
poems = load_poems('data/poems.json')
    
# Подготовка датасета
dataset = prepare_dataset(poems)

# Загрузка предобученной модели и токенизатора
model_name = "gpt2"  # Вы можете выбрать другую модель из Hugging Face Model Hub
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

In [8]:
# Настройка параметров обучения
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=30,
    per_device_train_batch_size=2,
    save_steps=10_000,
    save_total_limit=2,
    logging_dir='./logs',
    logging_steps=500,
    eval_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    greater_is_better=False,
)

# Создание экземпляра Trainer с добавлением EarlyStoppingCallback
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    eval_dataset=dataset,  # Для простоты используем тот же датасет для оценки
    callbacks=[EarlyStoppingCallback(early_stopping_patience=5)]  # Установите patience на 5 эпох
)

ValueError: --load_best_model_at_end requires the save and eval strategy to match, but found
- Evaluation strategy: IntervalStrategy.EPOCH
- Save strategy: SaveStrategy.STEPS

In [None]:

# Обучение модели
trainer.train()

In [None]:
# Сохранение модели после обучения
model.save_pretrained('./poem_generator')
tokenizer.save_pretrained('./poem_generator')

# Оценка качества модели (здесь вы можете добавить свою реализацию)
evaluate_model(model, tokenizer, dataset)

In [None]:
prompt = "На лугу цветы"
print(generate_poem(model, tokenizer, prompt))