In [None]:
# Install the third-party packages this notebook imports: nltk (BLEU scoring),
# torch, transformers and sentencepiece. `%pip` targets the running kernel's environment.
%pip install nltk torch transformers sentencepiece


In [None]:
import torch
import torch.nn as nn
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import BertTokenizer, BertModel
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers import GPTNeoForCausalLM
from torch.optim import Adam
import sentencepiece
from nltk.translate.bleu_score import sentence_bleu

In [None]:
# Hugging Face Hub identifier used for the tokenizer and for every model
# instance loaded with `from_pretrained` in this notebook.
MODEL_NAME = "EleutherAI/gpt-neo-125M"

In [None]:
# Shared tokenizer for both models.
tokenizer = GPT2Tokenizer.from_pretrained(MODEL_NAME)

# Two independent instances loaded from the same checkpoint: one is only ever
# prompted with accumulated context, the other is the one the optimizer updates.
model_with_context, model_with_finetuning = (
    GPTNeoForCausalLM.from_pretrained(MODEL_NAME) for _ in range(2)
)

In [None]:
# The tokenizer is given a pad token by reusing its end-of-sequence token,
# which lets `padding=True` work in the tokenizer calls further down.
tokenizer.pad_token = tokenizer.eos_token
# Record the same pad id on both model configs so they agree with the tokenizer.
model_with_context.config.pad_token_id = tokenizer.pad_token_id
model_with_finetuning.config.pad_token_id = tokenizer.pad_token_id

In [None]:
# A third instance loaded from the same checkpoint.
# NOTE(review): presumably kept as an untouched baseline for the fine-tuned
# model; it is not used elsewhere in the visible cells, so confirm.
model_with_finetuning_copy = GPTNeoForCausalLM.from_pretrained(MODEL_NAME)
# Give it the same pad-token configuration as the other two models so all
# three behave consistently when padding is involved.
model_with_finetuning_copy.config.pad_token_id = tokenizer.pad_token_id

In [None]:
# Load the haiku corpus, one entry per line of data/haiku.txt.
# - `with` closes the file handle deterministically.
# - An explicit encoding keeps the result independent of the platform default.
# - Blank lines (including the empty entry a trailing newline would otherwise
#   produce) are dropped: an empty prompt would ask generate_text for zero
#   new tokens from an empty input.
with open('data/haiku.txt', encoding='utf-8') as haiku_file:
    haikus = [line for line in haiku_file.read().splitlines() if line.strip()]

In [None]:
# Adam optimizer over the parameters of `model_with_finetuning` only
# (learning rate 5e-5); no other model instance is registered with it.
optimizer = Adam(model_with_finetuning.parameters(), lr=5e-5)
# Cross-entropy loss module with default settings.
loss_fn = nn.CrossEntropyLoss()

In [None]:
def generate_text(model, prompt, max_context_length=1024):
    """Continue ``prompt`` with ``model`` and return prompt plus continuation as text.

    The prompt is tokenized with the module-level ``tokenizer`` and truncated
    to ``max_context_length`` tokens. The model is asked for at most as many
    new tokens as the truncated prompt contains.

    Args:
        model: Causal language model exposing ``generate`` and ``device``.
        prompt: Text to continue.
        max_context_length: Maximum number of prompt tokens fed to the model.

    Returns:
        The first generated sequence (prompt tokens followed by the new
        tokens) decoded with special tokens removed. An empty string is
        returned when the prompt tokenizes to zero tokens.
    """
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True, max_length=max_context_length)

    # Size the continuation from the tokens that are actually fed to the
    # model. Counting the untruncated prompt could request more new tokens
    # than the truncated window and overrun the model's position limit.
    prompt_length = inputs["input_ids"].shape[-1]
    if prompt_length == 0:
        # Nothing to continue and zero new tokens requested.
        return ""

    inputs = {k: v.to(model.device) for k, v in inputs.items()}

    # Generate in eval mode so dropout cannot perturb the output if the model
    # was left in training mode, then restore whatever mode it was in.
    was_training = model.training
    model.eval()
    try:
        outputs = model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=prompt_length,
            num_return_sequences=1,
            repetition_penalty=1.5,
            pad_token_id=tokenizer.eos_token_id,
        )
    finally:
        model.train(was_training)

    return tokenizer.decode(outputs[0], skip_special_tokens=True)

In [None]:
def finetune_model(model, text, max_context_length=512, num_epochs=3):
    """Run a few gradient steps of causal-LM training on ``text``.

    The text is tokenized once with the module-level ``tokenizer``; only the
    last ``max_context_length`` tokens are kept. Each epoch performs one
    optimizer step on that single batch.

    The module-level ``optimizer`` is used for the update. It is constructed
    over ``model_with_finetuning``'s parameters, so ``model`` is expected to
    be that instance; any other model would receive gradients but no updates.

    Args:
        model: Causal language model that accepts ``labels`` and returns an
            output with a ``loss`` attribute.
        text: A string (or list of strings) to train on.
        max_context_length: Maximum number of trailing tokens to keep.
        num_epochs: Number of optimizer steps to take; must be at least 1.

    Returns:
        The loss of the final step as a Python float.

    Raises:
        ValueError: If ``num_epochs`` is less than 1.
    """
    if num_epochs < 1:
        raise ValueError("num_epochs must be at least 1")

    # Tokenization does not depend on the epoch, so do it once up front.
    encoded = tokenizer(text, return_tensors="pt", padding=True)

    # Keep the most recent tokens. The attention mask is sliced identically so
    # its shape always matches input_ids.
    input_ids = encoded["input_ids"][:, -max_context_length:].to(model.device)
    attention_mask = encoded["attention_mask"][:, -max_context_length:].to(model.device)

    # Padding positions must not contribute to the loss; -100 is the index the
    # model's loss ignores.
    labels = input_ids.clone()
    labels[attention_mask == 0] = -100

    model.train()
    for _ in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        # Use the loss computed by the model: it shifts the labels so each
        # position is scored on predicting the *next* token. Applying a
        # cross-entropy directly to unshifted logits and labels would instead
        # train the model to reproduce the current token.
        loss = outputs.loss
        loss.backward()
        optimizer.step()
    return loss.item()

In [None]:
def evaluate_bleu(reference, generated_text):
    """Score ``generated_text`` against ``reference`` with sentence-level BLEU.

    Both strings are split into tokens by the module-level ``tokenizer``; the
    tokenized reference is passed to ``sentence_bleu`` as the sole reference.

    Args:
        reference: Ground-truth text.
        generated_text: Text produced by a model.

    Returns:
        The value returned by ``sentence_bleu`` for this pair.
    """
    references = [tokenizer.tokenize(reference)]
    hypothesis = tokenizer.tokenize(generated_text)
    return sentence_bleu(references, hypothesis)

In [None]:
# Running state for the experiment: the accumulated haiku text and one list
# of per-haiku BLEU scores for each of the two models.
context = ""
bleu_scores_context, bleu_scores_finetuning = [], []

In [None]:
def process_haiku(model_with_context, model_with_finetuning, haiku, context=""):
    """Generate from both models for one haiku, print a report, and return scores.

    The haiku (plus a trailing space) is appended to ``context``. Each model
    is prompted twice: once with the last 1024 characters of the accumulated
    context and once with the haiku alone. Every generation is scored with
    ``evaluate_bleu`` (full-context generations against the whole accumulated
    context, haiku generations against the haiku).

    Args:
        model_with_context: First model to generate with.
        model_with_finetuning: Second model to generate with.
        haiku: The haiku text to process.
        context: Text accumulated from earlier calls.

    Returns:
        A tuple ``(context, bleu_context, bleu_finetuning)``: the extended
        context and the haiku-only BLEU score for each model. The
        full-context scores are printed but not returned.
    """
    context = context + haiku + " "
    # Character-level window over the accumulated text, shared by both models.
    recent_context = context[-1024:]

    # Full-context prompts first, then their scores against the whole context.
    generated_context_full = generate_text(model_with_context, recent_context)
    generated_finetuning_full = generate_text(model_with_finetuning, recent_context)
    bleu_context_full = evaluate_bleu(context, generated_context_full)
    bleu_finetuning_full = evaluate_bleu(context, generated_finetuning_full)

    # Haiku-only prompts and their scores against the haiku itself.
    generated_context = generate_text(model_with_context, haiku)
    generated_finetuning = generate_text(model_with_finetuning, haiku)
    bleu_context = evaluate_bleu(haiku, generated_context)
    bleu_finetuning = evaluate_bleu(haiku, generated_finetuning)

    report_lines = (
        f"Original Haiku: {haiku}",
        f"Generated by Context Model (Haiku): {generated_context} | BLEU: {bleu_context:.4f}",
        f"Generated by Finetuned Model (Haiku): {generated_finetuning} | BLEU: {bleu_finetuning:.4f}",
        f"Generated by Context Model (Full Context): {generated_context_full} | BLEU (Full): {bleu_context_full:.4f}",
        f"Generated by Finetuned Model (Full Context): {generated_finetuning_full} | BLEU (Full): {bleu_finetuning_full:.4f}",
        "-" * 50,
    )
    for report_line in report_lines:
        print(report_line)

    return context, bleu_context, bleu_finetuning

# Start from a clean slate so re-running this cell does not accumulate
# context or scores from a previous run.
context = ""
bleu_scores_context, bleu_scores_finetuning = [], []

# Only the first three haikus are processed; the accumulated context is
# threaded through successive calls.
# NOTE(review): finetune_model is never invoked in this loop, so both models
# carry the same pretrained weights here -- confirm whether a fine-tuning
# step is meant to happen per haiku.
for haiku in haikus[:3]:
    context, bleu_context, bleu_finetuning = process_haiku(
        model_with_context,
        model_with_finetuning,
        haiku,
        context=context,
    )
    bleu_scores_context.append(bleu_context)
    bleu_scores_finetuning.append(bleu_finetuning)

print("BLEU Scores for Context Model (Haiku):", bleu_scores_context)
print("BLEU Scores for Finetuning Model (Haiku):", bleu_scores_finetuning)
