In [5]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2Config, GPT2LMHeadModel, AdamW, get_linear_schedule_with_warmup
from tqdm import tqdm
import numpy as np
from IPython.display import clear_output
import time
import matplotlib.pyplot as plt
from torch.quantization import quantize_dynamic
from source.load_poems import load_poems
import kagglehub

In [2]:
class PoetryDataset(Dataset):
    """In-memory dataset of pre-tokenized poems.

    Each poem is rendered as one text (a ``Title:`` line, a ``Category:``
    line, a blank line, then the poem body), tokenized once at construction
    time, and stored as a pair of tensors plus its category label.

    Args:
        poems_data: Iterable of dicts that provide the keys ``'title'``,
            ``'main_category'``, ``'sub_category'`` and ``'text'``.
        tokenizer: Callable invoked as ``tokenizer(text, truncation=True,
            max_length=max_length, padding='max_length', return_tensors='pt')``.
            Its result must be indexable by ``'input_ids'`` and
            ``'attention_mask'``; row 0 of each entry is stored.
            NOTE(review): ``padding='max_length'`` presumably requires the
            tokenizer to have a pad token configured -- confirm in the cell
            that creates the tokenizer.
        max_length: Forwarded to the tokenizer as ``max_length``.
        device: Optional target for ``Tensor.to``. When ``None`` (default)
            the tensors stay wherever the tokenizer produced them, so batches
            can be moved inside the training loop instead of keeping the
            whole dataset resident on the training device. Passing a device
            keeps the previous behaviour of moving every tensor up front.
    """

    def __init__(self, poems_data, tokenizer, max_length, device=None):
        self.input_ids = []
        self.attn_masks = []
        self.categories = []

        for poem in tqdm(poems_data):
            # Hierarchical label, reused for the prompt header and the
            # per-item 'category' field.
            category = f"{poem['main_category']}/{poem['sub_category']}"
            full_text = (
                f"Title: {poem['title']}\n"
                f"Category: {category}\n\n{poem['text']}"
            )

            encodings = tokenizer(
                full_text,
                truncation=True,
                max_length=max_length,
                padding='max_length',
                return_tensors='pt'
            )

            # The tokenizer is called with return_tensors='pt' on a single
            # text; only the first row of each returned entry is kept.
            ids = encodings['input_ids'][0]
            mask = encodings['attention_mask'][0]
            if device is not None:
                ids = ids.to(device)
                mask = mask.to(device)

            self.input_ids.append(ids)
            self.attn_masks.append(mask)
            self.categories.append(category)

    def __len__(self):
        """Return the number of stored poems."""
        return len(self.input_ids)

    def __getitem__(self, idx):
        """Return the stored tensors and category label for poem ``idx``."""
        return {
            'input_ids': self.input_ids[idx],
            'attention_mask': self.attn_masks[idx],
            'category': self.categories[idx]
        }

In [None]:
# Hyperparameters for the fine-tuning run; they are consumed by later cells.
MAX_LENGTH: int = 400          # presumably the max_length handed to PoetryDataset -- confirm
BATCH_SIZE: int = 2
EPOCHS: int = 10
LEARNING_RATE: float = 2e-5
# NOTE(review): named "steps" but holds a fraction -- presumably a warmup
# ratio of the total step count; confirm where it is consumed.
WARMUP_STEPS: float = 0.1

In [8]:
# Use CUDA when PyTorch reports it as available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}")

# Download the Kaggle poems dataset via kagglehub and parse the returned
# path with the project's load_poems helper.
path = kagglehub.dataset_download("michaelarman/poemsdataset")
poems_data = load_poems(path)

Using device: cpu
Loaded 20625 poems:
- Forms: 6306
- Topics: 14319


In [9]:
# Preview a few records only; displaying the whole list would write every
# poem into the notebook's saved output.
poems_data[:3]

[{'text': "Of the modern versifications of\nancient legendary tales. - An impromptu.\nThe tender infant, meek and mild,\nFell down upon the stone:\nThe nurse took p the squealing child,\nBut still the child squeal'd on.",
  'main_category': 'forms',
  'sub_category': 'burlesque',
  'title': 'BurlesquePoemsBurlesquePoembySamuelJohnson'},
 {'text': 'Dear beautiful lady\nthe poor devil very humbly thanks you\nfor your two melons and prays for you\nto content yourself with his small thanks.\nHe would like to send you some wonder\nbecause wonder he names Melons.\nBut in a man of his kind from whom\nnothing comes out and nothing comes in\nhe goes nuts for wonderful melons.\nIf only your beautiful eyes\nhis house would light would be very well.\nSo just please truth be told from little thanks.\nAnd if that is insufficient pierce me at an angle\nwith a sharp dart and may my heart be skewered\non the spot.\nMay I love you with all my soul.\nDo not doubt ebony bait nor my eyes\nspearing your mel