In [18]:
import pandas as pd
import numpy as npn
from rouge import Rouge
import rouge
from torch.utils.data import DataLoader
import torch
from datasets import load_dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW
import tensorflow as tf


In [2]:
physical_devices = tf.config.list_physical_devices('GPU')
print("Anzahl der verfügbaren GPUs:", len(physical_devices))

Anzahl der verfügbaren GPUs: 0


In [12]:
# Hier die größe der Test und Trainigsdaten Anpassen - auf dem Laptop geht das so nicht

train_data = load_dataset('xsum', split='train[:2]')
print("Train Set: ",len(train_data))

test_data= load_dataset('xsum', split='test[:20]')
print("Test Set: ",len(test_data))

train_article = train_data['document'][0]
train_summary = train_data['summary'][0]
test_article = test_data['document'][0]
test_summary = test_data['summary'][0]

Found cached dataset xsum (C:/Users/JDari/.cache/huggingface/datasets/xsum/default/1.2.0/082863bf4754ee058a5b6f6525d0cb2b18eadb62c7b370b095d1364050a52b71)


Train Set:  2


Found cached dataset xsum (C:/Users/JDari/.cache/huggingface/datasets/xsum/default/1.2.0/082863bf4754ee058a5b6f6525d0cb2b18eadb62c7b370b095d1364050a52b71)


Test Set:  20


In [4]:
tokenizer = T5Tokenizer.from_pretrained('t5-small', model_max_length=50000)

In [13]:
def preprocess_function(examples):
    inputs = [doc for doc in examples['document']]
    targets = [summ for summ in examples['summary']]
    
    inputs_encoded = tokenizer.batch_encode_plus(inputs, truncation=True, padding='max_length', max_length=512)
    targets_encoded = tokenizer.batch_encode_plus(targets, truncation=True, padding='max_length', max_length=512)
    
    input_ids = torch.tensor(inputs_encoded['input_ids'])
    attention_mask = torch.tensor(inputs_encoded['attention_mask'])
    labels = torch.tensor(targets_encoded['input_ids'])
    
    return {'input_ids': input_ids,
            'attention_mask': attention_mask,
            'labels': labels}
train_data_processed = train_data.map(preprocess_function, batched=True)
test_data_processed = test_data.map(preprocess_function, batched=True)

learning_rate = 1e-4
weight_decay = 0.01
num_train_epochs = 5
batch_size = 2

# Erstellung des Optimierers
model = T5ForConditionalGeneration.from_pretrained('t5-small')
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Erstellung der DataLoader
train_dataloader = DataLoader(train_data_processed, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_data_processed, batch_size=batch_size, shuffle=True)

# Trainingsschleife
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

Loading cached processed dataset at C:\Users\JDari\.cache\huggingface\datasets\xsum\default\1.2.0\082863bf4754ee058a5b6f6525d0cb2b18eadb62c7b370b095d1364050a52b71\cache-37c1da1e6932719d.arrow


                                                  

T5ForConditionalGeneration(
  (shared): Embedding(32128, 512)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=512, bias=False)
              (k): Linear(in_features=512, out_features=512, bias=False)
              (v): Linear(in_features=512, out_features=512, bias=False)
              (o): Linear(in_features=512, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 8)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseActDense(
              (wi): Linear(in_features=512, out_features=2048, bias=False)
              (wo): Linear(in_features=2048, out_features=512, bias=False)
              (dropout): Drop

In [7]:
for epoch in range(num_train_epochs):
    model.train()
    total_loss = 0
    
    for batch in train_dataloader:
        input_ids = torch.cat(batch['input_ids'], dim=0)
        attention_mask = torch.cat(batch['attention_mask'], dim=0)
        labels = torch.cat(batch['labels'], dim=0)

        input_ids = input_ids.unsqueeze(0)
        attention_mask = attention_mask.unsqueeze(0)
        
        optimizer.zero_grad()
        
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, decoder_input_ids=input_ids, labels=labels)
        loss = outputs.loss
        
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
    
    average_loss = total_loss / len(train_dataloader)
    print(f"Epoch {epoch + 1}: Average Loss = {average_loss:.4f}")
 

Epoch 1: Average Loss = 12.1761
Epoch 2: Average Loss = 10.8990
Epoch 3: Average Loss = 8.7943
Epoch 4: Average Loss = 7.7384
Epoch 5: Average Loss = 6.5529


In [20]:
# Evaluationsschleife
model.eval()
total_loss = 0

rouge_scorer = Rouge()

total_rouge1 = 0.0
total_rouge2 = 0.0
total_rougeL = 0.0


for batch in test_dataloader:
    input_ids = torch.cat(batch['input_ids'], dim=0)
    attention_mask = torch.cat(batch['attention_mask'], dim=0)
    labels = torch.cat(batch['labels'], dim=0)

    input_ids = input_ids.unsqueeze(0)
    attention_mask = attention_mask.unsqueeze(0)
    labels = labels.unsqueeze(0)
    
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        logits = outputs.logits
        loss = outputs.loss

        predicted_ids = model.generate(input_ids=input_ids, attention_mask=attention_mask)
        predicted_sentences = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)
        reference_sentences = tokenizer.batch_decode(labels, skip_special_tokens=True)

        rouge_scores = rouge_scorer.get_scores(predicted_sentences, reference_sentences)
        total_rouge1 += rouge_scores[0]['rouge-1']['f']
        total_rouge2 += rouge_scores[0]['rouge-2']['f']
        total_rougeL += rouge_scores[0]['rouge-l']['f']
        
    total_loss += loss.item()

average_loss = total_loss / len(test_dataloader)
average_rouge1 = total_rouge1 / len(test_dataloader)
average_rouge2 = total_rouge2 / len(test_dataloader)
average_rougeL = total_rougeL / len(test_dataloader)
print(f"Test Loss: {average_loss:.4f}")
print(f"Average ROUGE-1: {average_rouge1:.4f}")
print(f"Average ROUGE-2: {average_rouge2:.4f}")
print(f"Average ROUGE-L: {average_rougeL:.4f}")



Test Loss: 22.7623
Average ROUGE-1: 0.0277
Average ROUGE-2: 0.0000
Average ROUGE-L: 0.0277
