In [1]:
!pip install torch transformers datasets evaluate




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
# pip install torch transformers datasets evaluate

import os
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
from transformers.modeling_outputs import BaseModelOutput
import evaluate

# =====================
# Liquid Neural Network
# =====================
class LiquidTimeStep(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.W_in = nn.Linear(input_size, hidden_size)
        self.W_h = nn.Linear(hidden_size, hidden_size)
        self.tau = nn.Parameter(torch.ones(hidden_size))

    def forward(self, x, h):
        dx = torch.tanh(self.W_in(x) + self.W_h(h))
        h_new = h + (dx - h) / self.tau
        return h_new

class LiquidNeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.liquid_step = LiquidTimeStep(input_size, hidden_size)
        self.output_layer = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        batch_size, seq_len, _ = x.size()
        h = torch.zeros(batch_size, self.hidden_size, device=x.device)
        for t in range(seq_len):
            h = self.liquid_step(x[:, t, :], h)
        return self.output_layer(h)

# =====================
# Dataset Loader
# =====================
class LegalDataset(Dataset):
    def __init__(self, root_dir, split, tokenizer, max_length=512):
        self.data = []
        judg_dir = os.path.join(root_dir, split, 'judgement')
        summ_dir = os.path.join(root_dir, split, 'summary')

        for fname in os.listdir(judg_dir):
            if fname.endswith('.txt') and os.path.exists(os.path.join(summ_dir, fname)):
                with open(os.path.join(judg_dir, fname), 'r', encoding='utf-8') as f1, \
                     open(os.path.join(summ_dir, fname), 'r', encoding='utf-8') as f2:
                    judgement = f1.read().strip()
                    summary = f2.read().strip()
                self.data.append((judgement, summary))

        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        judgement, summary = self.data[idx]
        enc_j = self.tokenizer(judgement, return_tensors='pt',
                               truncation=True, padding='max_length',
                               max_length=self.max_length)
        enc_s = self.tokenizer(summary, return_tensors='pt',
                               truncation=True, padding='max_length',
                               max_length=self.max_length)
        return enc_j.input_ids.squeeze(0), enc_s.input_ids.squeeze(0)

# =====================
# Training & Evaluation
# =====================
def train_and_evaluate(root_dir, device='cuda' if torch.cuda.is_available() else 'cpu'):
    tokenizer = T5Tokenizer.from_pretrained('t5-small')
    t5 = T5ForConditionalGeneration.from_pretrained('t5-small').to(device)
    liquid = LiquidNeuralNetwork(
        input_size=1,  # token IDs as single feature
        hidden_size=256,
        output_size=t5.config.d_model
    ).to(device)

    train_data = LegalDataset(root_dir, 'train-data', tokenizer)
    test_data = LegalDataset(root_dir, 'test-data', tokenizer)

    train_loader = DataLoader(train_data, batch_size=4, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=4)

    optimizer = torch.optim.Adam(list(liquid.parameters()) + list(t5.parameters()), lr=5e-5)

    print("==== Training ====")
    for epoch in range(3):
        t5.train()
        for idx, (ids_judg, ids_summ) in enumerate(train_loader):
            ids_judg = ids_judg.to(device).float().unsqueeze(-1)  # [batch, seq, 1]
            ids_summ = ids_summ.to(device)

            emb = liquid(ids_judg).unsqueeze(1)  # [batch, 1, hidden_size]
            outputs = t5(input_ids=None,
                         encoder_outputs=BaseModelOutput(last_hidden_state=emb),
                         labels=ids_summ)

            loss = outputs.loss
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if idx % 50 == 0:
                print(f"Epoch {epoch} Step {idx} Loss: {loss.item():.4f}")

    torch.save(liquid.state_dict(), 'liquid_model.pth')
    t5.save_pretrained('t5_liquid_model')
    tokenizer.save_pretrained('t5_liquid_model')

    print("==== Evaluating with ROUGE ====")
    rouge = evaluate.load("rouge")
    t5.eval()

    predictions, references = [], []
    with torch.no_grad():
        for ids_judg, ids_summ in test_loader:
            ids_judg = ids_judg.to(device).float().unsqueeze(-1)
            ids_summ = ids_summ.to(device)

            emb = liquid(ids_judg).unsqueeze(1)
            encoder_outputs = BaseModelOutput(last_hidden_state=emb)
            generated_ids = t5.generate(encoder_outputs=encoder_outputs, max_length=128)

            decoded_preds = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
            decoded_refs = tokenizer.batch_decode(ids_summ, skip_special_tokens=True)

            predictions.extend(decoded_preds)
            references.extend(decoded_refs)

    rouge_scores = rouge.compute(predictions=predictions, references=references)
    print("ROUGE Scores:", rouge_scores)

# =====================
# Run
# =====================
if __name__ == '__main__':
    # Point to the INNER folder
    data_root = 'IN-Abs/IN-Abs'
    train_and_evaluate(data_root)


  from .autonotebook import tqdm as notebook_tqdm
You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


==== Training ====
Epoch 0 Step 0 Loss: 10.0434
Epoch 0 Step 50 Loss: 6.7892
Epoch 0 Step 100 Loss: 5.9301
Epoch 0 Step 150 Loss: 4.7515
Epoch 0 Step 200 Loss: 4.8173
Epoch 0 Step 250 Loss: 4.3100
Epoch 0 Step 300 Loss: 4.5681
Epoch 0 Step 350 Loss: 4.4666
Epoch 0 Step 400 Loss: 3.9090
Epoch 0 Step 450 Loss: 3.3466
Epoch 0 Step 500 Loss: 3.9505
Epoch 0 Step 550 Loss: 4.4285
Epoch 0 Step 600 Loss: 4.3987
Epoch 0 Step 650 Loss: 4.2467
Epoch 0 Step 700 Loss: 4.2485
Epoch 0 Step 750 Loss: 3.4759
Epoch 0 Step 800 Loss: 3.8752
Epoch 0 Step 850 Loss: 4.0699
Epoch 0 Step 900 Loss: 3.6458
Epoch 0 Step 950 Loss: 4.1561
Epoch 0 Step 1000 Loss: 3.9061
Epoch 0 Step 1050 Loss: 4.1338
Epoch 0 Step 1100 Loss: 3.3210
Epoch 0 Step 1150 Loss: 4.0493
Epoch 0 Step 1200 Loss: 3.4245
Epoch 0 Step 1250 Loss: 4.2910
Epoch 0 Step 1300 Loss: 3.7003
Epoch 0 Step 1350 Loss: 4.3048
Epoch 0 Step 1400 Loss: 3.9976
Epoch 0 Step 1450 Loss: 4.1789
Epoch 0 Step 1500 Loss: 3.9657
Epoch 0 Step 1550 Loss: 3.2164
Epoch 0 Ste