In [1]:
!where python   # On Windows

C:\Users\asus\AppData\Local\Programs\Python\Python310\python.exe
C:\Users\asus\AppData\Local\Programs\Python\Python312\python.exe
C:\Users\asus\AppData\Local\Microsoft\WindowsApps\python.exe


INFO: Could not find "#".
INFO: Could not find "On".
INFO: Could not find "Windows".


In [2]:
!pip install torch transformers evaluate




[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!pip install transformers evaluate

Collecting transformers
  Downloading transformers-4.55.0-py3-none-any.whl (11.3 MB)
Collecting evaluate
  Downloading evaluate-0.4.5-py3-none-any.whl (84 kB)
Collecting tokenizers<0.22,>=0.21
  Downloading tokenizers-0.21.4-cp39-abi3-win_amd64.whl (2.5 MB)
Collecting tqdm>=4.27
  Using cached tqdm-4.67.1-py3-none-any.whl (78 kB)
Collecting pyyaml>=5.1
  Using cached PyYAML-6.0.2-cp310-cp310-win_amd64.whl (161 kB)
Collecting regex!=2019.12.17
  Downloading regex-2025.7.34-cp310-cp310-win_amd64.whl (276 kB)
Collecting safetensors>=0.4.3
  Downloading safetensors-0.6.2-cp38-abi3-win_amd64.whl (320 kB)
Collecting huggingface-hub<1.0,>=0.34.0
  Downloading huggingface_hub-0.34.4-py3-none-any.whl (561 kB)
Collecting requests
  Using cached requests-2.32.4-py3-none-any.whl (64 kB)
Collecting dill
  Downloading dill-0.4.0-py3-none-any.whl (119 kB)
Collecting multiprocess
  Downloading multiprocess-0.70.18-py310-none-any.whl (134 kB)
Collecting pandas
  Downloading pandas-2.3.1-cp310-cp310-win

You should consider upgrading via the 'C:\Users\asus\Desktop\3rd year\legal\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [3]:
import os
os.environ["TRANSFORMERS_NO_TF"] = "1"

In [4]:
!pip install SentencePiece

Collecting SentencePiece
  Downloading sentencepiece-0.2.1-cp310-cp310-win_amd64.whl (1.1 MB)
Installing collected packages: SentencePiece
Successfully installed SentencePiece-0.2.1


You should consider upgrading via the 'C:\Users\asus\Desktop\3rd year\legal\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [3]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration
import evaluate

# ===== CONFIG =====
t5_model_name = "t5-small"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
max_len_input = 512
max_len_output = 128
batch_size = 2
num_epochs = 1
learning_rate = 5e-5

# ===== LOAD DATA =====
def load_data(judgement_dir, summary_dir):
    judgements, summaries = [], []
    for fname in os.listdir(judgement_dir):
        with open(os.path.join(judgement_dir, fname), "r", encoding="utf-8") as jf, \
             open(os.path.join(summary_dir, fname), "r", encoding="utf-8") as sf:
            judgements.append(jf.read())
            summaries.append(sf.read())
    return judgements, summaries

train_judgements, train_summaries = load_data(
    "IN-Abs/IN-Abs/train-data/judgement",
    "IN-Abs/IN-Abs/train-data/summary"
)

test_judgements, test_summaries = load_data(
    "IN-Abs/IN-Abs/test-data/judgement",
    "IN-Abs/IN-Abs/test-data/summary"
)

# ===== DATASET CLASS =====
class LegalSummaryDataset(Dataset):
    def __init__(self, judgements, summaries, tokenizer):
        self.judgements = judgements
        self.summaries = summaries
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.judgements)

    def __getitem__(self, idx):
        inputs = self.tokenizer(
            self.judgements[idx],
            max_length=max_len_input,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        )

        targets = self.tokenizer(
            self.summaries[idx],
            max_length=max_len_output,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        )

        return {
            "input_ids": inputs["input_ids"].squeeze(),
            "attention_mask": inputs["attention_mask"].squeeze(),
            "labels": targets["input_ids"].squeeze()
        }

# ===== LOAD TOKENIZER + MODEL =====
tokenizer = T5Tokenizer.from_pretrained(t5_model_name)
model = T5ForConditionalGeneration.from_pretrained(t5_model_name).to(device)

# ===== DATALOADERS =====
train_dataset = LegalSummaryDataset(train_judgements, train_summaries, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# ===== TRAIN =====
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch in train_loader:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        loss = outputs.loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{num_epochs} — Avg Loss: {avg_loss:.4f}")

# ===== EVALUATION =====
model.eval()
rouge = evaluate.load("rouge")
predictions = []

with torch.no_grad():
    for text in test_judgements:
        inputs = tokenizer(
            text,
            max_length=max_len_input,
            truncation=True,
            padding="max_length",
            return_tensors="pt"
        ).to(device)

        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_length=max_len_output
        )

        summary = tokenizer.decode(output_ids[0], skip_special_tokens=True)
        predictions.append(summary)

rouge_scores = rouge.compute(predictions=predictions, references=test_summaries)
print("ROUGE Scores:", rouge_scores)

Epoch 1/1 — Avg Loss: 2.7794
ROUGE Scores: {'rouge1': np.float64(0.1818574043701871), 'rouge2': np.float64(0.08554410003535533), 'rougeL': np.float64(0.125336377438863), 'rougeLsum': np.float64(0.1625833910513469)}


In [2]:
!pip install absl-py rouge-score nltk

Collecting absl-py
  Downloading absl_py-2.3.1-py3-none-any.whl (135 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
Collecting nltk
  Downloading nltk-3.9.1-py3-none-any.whl (1.5 MB)
Collecting click
  Downloading click-8.2.1-py3-none-any.whl (102 kB)
Collecting joblib
  Downloading joblib-1.5.1-py3-none-any.whl (307 kB)
Using legacy 'setup.py install' for rouge-score, since package 'wheel' is not installed.
Installing collected packages: joblib, click, nltk, absl-py, rouge-score
    Running setup.py install for rouge-score: started
    Running setup.py install for rouge-score: finished with status 'done'
Successfully installed absl-py-2.3.1 click-8.2.1 joblib-1.5.1 nltk-3.9.1 rouge-score-0.1.2


You should consider upgrading via the 'C:\Users\asus\Desktop\3rd year\legal\venv\Scripts\python.exe -m pip install --upgrade pip' command.


In [5]:
!pip install "numpy<2"

Collecting numpy<2
  Downloading numpy-1.26.4-cp310-cp310-win_amd64.whl.metadata (61 kB)
Downloading numpy-1.26.4-cp310-cp310-win_amd64.whl (15.8 MB)
   ---------------------------------------- 0.0/15.8 MB ? eta -:--:--
   --- ------------------------------------ 1.3/15.8 MB 7.4 MB/s eta 0:00:02
   ------ --------------------------------- 2.6/15.8 MB 6.9 MB/s eta 0:00:02
   --------- ------------------------------ 3.9/15.8 MB 6.5 MB/s eta 0:00:02
   ------------- -------------------------- 5.2/15.8 MB 6.2 MB/s eta 0:00:02
   --------------- ------------------------ 6.3/15.8 MB 6.0 MB/s eta 0:00:02
   ------------------ --------------------- 7.3/15.8 MB 5.8 MB/s eta 0:00:02
   ------------------- -------------------- 7.9/15.8 MB 5.3 MB/s eta 0:00:02
   ---------------------- ----------------- 8.9/15.8 MB 5.4 MB/s eta 0:00:02
   ------------------------- -------------- 10.2/15.8 MB 5.4 MB/s eta 0:00:02
   ---------------------------- ----------- 11.3/15.8 MB 5.4 MB/s eta 0:00:01
   -----

  You can safely remove it manually.
  You can safely remove it manually.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.45.1 requires packaging<25,>=20, but you have packaging 25.0 which is incompatible.

[notice] A new release of pip is available: 25.1.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
!nvidia-smi

Tue Aug 12 16:31:50 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 576.88                 Driver Version: 576.88         CUDA Version: 12.9     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 3060 ...  WDDM  |   00000000:01:00.0 Off |                  N/A |
| N/A   84C    P0             36W /   87W |     135MiB /   6144MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                