In [1]:
# This script generates embeddings for legal documents in various subdomains using a SentenceTransformer model.
# It reads the cleaned text files (*.cleaned.txt), generates one embedding per file, and saves each one as a .npy file.

import os
import numpy as np
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used for every document processed by this script.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Names of the per-subdomain folders to scan, in processing order.
subdomains = [
    "company_law",
    "tax_law",
    "banking_law",
    "securities_law",
    "insolvency_law",
    "contract_law",
    "negotiable_instruments_law",
    "consumer_law",
    "ip_law",
    "arbitration_law",
    "trust_law",
    "electronic_transactions_law",
    "foreign_exchange_law",
]

# Root directory containing one folder per subdomain (relative to the
# working directory the script is launched from).
base_dir = "../data/subdomains/"

# Walk every subdomain folder and write one embedding file next to each
# cleaned text file ("<name>.cleaned.txt" -> "<name>.embedding.npy").
CLEANED_SUFFIX = ".cleaned.txt"
EMBEDDING_SUFFIX = ".embedding.npy"

for subdomain in subdomains:
    folder = os.path.join(base_dir, subdomain)
    # A missing subdomain folder should not abort the remaining subdomains.
    if not os.path.isdir(folder):
        print(f"Skipping missing folder: {folder}")
        continue
    # os.listdir returns entries in arbitrary order; sort so that the
    # processing order (and the log output) is reproducible.
    for fname in sorted(os.listdir(folder)):
        if not fname.endswith(CLEANED_SUFFIX):
            continue
        file_path = os.path.join(folder, fname)
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
        # Generate a single embedding vector for the whole file.
        # NOTE(review): SentenceTransformer models truncate input beyond
        # their maximum sequence length, so for long statutes this vector
        # likely reflects only the beginning of the document — confirm
        # whether chunking and pooling is needed.
        embedding = model.encode([text])[0]
        # Swap only the trailing suffix; str.replace would also rewrite any
        # earlier occurrence of ".cleaned.txt" in the path.
        npy_path = file_path[: -len(CLEANED_SUFFIX)] + EMBEDDING_SUFFIX
        np.save(npy_path, embedding)
        print(f"Embedding saved: {npy_path}")


Embedding saved: ../data/subdomains/company_law\01. Companies Act No. 7 of 2007.embedding.npy
Embedding saved: ../data/subdomains/tax_law\02. Inland Revenue Act_No_24_2017_E.embedding.npy
Embedding saved: ../data/subdomains/tax_law\03. Inland Revenue (Amendment) Act No. 2 of 2025.embedding.npy
Embedding saved: ../data/subdomains/banking_law\04. Banking Act 30_1988.embedding.npy
Embedding saved: ../data/subdomains/banking_law\05. Banking_Amendment_Act_No_24_of_2024_e.embedding.npy
Embedding saved: ../data/subdomains/banking_law\06. Banking (Special Provisions) Act, No. 17 of 2023.embedding.npy
Embedding saved: ../data/subdomains/securities_law\07. Securities and Exchange Commission of Sri Lanka.embedding.npy
Embedding saved: ../data/subdomains/insolvency_law\08. INSOLVENTS [Cap.103 - Lanka Law.embedding.npy
Embedding saved: ../data/subdomains/contract_law\09. Sale_of_Goods_Ordinance_No_11_of_1896_of.embedding.npy
Embedding saved: ../data/subdomains/negotiable_instruments_law\10. Bills o