In [2]:
import pickle
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load embeddings
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# Only open files that were produced by this project's own embedding step.
with open("data/embeddings/embeddings.pkl", "rb") as f:
    data = pickle.load(f)

# FAISS operates on C-contiguous float32 matrices. Coerce here so that a pickle
# holding float64 values (or a nested list) is still indexed correctly.
embeddings = np.ascontiguousarray(data["embeddings"], dtype=np.float32)
metadata = data["metadata"]

# search_cases() looks metadata up by the row position FAISS returns, so the
# two containers must line up one-to-one.
if len(metadata) != embeddings.shape[0]:
    raise ValueError(
        f"metadata has {len(metadata)} entries but embeddings has "
        f"{embeddings.shape[0]} rows"
    )

# Build FAISS index (exact, brute-force L2 search)
d = embeddings.shape[1]  # dimension
index = faiss.IndexFlatL2(d)
index.add(embeddings)

print(f"FAISS index built with {len(metadata)} entries")

# Load same embedder
# NOTE(review): presumably this is the model that produced the stored
# embeddings; queries encoded with a different model would not be comparable.
embedder = SentenceTransformer("all-mpnet-base-v2")

# Maps the integer form of a case's "label" field to a short display name.
label_map = {
    0: "SCR",   # Similar Case Retrieval
    1: "PCR"    # Precedent Case Retrieval
}

def search_cases(query, top_k=5, filter_label=None):
    """Return metadata for the indexed cases closest to ``query``.

    The query is encoded with the module-level ``embedder`` and looked up in
    the module-level FAISS ``index``.

    Args:
        query: Free-text query string.
        top_k: Maximum number of cases to return.
        filter_label: Optional label name from ``label_map`` (e.g. ``"SCR"``
            or ``"PCR"``). When given, only cases whose label maps to that
            name are returned.

    Returns:
        A list of at most ``top_k`` dicts, in the order ``index.search``
        returned them. Each dict is a shallow copy of the matching
        ``metadata`` entry with an extra ``"label_name"`` key; the shared
        ``metadata`` entries themselves are left untouched.

    Raises:
        KeyError: If a case's integer label is not a key of ``label_map``.
    """
    # Nothing can be returned; also avoids asking FAISS for zero neighbours.
    if top_k <= 0 or index.ntotal == 0:
        return []

    query_vec = embedder.encode([query], convert_to_numpy=True)

    # The label filter is applied after the vector search. Requesting only
    # top_k neighbours and filtering afterwards could yield fewer than top_k
    # hits (or none) even though matching cases exist, so scan every indexed
    # row when a filter is active and stop once top_k matches are collected.
    k = index.ntotal if filter_label else top_k
    _, neighbour_ids = index.search(query_vec, k)

    results = []
    for idx in neighbour_ids[0]:
        # FAISS pads the id array with -1 when fewer than k neighbours exist.
        # Used as a Python index, -1 would silently return the last case.
        if idx < 0:
            continue

        # Copy so that annotating the hit does not mutate the shared metadata.
        case = dict(metadata[idx])
        case_label = int(case["label"])  # labels may be stored as strings
        case["label_name"] = label_map[case_label]

        if filter_label and case["label_name"] != filter_label:
            continue
        results.append(case)
        if len(results) == top_k:
            break
    return results

FAISS index built with 577 entries


# Peek at the first few metadata records. Slicing (instead of indexing 0..4)
# keeps this cell from raising IndexError when fewer than five were loaded.
for record in metadata[:5]:
    print(record)

# Distinct raw "label" values present in the metadata (before the int()
# conversion that search_cases applies).
labels = {record["label"] for record in metadata}
print(labels)

In [3]:
# Similar Case Retrieval (SCR) demo.
# No filter_label is passed here, so the three nearest cases are returned
# whatever their label is.
results = search_cases("right to privacy under the 4th amendment", top_k=3)

# Show each hit's id followed by the first 200 characters of its text.
for hit in results:
    snippet = hit["text"][:200]
    print(hit["case_id"], snippet)

0328-01_4 in a criminal case. The Chandler court viewed Florida’s decision to allow audiovisual coverage of court proceedings as “an exercise of authority reserved to the states under our federalism.” (Id. at 5
0321-01_3 (2) implicitly, when the Federal legislation is so comprehensive in scope that it is inferable that Congress intended to fully occupy the “field” of its subject matter; or (3) implicitly, when State l
0618-01_7 you have the right to remain silent. Anything you say can and will be used against you in a court of law. You have the right to talk to a lawyer and have him present with you while you’re being questi


In [20]:
# Label-restricted searches: keep only hits whose label_name is "SCR".
results_scr = search_cases(
    "property dispute",
    top_k=3,
    filter_label="SCR",
)

# Same idea, restricted to hits whose label_name is "PCR".
results_pcr = search_cases(
    "fundamental rights violation",
    top_k=3,
    filter_label="PCR",
)