Setup

In [1]:
!pip install transformers sentence-transformers torch captum


/bin/bash: /home/liorkob/.conda/envs/judgeEnv/bin/pip: /home/itayraz/.conda/envs/judgeEnv/bin/python: bad interpreter: Permission denied


Basic Siamese Model for Similarity

In [None]:
from sentence_transformers import SentenceTransformer
import torch
import torch.nn.functional as F

# Pretrained sentence encoder (swap in your custom Siamese model here if you have one)
model = SentenceTransformer("all-MiniLM-L6-v2")
# Put the encoder in evaluation mode before computing embeddings
model.eval()


Integrated Gradients (IG) Implementation

In [None]:
from captum.attr import IntegratedGradients
from transformers import AutoTokenizer
import numpy as np

# Hugging Face tokenizer for the MiniLM sentence-transformers checkpoint
# (presumably the same checkpoint the SentenceTransformer model resolves to — verify)
tokenizer = AutoTokenizer.from_pretrained(
    "sentence-transformers/all-MiniLM-L6-v2"
)

# Define similarity model wrapper for Captum
class SimilarityWrapper(torch.nn.Module):
    """Cosine similarity between two sentences, usable as a Captum forward function.

    Integrated Gradients interpolates between a baseline and the input and
    differentiates the output with respect to that input, so the input has to
    be a continuous tensor and the forward pass has to build an autograd
    graph. ``SentenceTransformer.encode`` takes raw text and does not track
    gradients, so it cannot be attributed through. ``forward`` therefore
    supports two modes:

    * Tensor mode (used for attribution): ``input1`` / ``input2`` are
      word-embedding tensors of shape ``(batch, seq_len, hidden)`` as produced
      by ``embed_text``; the pass is fully differentiable.
    * Text mode (backward compatible): ``input1`` / ``input2`` are a string or
      a list of strings and are embedded with ``model.encode``. Handy for
      scoring, but carries no gradients.

    Assumes ``model`` is a ``SentenceTransformer`` whose first module exposes
    the Hugging Face transformer as ``.auto_model`` and whose remaining
    modules (pooling, normalisation, ...) operate on a features dict.
    """

    def __init__(self, model):
        super().__init__()
        self.model = model

    def embed_text(self, text, tokenizer):
        """Turn one sentence into IG-ready tensors.

        Args:
            text: A single sentence (``str``).
            tokenizer: Hugging Face tokenizer matching ``self.model``.

        Returns:
            Tuple ``(tokens, input_embeds, baseline_embeds, attention_mask)``:
            the token strings, the word embeddings of the sentence
            ``(1, seq_len, hidden)``, the word embeddings of a same-length
            baseline in which every non-special token is replaced by the PAD
            token, and the attention mask ``(1, seq_len)``.
        """
        encoded = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            return_special_tokens_mask=True,
        )
        embedding_layer = self.model[0].auto_model.get_input_embeddings()
        # Follow the embedding weights so this works on CPU and GPU alike.
        device = embedding_layer.weight.device
        input_ids = encoded["input_ids"].to(device)
        attention_mask = encoded["attention_mask"].to(device)
        is_special = encoded["special_tokens_mask"].to(device).bool()

        # Keep special tokens (e.g. [CLS]/[SEP]) in place and blank out the
        # rest, so the baseline has the same length as the input.
        pad_ids = torch.full_like(input_ids, tokenizer.pad_token_id)
        baseline_ids = torch.where(is_special, input_ids, pad_ids)

        # The lookups themselves need no graph; Captum enables gradients on
        # the interpolated inputs it builds from these tensors.
        with torch.no_grad():
            input_embeds = embedding_layer(input_ids)
            baseline_embeds = embedding_layer(baseline_ids)

        tokens = tokenizer.convert_ids_to_tokens(input_ids[0].tolist())
        return tokens, input_embeds, baseline_embeds, attention_mask

    def _sentence_embedding(self, token_embeds, attention_mask):
        """Differentiable sentence embedding from word embeddings."""
        if attention_mask is None:
            attention_mask = torch.ones(
                token_embeds.shape[:2], dtype=torch.long, device=token_embeds.device
            )
        hidden = self.model[0].auto_model(
            inputs_embeds=token_embeds, attention_mask=attention_mask
        ).last_hidden_state
        features = {"token_embeddings": hidden, "attention_mask": attention_mask}
        # Run the rest of the sentence-transformers pipeline (pooling, ...).
        for module in list(self.model)[1:]:
            features = module(features)
        return features["sentence_embedding"]

    def forward(self, input1, input2, mask1=None, mask2=None):
        """Return the cosine similarity of the two inputs, one value per example.

        Args:
            input1, input2: Word-embedding tensors ``(batch, seq_len, hidden)``
                (differentiable path) or raw text (``str`` / list of ``str``).
            mask1, mask2: Optional attention masks ``(batch, seq_len)`` for the
                tensor path; default to all ones.
        """
        if torch.is_tensor(input1) and torch.is_tensor(input2):
            emb1 = self._sentence_embedding(input1, mask1)
            emb2 = self._sentence_embedding(input2, mask2)
        else:
            # Text path: no gradients flow through encode().
            emb1 = self.model.encode(input1, convert_to_tensor=True)
            emb2 = self.model.encode(input2, convert_to_tensor=True)
        # dim=-1 also handles the 1-D embeddings encode() returns for a single string.
        return F.cosine_similarity(emb1, emb2, dim=-1)

similarity_model = SimilarityWrapper(model)

# Example verdicts
verdict_a = "The defendant was convicted of drug trafficking."
verdict_b = "The accused was found guilty of smuggling illegal substances."

# Create inputs: one embedding tensor per verdict, each with a PAD-token baseline
tokens_a, emb_a, base_a, mask_a = similarity_model.embed_text(verdict_a, tokenizer)
tokens_b, emb_b, base_b, mask_b = similarity_model.embed_text(verdict_b, tokenizer)
inputs = (emb_a, emb_b)
baseline = (base_a, base_b)

# Integrated Gradients
ig = IntegratedGradients(similarity_model)

attributions, delta = ig.attribute(
    inputs=inputs,
    baselines=baseline,
    # Masks are passed through unchanged to forward(input1, input2, mask1, mask2).
    additional_forward_args=(mask_a, mask_b),
    return_convergence_delta=True,
)

# Collapse the hidden dimension to get one score per token instead of dumping
# the raw (1, seq_len, hidden) attribution tensors.
print("Attributions:")
for tokens, attribution in zip((tokens_a, tokens_b), attributions):
    token_scores = attribution.sum(dim=-1).squeeze(0).tolist()
    print([(token, round(score, 4)) for token, score in zip(tokens, token_scores)])
print("Delta:", delta)
