In [None]:
# Mount Google Drive into the Colab runtime at /content/drive
# (force_remount=True asks for a fresh mount on every run of this cell).
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

_________

In [2]:

import pandas as pd
import numpy as np
import seaborn as sns

from transformers import AutoTokenizer, AutoModel
import torch
import torch.nn.functional as F


# Excel input requires the following sheets:
Per text:
- Sheet containing original arguments with columns arg_num, arg_text: original_{text_id}
- Sheet containing LLM arguments with columns arg_num, arg_text: llm_{text_id}

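The sheet-name prefixes are defined in the configuration cell below.<br>
Optionally, sheets with manual matches (e.g. multiple arguments grouped together manually; columns llm_num, llm_text, original_num, original_text: manual_{text_id}) or reworked LLM arguments (columns arg_num, arg_text: llm1_{text_id}) can be added. Naming should follow the same pattern.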

In [None]:

# ECHR texts that have been previously processed (pipeline output exists).
# Each id is appended to the sheet-name prefixes below to select a sheet.
texts_processed = ['00', '04', '05', '06', '10', '13', '16', '20', '21']
# NOTE(review): presumably the texts for which a 'manual_<id>' sheet exists -- confirm against the input xlsx
texts_processed_with_manual_matches = ['04','06', '10', '13', '16']
# NOTE(review): presumably the ids belonging to the '..._long.xlsx' input that is commented out below -- confirm
texts_processed_long = ['01','02','17','19','27','29']

# Sheet-name prefixes
# (the resulting sheet names '<prefix><text_id>' must exist in the input xlsx)
input_sheetname_original = 'original_'
input_sheetname_llm = 'llm_'
input_sheetname_manual = 'manual_'
input_sheetname_llm_reworked = 'llm1_'

# Maps an LLM sheet prefix to the output sub-folder used for its match files
# ('' = directly in the threshold folder, 'reworked1/' = reworked prompts).
dict_output_llm = {input_sheetname_llm : '',
                   input_sheetname_llm_reworked : 'reworked1/'
                   }

# Root folder; the input workbook and the output folders are built relative to it.
path_root = '../Arg_similarity/'
# Exactly one path_input line should be active; the others are alternative input workbooks.
#path_input = path_root + 'input_arg_similarity_zeroshot.xlsx' # TODO change back
#path_input = path_root + 'input_arg_similarity.xlsx' # TODO change back
#path_input = path_root + 'input_arg_similarity_long.xlsx' # TODO change back
path_input = path_root + 'input_arg_similarity_argBuf.xlsx' # TODO change back

# Exactly one folder_output line should be active; keep it in sync with path_input above.
#folder_output = path_root + 'output_arg_similarity/'
#folder_output = path_root + 'output_arg_similarity_zeroshot/'
folder_output = path_root + 'output_arg_similarity_argBuf/'
# NOTE(review): folder_feedback is not used in the visible part of the notebook -- confirm it is still needed
folder_feedback = path_root + 'feedback_arg_similarity/'

# Hugging Face model id used to embed arguments for the similarity score
similarity_model = 'sentence-transformers/all-MiniLM-L6-v2'


______

# Output Generation

In [5]:

#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    """Average the token embeddings of each sequence, weighted by the attention mask.

    Args:
        model_output: Model output whose first element holds the token embeddings.
        attention_mask: Mask with one entry per token; it is broadcast over the
            embedding dimension so that masked-out tokens contribute nothing.

    Returns:
        One pooled embedding per sequence (sum over dimension 1 divided by the
        summed mask).
    """
    embeddings = model_output[0]

    # Give the mask the same size as the embeddings so it can be applied element-wise.
    mask = attention_mask.unsqueeze(-1).expand(embeddings.size()).float()

    masked_sum = torch.sum(embeddings * mask, 1)
    # Clamp the denominator so a fully masked sequence does not divide by zero.
    mask_total = torch.clamp(mask.sum(1), min=1e-9)

    return masked_sum / mask_total


# Loaded (tokenizer, model) pairs keyed by model name. get_sentence_similarity is
# called once per argument pair, so loading the weights on every call dominates
# the runtime; the cache makes it a one-time cost per kernel session.
_SIMILARITY_MODEL_CACHE = {}


def _load_similarity_model(model_name):
    """Return the cached ``(tokenizer, model)`` pair for ``model_name``, loading it on first use."""
    if model_name not in _SIMILARITY_MODEL_CACHE:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModel.from_pretrained(model_name)
        _SIMILARITY_MODEL_CACHE[model_name] = (tokenizer, model)
    return _SIMILARITY_MODEL_CACHE[model_name]


def get_sentence_similarity(sentence1, sentence2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are embedded with the model named by the module-level
    ``similarity_model``, mean-pooled over their tokens and L2-normalised.

    Args:
        sentence1: First text.
        sentence2: Second text.

    Returns:
        The cosine similarity as a Python float.
    """
    # Load model from HuggingFace Hub (only on the first call, cached afterwards)
    tokenizer, model = _load_similarity_model(similarity_model)

    # Tokenize both texts as one padded batch; truncation=True lets the tokenizer cut long inputs
    encoded_input = tokenizer([sentence1, sentence2], padding=True, truncation=True, return_tensors='pt')

    # Compute token embeddings without tracking gradients
    with torch.no_grad():
        model_output = model(**encoded_input)

    # Perform pooling
    sentence_embeddings = mean_pooling(model_output, encoded_input['attention_mask'])

    # Normalize embeddings
    sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)

    sim = F.cosine_similarity(sentence_embeddings[0], sentence_embeddings[1], dim=0)

    return sim.item()


In [6]:

# only prints similarity scores, does not save output
def print_all_similarities(texts_processed, llm_sheet, sim_threshold=0.7):
    """Print similarity scores between original and LLM arguments; nothing is saved.

    For every text, each original argument is printed and compared with every
    LLM argument; pairs scoring at least ``sim_threshold`` are reported. If the
    workbook also contains a manual-matching sheet for the text, the similarity
    of every manually matched pair is printed as well.

    Args:
        texts_processed: Text ids; each is appended to the sheet-name prefixes.
        llm_sheet: Sheet-name prefix of the LLM arguments to compare against.
        sim_threshold: Minimum similarity for a pair to be printed. Defaults to
            0.7, the value that was previously hard-coded.
    """
    separator = "_________ _________ _________ _________ _________ _________ _________ _________ "

    for t in texts_processed:

        print(f'T: {t}')

        data_original = pd.read_excel(path_input, sheet_name=input_sheetname_original + t)
        data_llm = pd.read_excel(path_input, sheet_name=llm_sheet + t)

        for _, row_original in data_original.iterrows():
            print(f"ORIGINAL ARGUMENT {row_original['arg_num']}:")
            print(row_original['arg_text'])

            for _, row_llm in data_llm.iterrows():

                sim_score = get_sentence_similarity(row_original['arg_text'], row_llm['arg_text'])

                if sim_score >= sim_threshold:
                    print(f"Original arg {row_original['arg_num']} has a similarity of {sim_score} with LLM arg {row_llm['arg_num']}")

        # The manual sheet is optional. Only the "sheet not found" errors are
        # treated as "no manual matches" (ValueError from pandas, KeyError from
        # openpyxl in older pandas versions); a bare except would also swallow
        # KeyboardInterrupt and unrelated failures.
        try:
            data_manual = pd.read_excel(path_input, sheet_name=input_sheetname_manual + t)
        except (ValueError, KeyError):
            print(separator)
            continue

        print('---------- Manual matching:')

        for _, row_manual in data_manual.iterrows():

            sim_score = get_sentence_similarity(row_manual['llm_text'], row_manual['original_text'])

            print(f'The manual matching of LLM arg(s) {row_manual["llm_num"]} and original arg(s) {row_manual["original_num"]} is {sim_score}')

        print(separator)


#print_all_similarities(texts_processed, input_sheetname_llm) # here we can also used another llm output (e.g. reworked output)


In [None]:
# Print pairwise and manual-match similarities for the texts listed in
# texts_processed_with_manual_matches, using the base LLM sheets.
print_all_similarities(texts_processed_with_manual_matches, input_sheetname_llm)

In [7]:

def get_matches(sim_threshold, base_data, matched_data):
    """Match every argument in ``base_data`` against every argument in ``matched_data``.

    Args:
        sim_threshold: Minimum similarity score for a pair to count as a match.
        base_data: DataFrame with columns ``arg_num`` and ``arg_text``; each row
            is looked up in ``matched_data``.
        matched_data: DataFrame with columns ``arg_num`` and ``arg_text`` that
            serves as the pool of match candidates.

    Returns:
        A tuple ``(df_matches, df_no_matches)``. ``df_matches`` has one row per
        matching pair with columns ``arg_num``, ``arg_text``, ``matched_num``,
        ``matched_text`` and ``similarity``. ``df_no_matches`` has one row
        (``arg_num``, ``arg_text``) per base argument without any match. Both
        frames always carry their full set of columns, even when empty, so the
        saved files can be read back and grouped on ``similarity``.
    """
    match_columns = ['arg_num', 'arg_text', 'matched_num', 'matched_text', 'similarity']
    no_match_columns = ['arg_num', 'arg_text']

    # Collect plain records and build each frame once at the end, instead of
    # re-copying the whole frame with pd.concat for every new row.
    match_records = []
    no_match_records = []

    for _, row_base in base_data.iterrows():

        found_match = False
        for _, row_matched in matched_data.iterrows():

            sim_score = get_sentence_similarity(row_base['arg_text'], row_matched['arg_text'])

            if sim_score >= sim_threshold:
                print(f"Base arg {row_base['arg_num']} has a similarity of {sim_score} with LLM arg {row_matched['arg_num']}")
                match_records.append({'arg_num': row_base['arg_num'],
                                      'arg_text': row_base['arg_text'],
                                      'matched_num': row_matched['arg_num'],
                                      'matched_text': row_matched['arg_text'],
                                      'similarity': sim_score})
                found_match = True

        if not found_match:
            print(f"Base arg {row_base['arg_num']} has no match with a similarity score >= {sim_threshold}")
            no_match_records.append({'arg_num': row_base['arg_num'],
                                     'arg_text': row_base['arg_text']})

    df_matches = pd.DataFrame(match_records, columns=match_columns)
    df_no_matches = pd.DataFrame(no_match_records, columns=no_match_columns)

    return df_matches, df_no_matches


In [None]:

def compute_and_save_matches(texts, llm_sheet, threshold):
    """Compute matches in both directions for each text and save them as xlsx files.

    For every text id, the original and the LLM arguments are read from the
    input workbook and matched original -> LLM and LLM -> original. Four files
    are written to ``<folder_output>/<threshold>/<sub-folder of llm_sheet>``:
    ``<id>_original_matches``, ``<id>_original_no_matches``,
    ``<id>_llm_matches`` and ``<id>_llm_no_matches`` (all ``.xlsx``).

    Args:
        texts: Text ids; each is appended to the sheet-name prefixes.
        llm_sheet: Sheet-name prefix of the LLM arguments; must be a key of
            ``dict_output_llm``.
        threshold: Minimum similarity score for a pair to count as a match.
    """
    import os  # local import so this cell does not depend on an edit to the import cell

    for t in texts:

        sheetname_original = input_sheetname_original + t
        sheetname_llm = llm_sheet + t

        data_original = pd.read_excel(path_input, sheet_name=sheetname_original)
        data_llm = pd.read_excel(path_input, sheet_name=sheetname_llm)

        print(f'Getting matches for text {t} with threshold {threshold}')

        # Matches on ORIGINAL
        original_matches, original_no_matches = get_matches(threshold, data_original, data_llm)

        # Matches on LLM:
        llm_matches, llm_no_matches = get_matches(threshold, data_llm, data_original)

        export_path = f'{folder_output}/{threshold}/{dict_output_llm[llm_sheet]}'
        export_path_end = '.xlsx'

        # to_excel does not create missing folders; make sure the threshold
        # (and optional reworked) folder exists before writing.
        os.makedirs(export_path, exist_ok=True)

        original_matches.to_excel(export_path + t + '_original_matches' + export_path_end, index=False)
        original_no_matches.to_excel(export_path + t + '_original_no_matches' + export_path_end, index=False)
        llm_matches.to_excel(export_path + t + '_llm_matches' + export_path_end, index=False)
        llm_no_matches.to_excel(export_path + t + '_llm_no_matches' + export_path_end, index=False)

        print('_____________________________________________________________________________________')

# Compute and save the matches for the base LLM sheets at threshold 0.8.
# Another LLM output (e.g. the reworked output) can be used here instead.
compute_and_save_matches(texts_processed, input_sheetname_llm, 0.8)


In [None]:

def convert_to_percent_str(value):
    """Format a fraction as a percentage string.

    The value is multiplied by 100, rounded to four decimal places and
    suffixed with ``%`` (e.g. ``0.5`` becomes ``'50.0%'``).
    """
    percentage = round(value * 100, 4)
    return '%s%%' % (percentage,)


# Evaluate Output

In [None]:

def _print_match_summary(data, matches):
    """Print the matched ratio and the mean min/mean/max similarity for one matching direction."""
    count_arguments = len(data)
    count_matched_arguments = len(matches.drop_duplicates(subset=['arg_num']))

    # An empty argument sheet would raise ZeroDivisionError; report nan instead.
    if count_arguments:
        ratio_matched = round(count_matched_arguments / count_arguments * 100, 4)
    else:
        ratio_matched = float('nan')
    print(f'{count_matched_arguments} out of {count_arguments} matched ({ratio_matched}%)')

    # A match file without any rows may have been saved without a 'similarity'
    # column; there is nothing to aggregate in that case, so report nan.
    if matches.empty or 'similarity' not in matches.columns:
        stats = {'min': float('nan'), 'mean': float('nan'), 'max': float('nan')}
    else:
        df_grouped = matches.groupby('arg_num')['similarity'].agg(['min', 'mean', 'max'])
        stats = {name: np.mean(df_grouped[name]) for name in ('min', 'mean', 'max')}

    print(f"Mean min similarity: {convert_to_percent_str(stats['min'])}")
    print(f"Mean mean similarity: {convert_to_percent_str(stats['mean'])}")
    print(f"Mean max similarity: {convert_to_percent_str(stats['max'])}")


def print_ratio_and_similarity(texts, llm_sheets, threshold):
    """Print, per text and LLM sheet, the matched ratio and similarity statistics.

    The arguments are read from the input workbook and the previously saved
    match files from ``<folder_output>/<threshold>/<sub-folder of llm_sheet>``.
    For both directions (original as base, LLM as base) the share of matched
    arguments is printed, followed by the mean over all matched arguments of
    their minimum, mean and maximum similarity.

    Args:
        texts: Text ids; each is appended to the sheet-name prefixes.
        llm_sheets: Sheet-name prefixes of the LLM outputs to evaluate; each
            must be a key of ``dict_output_llm``.
        threshold: Threshold whose saved match files are evaluated.
    """
    for t in texts:

        print(f'Text: {t}')

        for llm_sheet in llm_sheets:

            print(" > > > > > > > > ")
            print(f'LLM sheet: {llm_sheet}')

            sheetname_original = input_sheetname_original + t
            sheetname_llm = llm_sheet + t

            data_original = pd.read_excel(path_input, sheet_name=sheetname_original)
            data_llm = pd.read_excel(path_input, sheet_name=sheetname_llm)

            output_path = f'{folder_output}/{threshold}/{dict_output_llm[llm_sheet]}'
            export_path_end = '.xlsx'

            original_matches = pd.read_excel(output_path + t + '_original_matches' + export_path_end)
            llm_matches = pd.read_excel(output_path + t + '_llm_matches' + export_path_end)

            print('------ Base original annotations:')
            _print_match_summary(data_original, original_matches)

            print('------ Base LLM annotations:')
            _print_match_summary(data_llm, llm_matches)

        print("_________ _________ _________ _________ _________ _________ _________ _________ ")


In [None]:

def _matched_ratio_percent(data, matches):
    """Return the percentage of arguments in ``data`` that occur in ``matches`` (nan if ``data`` is empty)."""
    count_arguments = len(data)
    if count_arguments == 0:
        # Undefined ratio; nan lets the caller skip this text instead of crashing.
        return float('nan')
    count_matched_arguments = len(matches.drop_duplicates(subset=['arg_num']))
    return round(count_matched_arguments / count_arguments * 100, 4)


def print_aggregated_ratio(texts, llm_sheets, threshold):
    """Print the mean matched ratio over all texts for each LLM sheet.

    For every LLM sheet prefix, the share of matched original arguments and the
    share of matched LLM arguments are computed per text from the previously
    saved match files and then averaged over the texts. Texts whose argument
    sheet is empty have an undefined ratio and are left out of the average.

    Args:
        texts: Text ids; each is appended to the sheet-name prefixes.
        llm_sheets: Sheet-name prefixes of the LLM outputs to evaluate; each
            must be a key of ``dict_output_llm``.
        threshold: Threshold whose saved match files are evaluated.
    """
    print(f'THRESHOLD: {threshold}')

    for llm_sheet in llm_sheets:

        print(f'LLM sheet: {llm_sheet}')

        matches_on_original_rel = []
        matches_on_llm_rel = []

        for t in texts:

            sheetname_original = input_sheetname_original + t
            sheetname_llm = llm_sheet + t

            data_original = pd.read_excel(path_input, sheet_name=sheetname_original)
            data_llm = pd.read_excel(path_input, sheet_name=sheetname_llm)

            output_path = f'{folder_output}/{threshold}/{dict_output_llm[llm_sheet]}'
            export_path_end = '.xlsx'

            original_matches = pd.read_excel(output_path + t + '_original_matches' + export_path_end)
            llm_matches = pd.read_excel(output_path + t + '_llm_matches' + export_path_end)

            matches_on_original_rel.append(_matched_ratio_percent(data_original, original_matches))
            matches_on_llm_rel.append(_matched_ratio_percent(data_llm, llm_matches))

        # nanmean equals mean when every ratio is defined and skips undefined ones.
        print(f'Mean matching ratio on original: {round(np.nanmean(matches_on_original_rel), 2)} %')
        print(f'Mean matching ratio on llm: {round(np.nanmean(matches_on_llm_rel), 2)} %')

        print("---------------------")


In [None]:

def print_aggregated_ratio_all_thresholds(texts, llm_sheets, thresholds):
    """Call ``print_aggregated_ratio`` once for each value in ``thresholds``, in order."""
    for current_threshold in thresholds:
        print_aggregated_ratio(texts, llm_sheets, current_threshold)


In [None]:
# Per-text matched ratios and similarity statistics for the base and the reworked LLM sheets at threshold 0.75.
# NOTE(review): this reads match files from the 0.75 output folder, while the compute_and_save_matches
# call visible in this notebook only writes threshold 0.8 for the base sheets -- confirm the files exist.
print_ratio_and_similarity(texts_processed, [input_sheetname_llm, input_sheetname_llm_reworked], 0.75)

In [None]:
# Mean matched ratio over all processed texts for the base and the reworked LLM sheets at threshold 0.75.
# NOTE(review): relies on match files for threshold 0.75 already being present in the output folder -- confirm.
print_aggregated_ratio(texts_processed, [input_sheetname_llm, input_sheetname_llm_reworked], 0.75)

In [None]:

def _print_unmatched_arguments(header, no_matches):
    """Print ``header`` followed by one ``arg_num : arg_text`` line per row; print nothing if there are no rows."""
    if len(no_matches) > 0:
        print('\n')
        print(header)
        for _, row in no_matches.iterrows():
            print(f"{row['arg_num']} : {row['arg_text']}")


def print_args_without_match(texts, llm_sheet, threshold):
    """Print, per text, the original and LLM arguments that have no match.

    Only the previously saved ``<id>_original_no_matches`` and
    ``<id>_llm_no_matches`` files under
    ``<folder_output>/<threshold>/<sub-folder of llm_sheet>`` are read; the
    input workbook is not needed for this report.

    Args:
        texts: Text ids whose no-match files are printed.
        llm_sheet: Sheet-name prefix of the LLM output; must be a key of
            ``dict_output_llm``.
        threshold: Threshold whose saved no-match files are printed.
    """
    for t in texts:

        print(f'TEXT: {t}')

        output_path = f'{folder_output}/{threshold}/{dict_output_llm[llm_sheet]}'
        export_path_end = '.xlsx'

        original_no_matches = pd.read_excel(output_path + t + '_original_no_matches' + export_path_end)
        llm_no_matches = pd.read_excel(output_path + t + '_llm_no_matches' + export_path_end)

        _print_unmatched_arguments('--------- The following original arguments have no match:',
                                   original_no_matches)
        _print_unmatched_arguments('--------- The following LLM arguments have no match:',
                                   llm_no_matches)

        print("_________ _________ _________ _________ _________ _________ _________ _________ ")
        print('\n')



In [None]:
# List the unmatched arguments of every processed text for the reworked LLM sheets at threshold 0.8
# (read from the 'reworked1/' sub-folder of the 0.8 output folder).
print_args_without_match(texts_processed, input_sheetname_llm_reworked, 0.8)

TEXT: 00


--------- The following original arguments have no match:
3 : The applicant was not detained on remand prior to his trial and the judicial control of the lawfulness of his subsequent detention after conviction was provided by the first instance court (cf. Eur. Court H.R., De Wilde, Ooms and Versyp judgment of 18 June 1971, Series A no. 12, p. 40, para. 76). 
 It follows that this part of the application is manifestly illfounded and must be rejected in accordance with Article 27 para. 2  (Art. 27-2) of the Convention. 
5 : The applicant next complains under Article 6 para. 1 (Art. 6-1) of the Convention of
in that , allegedly, insufficient reasons were given by the courts to justify his conviction.   
an unfair hearing in the determination of the criminal charges against him,
6 : and of not having received a written copy of the first instance judgment in time for the preparation of his appeal.
He alleges that witnesses on his behalf were not duly heard by the courts. 
The app