# Imports

In [1]:
import datasets
from bs4 import BeautifulSoup
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch
import re
from evaluation import evaluate

  from .autonotebook import tqdm as notebook_tqdm
2024-04-14 04:45:41.716137: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Constants

In [2]:
PATH_TO_OWL = './LMSS.owl'
LLM_PATH = '../gemma-7b-it'

# Set up Llama

In [8]:
tokenizer = AutoTokenizer.from_pretrained(LLM_PATH)
model = AutoModelForCausalLM.from_pretrained(LLM_PATH, device_map=1)

Loading checkpoint shards: 100%|██████████| 8/8 [00:08<00:00,  1.01s/it]


In [9]:
def get_gemma_response(prompt):
    # Tokenize the prompt
    token_inputs = tokenizer(prompt, return_tensors="pt").to("cuda:1")
    
    # Generate the response using Gemma
    token_outputs = model.generate(input_ids=token_inputs['input_ids'], 
                                   do_sample=True, 
                                   max_new_tokens=500, 
                                   temperature=0.5)
    
    # Decode the generated output
    decoded_output = tokenizer.decode(token_outputs[0], skip_special_tokens=True)
    
    return decoded_output


In [10]:
# Conversation prompt
prompt = '''<bos><start_of_turn>user
I'm hosting a backyard pool party this weekend, what sorts of foods should I serve at the gathering?<end_of_turn>
<start_of_turn>model
'''

# Get Gemma's response
response = get_gemma_response(prompt)

# Print Gemma's response
print(response)


user
I'm hosting a backyard pool party this weekend, what sorts of foods should I serve at the gathering?
model
Here are some food ideas to serve at your backyard pool party this weekend:

**Light and Refreshing:**

* **Fruit Skewers with Dip:** Arrange a variety of colorful fruits like strawberries, grapes, watermelon, oranges, and pineapple on skewers and serve with a refreshing dip like fruit punch, iced tea, or lemonade.
* **Fruit and Vegetable Crudités with Dip:** Offer a platter of fresh cut vegetables like carrots, celery, cucumber, and bell peppers with a dip of hummus, guacamole, or tzatziki sauce.
* **Slaw and Salad:** A light and refreshing side dish like coleslaw, pasta salad, or fruit salad will cool down your guests.
* **Watermelon and Mint Granade:** This refreshing drink is perfect for a hot day and is easy to make with fresh watermelon, mint, and sugar.

**Casual and Fun:**

* **Hot Dogs and Burgers:** Set up a grill and offer hot dogs, hamburgers, chicken skewers, or 

# Reading OWL

In [11]:
# Read the contents of the .owl file
with open(PATH_TO_OWL, "r") as owl_file:
    owl_data = owl_file.read()

# Parse the OWL data using BeautifulSoup
soup = BeautifulSoup(owl_data, 'xml')

In [12]:
# Initialize lists to store data
labels = []
definitions = []

# Find all instances of <owl:Class> elements and extract label and definition
for owl_class in soup.find_all('owl:Class'):
    label_element = owl_class.find('rdfs:label')
    definition_element = owl_class.find('skos:definition')
    
    # Check if label and definition elements exist
    if label_element and definition_element:
        label = label_element.text.strip()
        definition = definition_element.text.strip()
        
        # Append data to lists
        labels.append(label)
        definitions.append(definition)

data = {'Label': labels, 'Definition': definitions}
owl_df = pd.DataFrame(data)

owl_df

Unnamed: 0,Label,Definition
0,Other Personal and Household Goods Repair and ...,See industry description for 811490.
1,Other Converted Paper Product Manufacturing,This industry comprises establishments primari...
2,General Medical and Surgical Hospitals,
3,Confectionery Merchant Wholesalers,This industry comprises establishments primari...
4,Other Specialized Design Services,See industry description for 541490.
...,...,...
14248,Vocational Rehabilitation Services,
14249,Books Printing,This U.S. industry comprises establishments pr...
14250,Petrochemical Manufacturing,See industry description for 325110.
14251,Pesticide and Other Agricultural Chemical Manu...,This industry comprises establishments primari...


### Function to get classes

In [13]:
def filter_label_by_substring(df, substring):
    """
    Filter DataFrame rows containing the specified substring in the 'Label' column
    and return a list of strings in the format "{Label} : {Definition}".
    
    Args:
        df (pandas.DataFrame): Input DataFrame.
        substring (str): Substring to search for.
        
    Returns:
        list: List of strings in the format "{Label} : {Definition}" for matching rows.
    """
    filtered_df = df[df['Label'].str.contains(substring, case=False)]
    output_list = []
    for index, row in filtered_df.iterrows():
        output_list.append(f"{row['Label']} : {row['Definition']}")
    return output_list


In [14]:
#example usage
search_substring = 'hearsay'
result = filter_label_by_substring(owl_df, search_substring)
print(result)

['Motion in Limine to Exclude Hearsay Witness : A Motion in Limine to Exclude Hearsay Witness is a legal request made by one party to prevent the other party from presenting testimony from a witness who will testify about statements made by someone else out of court, which are being offered to prove the truth of the matter asserted.', 'Motion to Exclude Hearsay Witness : A Motion to Exclude Hearsay Witness is a legal request to prevent a witness from testifying in court based on the fact that their testimony is based on hearsay evidence.']


# Dataset

In [15]:
dataset_hearsay = datasets.load_dataset("nguha/legalbench", "hearsay")

You can avoid this message in future by passing the argument `trust_remote_code=True`.
Passing `trust_remote_code=True` will be mandatory to load this dataset from the next major release of `datasets`.


In [16]:
test_df = dataset_hearsay['test'].to_pandas()
test_df

Unnamed: 0,answer,index,slice,text
0,No,0,Non-assertive conduct,On the issue of whether James is an smart indi...
1,No,1,Non-assertive conduct,On the issue of whether Robert negligently dro...
2,No,2,Non-assertive conduct,On the issue of whether John knew about the co...
3,No,3,Non-assertive conduct,On the issue of whether Michael was guilty of ...
4,No,4,Non-assertive conduct,On the issue of whether William was loved by h...
...,...,...,...,...
89,No,89,Not introduced to prove truth,To prove that Arthur believed that Amy and Dan...
90,No,90,Not introduced to prove truth,To prove that the trademarks of restaurant A a...
91,No,91,Not introduced to prove truth,To prove that Michael knew of the existing pat...
92,No,92,Not introduced to prove truth,To prove that Arthur and Mary had a conversati...


In [17]:
prompts = test_df["text"].tolist()
prompts

['On the issue of whether James is an smart individual, the fact that James came first in his class in law school.',
 'On the issue of whether Robert negligently drove, the fact that Robert fell asleep while driving.',
 'On the issue of whether John knew about the conspiracy, the fact that John likes sweatpants.',
 'On the issue of whether Michael was guilty of murder, the fact that Michael left the crime scene immediately.',
 'On the issue of whether William was loved by his community, the fact that he was selected to speak at his graduation.',
 'On the issue of whether Mary robbed the bank, the fact that Mary went to the bank in disguise.',
 'On the issue of whether Patricia was a fan of Coldplay, the fact that she had a poster with the lyrics of "Viva la Vida" on her bedroom wall.',
 "On the issue of whether Jennifer suffered reputational harm from Linda's article, the fact that Linda worked with several different editors to proof read and cross-check her article.",
 "On the issue o

In [18]:
prompts[0]

'On the issue of whether James is an smart individual, the fact that James came first in his class in law school.'

In [19]:
def read_tsv(file_path):
    examples = []
    with open(file_path, 'r') as file:
        next(file)
        for line in file:
            index, answer, text, _ = line.strip().split('\t')
            examples.append((text, answer))
    return examples

# Read examples from test.tsv
examples = read_tsv('tasks/hearsay/train.tsv')

print(examples)

[('On the issue of whether David is fast, the fact that David set a high school track record.', 'No'), ('On the issue of whether Rebecca was ill, the fact that Rebecca told Ronald that she was unwell.', 'Yes'), ('"To prove that Tim was a soccer fan, the fact that Tim told Jimmy that ""Real Madrid was the best soccer team in the world."""', 'No'), ('"When asked by the attorney on cross-examination, Alice testified that she had ""never seen the plaintiff before, and had no idea who she was."""', 'No'), ('On the issue of whether Martin punched James, the fact that Martin smiled and nodded when asked if he did so by an officer on the scene.', 'Yes')]


In [113]:
def add_diverse_prompt_v2(prompt_text, filtered_labels):
    """
    Enhanced version of the prompt with additional strategies for diverse answers.

    Args:
        prompt_text (str): The prompt text.
        filtered_labels (list): List of strings containing labels and definitions.

    Returns:
        str: The full prompt text with filtered labels and definitions added.
    """
    full_prompt = f"""<start_of_turn>user

Statement: {prompt_text}

Consider the following ontology classes to structure your argument and analyze whether the information provided falls under the category of hearsay:

"""

    for label_definition in filtered_labels:
        full_prompt += f"{label_definition}\n"

    # Add counterargument and hypothetical scenario
    full_prompt += """
Context: Imagine you are a detective investigating a high-profile case. The statement provided was obtained from a confidential informant. Now, analyze whether it qualifies as hearsay.

Potential Counterargument: Some may argue that since the statement came from a confidential informant, it should be considered reliable evidence rather than hearsay. Consider this perspective in your analysis.

Hypothetical Scenario: Suppose the statement was obtained through a legally approved wiretap. Would your analysis change? Think about the implications of this scenario on the classification of the statement as hearsay.

Output Format: [Reasoning(concise), Final Answer(One word)]
<end_of_turn>

<start_of_turn>model
"""
    return full_prompt.strip()


# Testing - Hearsay

In [114]:
filtered_labels = filter_label_by_substring(owl_df, "hearsay")
filtered_labels

['Motion in Limine to Exclude Hearsay Witness : A Motion in Limine to Exclude Hearsay Witness is a legal request made by one party to prevent the other party from presenting testimony from a witness who will testify about statements made by someone else out of court, which are being offered to prove the truth of the matter asserted.',
 'Motion to Exclude Hearsay Witness : A Motion to Exclude Hearsay Witness is a legal request to prevent a witness from testifying in court based on the fact that their testimony is based on hearsay evidence.']

In [115]:
example_prompt = add_diverse_prompt_v2(prompts[1],filtered_labels)
print(example_prompt)

<start_of_turn>user

Statement: On the issue of whether Robert negligently drove, the fact that Robert fell asleep while driving.

Consider the following ontology classes to structure your argument and analyze whether the information provided falls under the category of hearsay:

Motion in Limine to Exclude Hearsay Witness : A Motion in Limine to Exclude Hearsay Witness is a legal request made by one party to prevent the other party from presenting testimony from a witness who will testify about statements made by someone else out of court, which are being offered to prove the truth of the matter asserted.
Motion to Exclude Hearsay Witness : A Motion to Exclude Hearsay Witness is a legal request to prevent a witness from testifying in court based on the fact that their testimony is based on hearsay evidence.

Context: Imagine you are a detective investigating a high-profile case. The statement provided was obtained from a confidential informant. Now, analyze whether it qualifies as hea

In [117]:
prompt = add_diverse_prompt_v2(prompts[2],filtered_labels)

response = get_gemma_response(prompt)

print(response)

user

Statement: On the issue of whether John knew about the conspiracy, the fact that John likes sweatpants.

Consider the following ontology classes to structure your argument and analyze whether the information provided falls under the category of hearsay:

Motion in Limine to Exclude Hearsay Witness : A Motion in Limine to Exclude Hearsay Witness is a legal request made by one party to prevent the other party from presenting testimony from a witness who will testify about statements made by someone else out of court, which are being offered to prove the truth of the matter asserted.
Motion to Exclude Hearsay Witness : A Motion to Exclude Hearsay Witness is a legal request to prevent a witness from testifying in court based on the fact that their testimony is based on hearsay evidence.

Context: Imagine you are a detective investigating a high-profile case. The statement provided was obtained from a confidential informant. Now, analyze whether it qualifies as hearsay.

Potential Cou

In [63]:
responses = []

filtered_labels = filter_label_by_substring(owl_df, "hearsay")

for i, prompt_text in enumerate(prompts):

    full_prompt = add_diverse_prompt_v2(prompt_text, filtered_labels)

    response = get_gemma_response(full_prompt)
    
    responses.append(response)
    
    print(f"Done for prompt {i+1}")
    # print(response)


Done for prompt 1
Done for prompt 2
Done for prompt 3
Done for prompt 4
Done for prompt 5
Done for prompt 6
Done for prompt 7
Done for prompt 8
Done for prompt 9
Done for prompt 10
Done for prompt 11
Done for prompt 12
Done for prompt 13
Done for prompt 14
Done for prompt 15
Done for prompt 16
Done for prompt 17
Done for prompt 18
Done for prompt 19
Done for prompt 20
Done for prompt 21
Done for prompt 22
Done for prompt 23
Done for prompt 24
Done for prompt 25
Done for prompt 26
Done for prompt 27
Done for prompt 28
Done for prompt 29
Done for prompt 30
Done for prompt 31
Done for prompt 32
Done for prompt 33
Done for prompt 34
Done for prompt 35
Done for prompt 36
Done for prompt 37
Done for prompt 38
Done for prompt 39
Done for prompt 40
Done for prompt 41
Done for prompt 42
Done for prompt 43
Done for prompt 44
Done for prompt 45
Done for prompt 46
Done for prompt 47
Done for prompt 48
Done for prompt 49
Done for prompt 50
Done for prompt 51
Done for prompt 52
Done for prompt 53
Do

In [70]:
print(responses[0])

user

Statement: On the issue of whether James is an smart individual, the fact that James came first in his class in law school.

Consider the following ontology classes to structure your argument and analyze whether the information provided falls under the category of hearsay or not:

Motion in Limine to Exclude Hearsay Witness : A Motion in Limine to Exclude Hearsay Witness is a legal request made by one party to prevent the other party from presenting testimony from a witness who will testify about statements made by someone else out of court, which are being offered to prove the truth of the matter asserted.
Motion to Exclude Hearsay Witness : A Motion to Exclude Hearsay Witness is a legal request to prevent a witness from testifying in court based on the fact that their testimony is based on hearsay evidence.

Context: Imagine you are a legal expert reviewing a court case. The statement provided is part of the evidence presented during the trial. Your task is to determine if it q

### Parsing output

In [71]:
def extract_first_word_after_model_from_list(text_list):
    results = []
    for text in text_list:
        # Find all occurrences of "model" followed by a word, possibly followed by "**Answer:**"
        matches = re.findall(r'\bmodel\b\s+(\w+)(?:\*\*Answer:\*\*)?', text)
        # Append the first word found after "model" if any, otherwise append None
        results.append(matches[0] if matches else None)
    return results


In [72]:
extracted_answers = extract_first_word_after_model_from_list(responses)

In [74]:
print(extracted_answers)

[None, 'Yes', 'Yes', None, None, None, None, 'Yes', None, 'Yes', 'Yes', None, None, 'Yes', None, None, 'Yes', None, None, None, None, None, None, None, None, 'Yes', None, None, None, 'Yes', 'Yes', None, None, None, 'Yes', None, 'Yes', 'Yes', 'Yes', None, None, 'Yes', None, None, None, None, None, None, None, None, 'Yes', 'Yes', 'Yes', None, None, 'Yes', 'Yes', None, 'Yes', None, None, None, 'Yes', None, None, None, None, None, None, None, None, 'Yes', 'Yes', None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 'Yes', 'Yes', 'Yes', 'Yes', None]


In [75]:
evaluate("hearsay", extracted_answers, test_df["answer"].tolist()[:len(extracted_answers)])



0.15853658536585366