In [2]:
import pandas as pd
import torch
import json
from transformers import AutoModelForCausalLM, AutoTokenizer

# Initialize the model and tokenizer
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# Load the model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="cuda",
    use_cache=None,
    attn_implementation=None,
)
model.device

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [8]:
def generate_output(prompt, max_tokens):
    batch = tokenizer(prompt, return_tensors="pt")
    batch = {k: v.to("cuda") for k, v in batch.items()}
    with torch.no_grad():
        outputs = model.generate(
            **batch,
            max_new_tokens=max_tokens,
            do_sample=False,
            top_p=1.0,
            temperature=0,
            use_cache=True,
            top_k=50,
            repetition_penalty=1.2,
            length_penalty=1,
            output_hidden_states=True,
            return_dict_in_generate=True,
        )
    output_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
    return output_text[len(prompt):].strip()
# Load input JSON file
input_file = "legal_reasoning_dataset.json"  # Path to input JSON file
output_json_file = "/home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json"  # Path to save the output CSV file
try:
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)
except Exception as e:
    print(f"Error loading input file: {e}")
    data = {"legal_scenarios": []}
# Prepare a list to store rows for the CSV
output_scenarios = []
# Process each scenario

for scenario in data.get("legal_scenarios", []):
    legal_context = scenario['Context']
    question = scenario['Question']
    options = scenario['Options']
    
    # Generate the first output
    user_prompt1 = legal_context + question + options + '''Task:
    You are a helpful legal assistant. Choose the correct option by performing legal reasoning while strictly adhering to
    the legal context and analysis provided. There is only one single correct answer. So only one option is correct.
    
    Important: Dont generate any beginning or end notes in the response . ONLY THE STEP BY STEP LEGAL REASONING FOLLOWED BY THE CORRECT OPTION MAINTAINING THE CORRECT FORMAT.

    The Reasoning steps must be titled "### Reasoning Steps:" and Answer of Choice must be titled "### Answer of Choice:" so that the output can be parsed. 
    There will be a heavy penalty in the failure to do so.
    
    Please ensure that the output is clearly separated by special markers so that it can be parsed into individual elements later on.

    ---

    ### Reasoning Steps:
    Step 1: [Premise-based reasoning derived from the legal_context]
    Step 2: [Next reasoning step derived from previous steps and citing relevant premises]
    ...
    Step N: [Final reasoning step and conclusion based on the chain of reasoning]

    ---

    ### Answer of Choice:
    Just one alphabet corresponding to the answer choice (A, B, C or D)
    '''
    
    output_text1 = generate_output(user_prompt1, 1000)
    
    # Extract reasoning and correct answer
    reasoning_start = output_text1.find("### Reasoning Steps:")
    answer_start = output_text1.find("### Answer of Choice:")

    if reasoning_start != -1 and answer_start != -1:
        legal_reasoning = output_text1[reasoning_start + len("### Reasoning Steps:"):answer_start].strip()
        correct_answer = output_text1[answer_start + len("### Answer of Choice:"):].strip()[:1]
    else:
        legal_reasoning = "Error parsing reasoning."
        correct_answer = "Error parsing answer."

    # Add to output scenarios
    output_scenarios.append({
        "id": scenario_id,
        "context": legal_context,
        "question": question,
        "options": options,
        "legal_reasoning": legal_reasoning,
        "correct_answer": correct_answer
    })

    # Wrap in the required structure
    output_data = {
        "legal_scenarios": output_scenarios
    }
    
    # Save to JSON file
    try:
        with open(output_json_file, "w", encoding="utf-8") as f:
            json.dump(output_data, f, indent=4, ensure_ascii=False)
        #print(f"Output saved to {output_json_file}")
    except Exception as e:
        print(f"Error saving output file: {e}")


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.


Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json
Output saved to /home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_answers.json


In [4]:
def generate_output(prompt, max_tokens):
    batch = tokenizer(prompt, return_tensors="pt")
    batch = {k: v.to("cuda") for k, v in batch.items()}
    with torch.no_grad():
        outputs = model.generate(
            **batch,
            max_new_tokens=max_tokens,
            do_sample=False,
            top_p=1.0,
            temperature=0,
            use_cache=True,
            top_k=50,
            repetition_penalty=1.2,
            length_penalty=1,
            output_hidden_states=True,
            return_dict_in_generate=True,
        )
    output_text = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True)
    return output_text[len(prompt):].strip()

input_file = "legal_reasoning_with_questions.json"  # Path to input JSON file
output_json_file = "/home/smanylal/MidTermReport1/NLPFinal/New/legal_reasoning_with_final_answers.json"

# Load the input file
try:
    with open(input_file, "r", encoding="utf-8") as f:
        data = json.load(f)
except Exception as e:
    print(f"Error loading input file: {e}")
    data = {"legal_scenarios": []}

json_rows = []

# Process each scenario
for scenario in data.get("legal_scenarios", []):
    scenario_id = scenario['id']
    legal_context = scenario['context']
    question = scenario['question']
    options = scenario['options']
    legal_reasoning = scenario['legal_reasoning']
    correct_answer = scenario['correct_answer']
    verification_questions = scenario.get('verification_questions', [])

    # Ensure verification_questions is properly formatted for use
    if isinstance(verification_questions, list):
        verification_questions_text = '\n'.join(verification_questions)
    else:
        print(f"Error: Verification questions are not a list for scenario ID {scenario_id}")
        verification_questions_text = "Error in verification questions format."


    #print(verification_questions_text)
    # Generate prompt for the output
    user_prompt3 = (
        legal_context + question + options + legal_reasoning + verification_questions_text +
        '''Task:
        You are a helpful legal assistant.  You are given:
        1) a legal_context.
        2) a question.
        3) answer options to the given question out of which only one is true. 
        4) the legal_reasoning behind the legal_context.
        5) verification questions

        Your task is to analyse the reasoning steps and verify them against their corresponding verification_questions. 
        If answering the questions results in a change of the response, provide the following:

        Rewrite the reasoning steps to correct the identified issues, ensuring that they address the faults or fallacies identified through the verification questions.
        If the correct answer changes due to the updated reasoning, provide the revised choice.

        Strictly follow the guidelines given below. the answers must be in the format given below.
        The Final Answer of Choice must be titled "### Final Answer of Choice:" so that the output can be parsed. there will be a heavy penalty if this isn't followed.
        Please ensure that the output is clearly separated by special markers as given below so that it can be parsed into individual elements later on:

        ---
        
        ### Updated Reasoning Steps:
        Step 1: [Premise-based reasoning derived from the legal_context]
        Step 2: [Next reasoning step derived from previous steps and citing relevant premises]
        ...
        Step N: [Final reasoning step and conclusion based on the chain of reasoning]

        ---

        ### Final Answer of Choice:
        Just one alphabet corresponding to the answer choice (A, B, C or D)'''
    )

    try:
        # Generate output from the prompt
        final_answer = generate_output(user_prompt3, 1500)
    except Exception as e:
        print(f"Error generating output for scenario ID {scenario_id}: {e}")
        final_answer = "Error generating output."

    # Extract reasoning and correct answer
    reasoning_start = final_answer.find("### Updated Reasoning Steps")
    answer_start = final_answer.find("### Final Answer of Choice")

    if reasoning_start != -1 and answer_start != -1:
        updated_legal_reasoning = final_answer[
            reasoning_start + len("### Updated Reasoning Steps"):answer_start
        ].strip()
        final_correct_answer = final_answer[
            answer_start + len("### Final Answer of Choice"):
        ].strip()[2:3]
    else:
        final_correct_answer = final_answer
        updated_legal_reasoning = final_answer

    # Append the data to the list for JSON
    json_rows.append({
        "ID": scenario_id,
        "Context": legal_context,
        "Question": question,
        "Options": options,
        "legal_reasoning": legal_reasoning,
        "correct_answer": correct_answer,
        "verification_questions": verification_questions,
        "updated_reasoning": updated_legal_reasoning,
        "final_answer": final_correct_answer
    })

# Wrap the list in the final JSON structure
    output_data = {
        "legal_scenarios": json_rows
    }
    
    # Save the data to the JSON file
    try:
        with open(output_json_file, "w", encoding="utf-8") as f:
            json.dump(output_data, f, indent=4, ensure_ascii=False)
        #print(f"Output saved to {output_json_file}")
    except Exception as e:
        print(f"Error saving output file: {e}")


Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Setting `pad_token_id` to `eos_token_id`:None for open-end generation.
Settin