In [None]:
# ==============================================================================
# STEP 1: SETUP AND INITIALIZATION FOR VERTEX AI
# ==============================================================================
import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig, HarmCategory, HarmBlockThreshold
import pandas as pd
import os
import json
from tqdm.notebook import tqdm
import time

# --- Vertex AI project settings ---
# Google Cloud project and region passed to the SDK below.
PROJECT_ID = "hack-thelaw25cam-586"
LOCATION = "us-central1"

# No credentials are passed explicitly; the SDK is initialised with the
# project and location only.
vertexai.init(project=PROJECT_ID, location=LOCATION)
print(f"Vertex AI initialized for project: {PROJECT_ID}")

# --- Generation settings shared by every extraction request ---
generation_config = GenerationConfig(
    max_output_tokens=8192,
    temperature=0.2,
    top_k=40,
    top_p=0.95,
)

# Apply the same blocking threshold to each harm category.
safety_settings = {
    category: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    for category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

# --- Model handle ---
# The extraction function defined later in this cell sends its prompts
# through this module-level `model` object.
model = GenerativeModel(
    "gemini-2.0-flash-lite-001",
    safety_settings=safety_settings,
    generation_config=generation_config,
)
print("Setup complete. Gemini model via Vertex AI is ready.")


# ==============================================================================
# STEP 2: FUNCTION TO ANALYZE TEXT WITH ENHANCED CONTEXT EXTRACTION
# ==============================================================================
def analyze_decision_text(decision_text: str) -> list:
    """
    Ask the Gemini model to extract the parties' legal arguments from a decision.

    The prompt instructs the model to return a JSON array of objects with the
    keys `argument_summary`, `party`, `legal_basis`, `key_keywords`,
    `court_followed` and `tribunal_reasoning`.

    Args:
        decision_text: Full text of the decision or opinion to analyze.

    Returns:
        A list of argument dicts parsed from the model's reply. An empty list
        is returned when the reply is empty, blocked, not valid JSON, not a
        JSON array, or when the API call fails; the problem is printed rather
        than raised so that a long batch run keeps going.
    """
    prompt = [
        "You are an expert legal analyst specializing in international arbitration. Your task is to read the following legal decision text and identify the distinct arguments made by the Claimant (or Petitioner/Investor) and the Respondent (or Defendant/State).",
        "For each distinct argument you identify, you must provide a structured JSON object with the following information:",
        "1. `argument_summary`: A concise, one-sentence summary of the argument.",
        "2. `party`: The party that made the argument ('Claimant' or 'Respondent').",
        "3. `legal_basis`: The specific treaty articles (e.g., 'DR-CAFTA Art. 10.16.1'), legal principles (e.g., 'effet utile', 'VCLT Art. 31'), or case law cited as the foundation for the argument.",
        "4. `key_keywords`: A JSON list of important legal or factual keywords related to the argument (e.g., ['reflective loss', 'fork-in-the-road', 'waiver requirement', 'NAFTA']).",
        "5. `court_followed`: Whether the tribunal/court followed the argument ('Yes', 'No', or 'Partial/Deferred').",
        "6. `tribunal_reasoning`: A concise summary of WHY the tribunal agreed or disagreed with this specific argument. If the reasoning is not explicitly stated, use 'N/A'.",

        "Provide your output as a valid JSON array of objects only. Do not include any other text, explanation, or markdown formatting. Each object must have these exact keys: `argument_summary`, `party`, `legal_basis`, `key_keywords`, `court_followed`, `tribunal_reasoning`.",
        "Example output format:",
        """
[
  {
    "argument_summary": "The Respondent argued that claims for reflective loss must be brought under Article 10.16.1(b) to avoid rendering that provision meaningless.",
    "party": "Respondent",
    "legal_basis": "DR-CAFTA Art. 10.16.1(b), Principle of 'effet utile', VCLT Art. 31",
    "key_keywords": ["reflective loss", "derivative claim", "effet utile", "shareholder claims", "treaty interpretation"],
    "court_followed": "No",
    "tribunal_reasoning": "The majority found that Article 10.16.1(b) was not rendered useless because it offers a different utility, such as recovering 100% of an enterprise's losses, which is distinct from a shareholder's specific loss claim."
  }
]
        """,
        "Here is the legal decision text:",
        "---",
        decision_text
    ]

    # Bound up front so the error handler can tell "no response at all" apart
    # from "response whose text could not be read or parsed".
    response = None
    try:
        response = model.generate_content(prompt)
        # Remove markdown code fences first, then trim, so a reply that
        # contains nothing but fences is treated as empty.
        cleaned_response_text = response.text.replace("```json", "").replace("```", "").strip()
        if not cleaned_response_text:
            return []  # Nothing to parse
        extracted_arguments = json.loads(cleaned_response_text)
    except (json.JSONDecodeError, ValueError) as e:
        print(f"Error parsing JSON from API response: {e}")
        # Reading `.text` can itself raise (the same ValueError that brought
        # us here), and `response` may still be None, so guard the lookup
        # instead of relying on getattr's default, which only covers
        # AttributeError.
        try:
            raw_text = response.text
        except (AttributeError, ValueError):
            raw_text = 'Response blocked or empty.'
        print(f"Raw response was: {raw_text}")
        return []
    except Exception as e:
        print(f"An unexpected error occurred during API call: {e}")
        return []

    if not isinstance(extracted_arguments, list):
        print(f"Warning: API returned a non-list object: {type(extracted_arguments)}")
        return []

    # Callers attach metadata keys to every entry, so keep only JSON objects.
    return [arg for arg in extracted_arguments if isinstance(arg, dict)]

# ==============================================================================
# STEP 3: MAIN SCRIPT TO PROCESS FILES AND BUILD DATABASE
# ==============================================================================

# Directory that holds one JSON file per arbitration case.
FOLDER_PATH = 'jus_mundi_hackathon_data/cases/'
# Collects every extracted argument (with case/document metadata attached).
all_extracted_arguments = []

if not os.path.exists(FOLDER_PATH):
    print(f"Error: Folder not found at '{FOLDER_PATH}'. Please check the path.")
else:
    json_files = sorted(f for f in os.listdir(FOLDER_PATH) if f.endswith('.json'))

    # --- BATCH SELECTION ---
    # This run covers files 100-299 of the sorted listing.
    # To process every file, replace the slice below with `json_files`.
    files_to_process = json_files[100:300]
    print(f"Starting processing for {len(files_to_process)} test files...")

    for filename in tqdm(files_to_process, desc="Processing Files"):
        file_path = os.path.join(FOLDER_PATH, filename)

        # One bad file must not stop the batch: report it and move on.
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                case_data = json.load(f)

            case_identifier = case_data.get("Identifier", "Unknown_ID")
            case_title = case_data.get("Title", "Unknown_Title")

            print(f"\nProcessing Case: {case_title} | ID: {case_identifier}")

            # `or []` also covers keys that are present but set to null, which
            # a plain `.get(key, [])` default would let through as None.
            for document in case_data.get("Decisions") or []:
                if not document:
                    continue

                # The main decision and its opinions are analyzed the same way.
                all_docs_to_analyze = [document] + (document.get("Opinions") or [])

                for doc in all_docs_to_analyze:
                    # Skip null entries and documents without any text.
                    if not (doc and doc.get("Content")):
                        continue

                    doc_text = doc["Content"]
                    doc_title = doc.get("Title", "N/A")
                    doc_type = doc.get("Type", "N/A")

                    # A small delay to avoid hitting API rate limits, especially on long runs.
                    time.sleep(0.1)

                    print(f"  - Analyzing: '{doc_title}' ({doc_type})")
                    arguments = analyze_decision_text(doc_text)

                    for arg in arguments:
                        # Metadata can only be attached to JSON objects.
                        if not isinstance(arg, dict):
                            continue
                        arg['case_identifier'] = case_identifier
                        arg['case_title'] = case_title
                        arg['document_title'] = doc_title
                        arg['document_type'] = doc_type
                        all_extracted_arguments.append(arg)

        except Exception as e:
            print(f"Could not process file {filename}. Error: {e}")

# ==============================================================================
# STEP 4: CREATE AND SAVE THE DATABASE
# ==============================================================================
print("\nProcessing complete. Creating DataFrame from extracted arguments.")
df = pd.DataFrame(all_extracted_arguments)

if df.empty:
    print("No arguments were extracted. The DataFrame is empty.")
else:
    # Metadata columns first, followed by the fields requested from the model.
    column_order = [
        'case_identifier',
        'case_title',
        'document_title',
        'document_type',
        'party',
        'argument_summary',
        'legal_basis',
        'key_keywords',
        'court_followed',
        'tribunal_reasoning',
    ]

    # Add any expected column the model never produced, filled with None,
    # so the reordering below cannot fail on a missing key.
    for col in column_order:
        if col in df.columns:
            continue
        df[col] = None

    df = df[column_order]

    # Write the table to disk as CSV without the index column.
    output_filename = 'legal_arguments_database.csv'
    df.to_csv(output_filename, index=False, encoding='utf-8-sig')

    print(f"\nDatabase created successfully with {len(df)} arguments.")
    print(f"Saved to '{output_filename}'")

    # Show the first 15 rows in the notebook.
    display(df.head(15))

Vertex AI initialized for project: hack-thelaw25cam-586
Setup complete. Gemini model via Vertex AI is ready.
Starting processing for 200 test files...


Processing Files:   0%|          | 0/200 [00:00<?, ?it/s]


Processing Case: Bacilio Amorrortu v. Peru (II) | ID: 
  - Analyzing: 'Procedural Order No. 2 (Decision on Bifurcation)' (Bifurcation)

Processing Case: Itisaluna Iraq and others v. Iraq | ID: IDS-796
  - Analyzing: 'Award' (Award (Final))
  - Analyzing: 'Dissenting Opinion of Wolfgang Peter (Incorporated in Final Award)' (Dissenting Opinion)

Processing Case: Watkins Holdings v. Spain | ID: IDS-673
  - Analyzing: 'Award' (Award (Final))
  - Analyzing: 'Dissent on Liability and Quantum of Prof. Dr. Hélène Ruiz Fabri' (Dissenting Opinion)
  - Analyzing: 'Decision on Spain's Request for Rectification of the Award' (Supplementary)

Processing Case: WCV and Channel Crossings v. Czech Republic | ID: IDS-694
  - Analyzing: 'Final Award' (Award (Final))

Processing Case: A11Y v. Czech Republic | ID: IDS-578
  - Analyzing: 'Decision on Bifurcation' (Bifurcation)
  - Analyzing: 'Decision on Jurisdiction' (Jurisdiction)
  - Analyzing: 'Award' (Award (Final))

Processing Case: Anglo American v. 

Unnamed: 0,case_identifier,case_title,document_title,document_type,party,argument_summary,legal_basis,key_keywords,court_followed,tribunal_reasoning
0,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Claimant,The Claimant argues that the Respondent's obje...,USPTPA Art. 10.20.4,"[bifurcation, jurisdictional objections, Artic...",Yes,The Tribunal agreed that the objections were n...
1,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Claimant,The Claimant argues that the Respondent's obje...,"UNCITRAL Rules Art. 23(3), Glamis Gold test","[bifurcation, UNCITRAL Rules, Glamis Gold, fri...",Yes,The Tribunal agreed that bifurcation was not w...
2,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Claimant,The Claimant argues that the time-bar objectio...,USPTPA Art. 10.18.1,"[time-bar, limitation period, knowledge of bre...",Yes,The Tribunal agreed that the time-bar objectio...
3,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Claimant,The Claimant argues that the consultation and ...,"USPTPA Art. 10.15, USPTPA Art. 10.16","[consultation and negotiation, pre-arbitral re...",Yes,The Tribunal agreed that the analysis required...
4,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Claimant,The Claimant argues that the notice of intent ...,USPTPA Art. 10.16.2,"[notice of intent, pre-arbitral requirements, ...",Yes,The Tribunal agreed that adjudicating Objectio...
5,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Respondent,The Respondent argues that the Claimant's clai...,USPTPA Art. 10.18.1,"[time-bar, limitation period, knowledge of bre...",No,The Tribunal found that the time-bar objection...
6,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Respondent,The Respondent argues that the Claimant failed...,"USPTPA Art. 10.15, USPTPA Art. 10.16","[consultation and negotiation, pre-arbitral re...",No,The Tribunal found that the analysis required ...
7,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Respondent,The Respondent argues that the Claimant failed...,USPTPA Art. 10.16.2,"[notice of intent, pre-arbitral requirements, ...",No,The Tribunal found that adjudicating Objection...
8,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Respondent,The Respondent argues that the Tribunal should...,USPTPA Art. 10.20.4,"[bifurcation, jurisdictional objections, Artic...",No,The Tribunal determined that the objections we...
9,,Bacilio Amorrortu v. Peru (II),Procedural Order No. 2 (Decision on Bifurcation),Bifurcation,Respondent,"The Respondent argues that, in the alternative...","UNCITRAL Rules Art. 23(3), Glamis Gold test","[bifurcation, UNCITRAL Rules, Glamis Gold, sub...",No,The Tribunal agreed that bifurcation was not w...


In [42]:
# ==============================================================================
# STRATEGY ANALYSIS SCRIPT (WITH SOURCE TEXT HIGHLIGHTING)
# ==============================================================================
import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig, HarmCategory, HarmBlockThreshold
import json

# --- Vertex AI project settings and SDK initialisation ---
PROJECT_ID = "hack-thelaw25cam-586"
LOCATION = "us-central1"
vertexai.init(project=PROJECT_ID, location=LOCATION)

# --- Generation settings for the strategy-decomposition task ---
generation_config = GenerationConfig(
    max_output_tokens=8192,
    temperature=0.3,
    top_k=40,
    top_p=0.95,
)

# Apply the same blocking threshold to each harm category.
safety_settings = {
    category: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    for category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

# --- Model handle ---
# NOTE(review): the messages printed on failure recommend
# 'gemini-1.5-pro-001' as a fallback; confirm that suggestion against the
# current Vertex AI model list before relying on it.
MODEL_NAME = "gemini-2.0-flash-lite-001"
try:
    reasoning_model = GenerativeModel(
        MODEL_NAME,
        safety_settings=safety_settings,
        generation_config=generation_config,
    )
except Exception as e:
    # Report which model failed, then re-raise so the cell stops here.
    print(f"ERROR: Could not initialize model '{MODEL_NAME}'. This model name may not be valid or accessible to your project.")
    print(f"Please use a standard model like 'gemini-1.5-pro-001'. Original error: {e}")
    raise
else:
    print(f"Vertex AI Initialized. Using user-specified model: '{MODEL_NAME}'.")


# ==============================================================================
# SELF-CONTAINED FUNCTION TO ANALYZE ARBITRATION STRATEGY
# ==============================================================================

def analyze_arbitration_strategy(strategy_text: str) -> str:
    """
    Decompose a free-text arbitration strategy into titled, categorised arguments.

    The model is asked for a JSON array whose objects carry `title`,
    `argument`, `category` and `source_text`. The return value is always a
    JSON string: the pretty-printed array on success, or an object with an
    `error` key (plus `raw_response` where available) on failure.
    """
    # Reject missing or whitespace-only input before calling the model.
    if not (strategy_text and strategy_text.strip()):
        return json.dumps({"error": "Strategy text cannot be empty"}, indent=4)

    prompt = [
        "You are an expert legal counsel in international investment arbitration. Your task is to analyze the following proposed case strategy and break it down into its core, distinct legal arguments.",
        "For each distinct argument you identify, you must provide:",
        "1. `title`: A very short, 3-4 word title for the argument (e.g., 'Statute of Limitations Bar').",
        "2. `argument`: Formulate the argument as a concise, standalone statement.",
        "3. `category`: Classify the argument into one of three categories: 'Jurisdiction', 'Admissibility', or 'Merits'.",
        "4. `source_text`: Quote *verbatim* the exact sentence or sentences from the original input text that directly form the basis for this argument.",

        "Provide your output as a valid JSON array of objects. Do not include any other text, explanation, or markdown formatting. Each object must have these exact keys: `title`, `argument`, `category`, and `source_text`.",
        "Example output format:",
        """
[
  {
    "title": "Time-Barred Claim",
    "argument": "The claim is inadmissible because it was filed more than three years after the claimant knew about the alleged damages, exceeding the treaty's statute of limitations.",
    "category": "Admissibility",
    "source_text": "The claimant clearly knew about the alleged damages for over five years before filing, which is well past the three-year time limit specified in the treaty's dispute resolution clause."
  }
]
        """,
        "Here is the strategy text to analyze:",
        "---",
        strategy_text
    ]

    try:
        response = reasoning_model.generate_content(prompt)
        text_response = response.text

        # The array is taken to span from the first '[' to the last ']',
        # which discards any text the model wrapped around it.
        first_bracket = text_response.find('[')
        last_bracket = text_response.rfind(']')

        if first_bracket == -1 or last_bracket == -1:
            return json.dumps({"error": "Could not find a valid JSON array in the model's response.", "raw_response": text_response}, indent=4)

        array_text = text_response[first_bracket:last_bracket + 1]
        return json.dumps(json.loads(array_text), indent=4)

    except json.JSONDecodeError as e:
        return json.dumps({"error": f"Failed to decode JSON after cleaning: {e}", "raw_response": response.text}, indent=4)
    except Exception as e:
        return json.dumps({"error": f"An unexpected error occurred: {str(e)}"}, indent=4)


# ==============================================================================
# EXAMPLE USAGE
# ==============================================================================

# --- Sample strategy text, written from the respondent's side ---
user_strategy = """
Our primary strategy is to first challenge the tribunal's jurisdiction. We will argue that the claimant's company, while incorporated locally, is ultimately owned by nationals of the host state, so there's no real foreign investment to protect. 
If that fails, we will argue that the claim itself is inadmissible. The claimant clearly knew about the alleged damages for over five years before filing, which is well past the three-year time limit specified in the treaty's dispute resolution clause.
Finally, on the substance of the case, we will demonstrate that the government's actions were legitimate, non-discriminatory regulatory measures designed to protect public health and did not amount to an expropriation of the claimant's investment. We will also contest their damages model, showing it's based on overly optimistic and speculative future profits.
"""

# --- Run the decomposition; the result is a JSON string ---
analyzed_strategy_json = analyze_arbitration_strategy(user_strategy)

# --- Write that JSON string to disk unchanged ---
output_filename = "input_arguments.json"
with open(output_filename, mode='w', encoding='utf-8') as fh:
    fh.write(analyzed_strategy_json)

print(f"Strategy analysis complete. Results saved to '{output_filename}'")

# --- Echo the JSON under a heading for immediate viewing ---
print("\n--- Decomposed Strategy with Source Highlighting ---", analyzed_strategy_json, sep="\n")

Vertex AI Initialized. Using user-specified model: 'gemini-2.0-flash-lite-001'.
Strategy analysis complete. Results saved to 'input_arguments.json'

--- Decomposed Strategy with Source Highlighting ---
[
    {
        "title": "No Foreign Investment",
        "argument": "The tribunal lacks jurisdiction because the claimant company is ultimately owned by nationals of the host state, thus there is no foreign investment.",
        "category": "Jurisdiction",
        "source_text": "We will argue that the claimant's company, while incorporated locally, is ultimately owned by nationals of the host state, so there's no real foreign investment to protect."
    },
    {
        "title": "Time-Barred Claim",
        "argument": "The claim is inadmissible because it was filed more than three years after the claimant knew about the alleged damages, exceeding the treaty's statute of limitations.",
        "category": "Admissibility",
        "source_text": "The claimant clearly knew about the all

In [52]:
# ==============================================================================
# ADVANCED STRATEGY ANALYSIS SCRIPT
# (With Factual Verification and Argument Discovery)
# ==============================================================================
import vertexai
from vertexai.generative_models import GenerativeModel, GenerationConfig, HarmCategory, HarmBlockThreshold
import json

# --- Vertex AI project settings and SDK initialisation ---
PROJECT_ID = "hack-thelaw25cam-586"
LOCATION = "us-central1"
vertexai.init(project=PROJECT_ID, location=LOCATION)

# --- Generation settings ---
generation_config = GenerationConfig(
    max_output_tokens=8192,
    temperature=0.4,  # Slightly higher temperature for more creative discovery
    top_k=40,
    top_p=0.95,
)

# Apply the same blocking threshold to each harm category.
safety_settings = {
    category: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE
    for category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}

# --- Model handle ---
# NOTE(review): the messages printed on failure recommend
# 'gemini-1.5-pro-001' as a fallback; confirm that suggestion against the
# current Vertex AI model list before relying on it.
MODEL_NAME = "gemini-2.0-flash-lite-001"
try:
    reasoning_model = GenerativeModel(
        MODEL_NAME,
        safety_settings=safety_settings,
        generation_config=generation_config,
    )
except Exception as e:
    # Report which model failed, then re-raise so the cell stops here.
    print(f"ERROR: Could not initialize model '{MODEL_NAME}'. This model name may not be valid or accessible to your project.")
    print(f"Please use a standard model like 'gemini-1.5-pro-001'. Original error: {e}")
    raise
else:
    print(f"Vertex AI Initialized. Using user-specified model: '{MODEL_NAME}'.")


# ==============================================================================
# SELF-CONTAINED FUNCTION TO ANALYZE ARBITRATION STRATEGY AGAINST FACTS
# ==============================================================================

def analyze_arbitration_strategy(strategy_text: str, factual_text: str) -> str:
    """
    Analyze a legal strategy against a factual background.

    The model is asked to split the strategy into arguments, check each one
    against the factual background, and add new arguments suggested by the
    facts. Each object in the requested JSON array carries `title`,
    `argument`, `category`, `factual_check`, `source_text` and
    `is_new_argument`.

    Args:
        strategy_text: The user's proposed case strategy.
        factual_text: Supporting factual material to check the strategy against.

    Returns:
        A JSON string: the pretty-printed array on success, or an object with
        an `error` key (plus `raw_response` where available) when an input is
        missing or blank, the model call fails, or the reply cannot be parsed.
    """
    # Whitespace-only input carries no content, so reject it along with
    # empty/None input instead of sending it to the model.
    if (
        not strategy_text or not strategy_text.strip()
        or not factual_text or not factual_text.strip()
    ):
        return json.dumps({"error": "Both strategy_text and factual_text must be provided"}, indent=4)

    prompt = [
        "You are an expert legal counsel in international investment arbitration. You will be given two pieces of text: a 'CASE STRATEGY' and a 'FACTUAL BACKGROUND'.",
        "Your task is to perform a comprehensive analysis and produce a single JSON array containing all identified arguments. You must perform three steps:",
        "1. Deconstruct the user's 'CASE STRATEGY' into its core arguments.",
        "2. For each of those arguments, cross-reference it with the 'FACTUAL BACKGROUND' and your knowledge of international arbitration to check for accuracy. Be aware that arguments from the users strategy might be factually incorect",
        "3. Identify any NEW potential arguments (for or against the user's position) that are suggested by the 'FACTUAL BACKGROUND' but were NOT mentioned in the 'CASE STRATEGY'.",

        "For EACH argument in the final JSON output, you must provide:",
        " - `title`: A very short, 3-4 word title for the argument.",
        " - `argument`: A concise, standalone statement of the argument.",
        " - `category`: Classify as 'Jurisdiction', 'Admissibility', or 'Merits'.",
        " - `factual_check`: true if it is consistent with the factual background or provide a short correction (e.g., 'Correction: The contract specifies a 90-day notice period, not 60.'). Be aware that the user may need to be corrected",
        " - `source_text`: For arguments from the user's strategy, quote the verbatim source sentence(s). For newly discovered arguments, this should be 'N/A'.",
        " - `is_new_argument`: A boolean value. `false` for arguments from the user's strategy, `true` for newly discovered arguments.",

        "Provide your output as a single, valid JSON array of objects only. Do not add explanations outside the JSON.",
        "Example output format:",
        """
[
  {
    "title": "Legitimate Regulatory Action",
    "argument": "The government's actions were legitimate regulatory measures for public health and not expropriation.",
    "category": "Merits",
    "factual_check": "true",
    "source_text": "we will demonstrate that the government's actions were legitimate, non-discriminatory regulatory measures designed to protect public health and did not amount to an expropriation of the claimant's investment.",
    "is_new_argument": false
  },
  {
    "title": "New Fork-in-the-Road Argument",
    "argument": "The claimant may be barred from arbitration because they first initiated proceedings regarding the same dispute in the host state's local courts.",
    "category": "Admissibility",
    "factual_check": "true",
    "source_text": "N/A",
    "is_new_argument": true
  }
]
        """,
        "--- CASE STRATEGY ---",
        strategy_text,
        "--- FACTUAL BACKGROUND ---",
        factual_text
    ]

    try:
        response = reasoning_model.generate_content(prompt)
        text_response = response.text
        # Take everything from the first '[' to the last ']' so that any text
        # the model wrapped around the array is discarded.
        start = text_response.find('[')
        end = text_response.rfind(']')

        if start != -1 and end != -1:
            json_str = text_response[start:end+1]
            parsed_json = json.loads(json_str)
            return json.dumps(parsed_json, indent=4)
        else:
            return json.dumps({"error": "Could not find a valid JSON array in the model's response.", "raw_response": text_response}, indent=4)

    except json.JSONDecodeError as e:
        return json.dumps({"error": f"Failed to decode JSON after cleaning: {e}", "raw_response": response.text}, indent=4)
    except Exception as e:
        # Covers API failures and replies whose text cannot be read.
        return json.dumps({"error": f"An unexpected error occurred: {str(e)}"}, indent=4)


# ==============================================================================
# EXAMPLE USAGE
# ==============================================================================

# --- 1. Sample case strategy to be analyzed ---
user_strategy = """
Our main line of attack will be on admissibility of the case for solar investment in spain. The claimant waited far too long to file this claim, well past the two-year prescription period mentioned in the treaty. We will also argue on the merits that our new tax law was a general measure that affected all companies equally and was not targeted at this foreign investor, so it cannot be a breach of FET. Also this is the wrong court they are addressing with this arbitration suit, they should have adressed the court for Kosovo.
"""

# --- 2. Factual material passed alongside the strategy ---
# The analysis function checks the strategy against this text and looks
# for arguments the strategy did not mention.
factual_background = """
Excerpts from Bilateral Investment Treaty (BIT) Article 9: 'A claim must be submitted to arbitration within three (3) years from the date on which the claimant first acquired knowledge of the breach.'
Excerpts from BIT Article 11: 'This treaty's protections shall not apply if the investment was made through misrepresentation or concealment of material facts.'
Internal Memo dated 2018: 'The claimant's initial application for the investment license from 2015 appears to omit any mention of their prior environmental violations in their home country.'
"""

# --- 3. Run the analysis; the result is a JSON string ---
analyzed_strategy_json = analyze_arbitration_strategy(user_strategy, factual_background)

# --- 4. Write the JSON string to disk unchanged, then echo it ---
output_filename = "strategy_analysis_fact_checked.json"
with open(output_filename, mode='w', encoding='utf-8') as fh:
    fh.write(analyzed_strategy_json)

print(f"Strategy analysis complete. Results saved to '{output_filename}'")

print("\n--- Fact-Checked & Augmented Strategy ---", analyzed_strategy_json, sep="\n")

Vertex AI Initialized. Using user-specified model: 'gemini-2.0-flash-lite-001'.
Strategy analysis complete. Results saved to 'strategy_analysis_fact_checked.json'

--- Fact-Checked & Augmented Strategy ---
[
    {
        "title": "Time-barred Claim",
        "argument": "The claimant's claim is inadmissible because it was filed outside the three-year prescription period stipulated in the BIT.",
        "category": "Admissibility",
        "factual_check": "true",
        "source_text": "The claimant waited far too long to file this claim, well past the two-year prescription period mentioned in the treaty.",
        "is_new_argument": false
    },
    {
        "title": "General Tax Measure",
        "argument": "The new tax law was a general measure applied equally to all companies and did not specifically target the foreign investor, therefore it does not breach FET.",
        "category": "Merits",
        "factual_check": "true",
        "source_text": "We will also argue on the mer