In [2]:
import json
with open('asqa/ASQA.json', 'rb') as f:
    dataset = json.load(f)

In [3]:
print(f'dev set size: {len(dataset['dev'])}')
print(f'train set size: {len(dataset['train'])}')

dev set size: 948
train set size: 4353


In [24]:
list_i = 1

item = list(dataset['train'].values())[list_i]
for key in item.keys():
    print(key)
    if isinstance(item[key], list):
        for i in item[key]:
            print(f'\t{i}')
    else:
        print(f'\t{item[key]}')

ambiguous_question
	Who won the 2016 ncaa football national championship?
qa_pairs
	{'question': "Who won the 2016 season's ncaa football national championship?", 'short_answers': ['Clemson Tigers', '2016 Clemson Tigers football team', '2016 Clemson Tigers football', 'the Tigers', 'Clemson', 'Clemson University'], 'context': "The 13–1 Alabama Crimson Tide won the game, holding off the undefeated Clemson Tigers 45–40 in the fourth quarter. Accompanied by a talented receiving corps, Clemson's Heisman Finalist quarterback Deshaun Watson had a historic performance, setting the record for most total yards in national championship game history, with 478 yards (405 passing / 73 rushing) against the nation's third-ranked defense in Alabama, breaking the record previously set by Vince Young in the 2006 Rose Bowl. Following the game, the AP Poll also named Alabama as its top team of the season, giving Alabama their fourth title in seven seasons. Both Clemson and Alabama finished the season 14–1.

In [18]:
from interlinked import AI
from interlinked.core.clients.googleaiclient import GoogleAIClient
from typing import List
import json
import re


def create_disambiguation_prompt(ambiguous_question: str) -> str:
    """
    Create a prompt for disambiguating an ambiguous question using few-shot examples.
    
    Args:
        ambiguous_question: The ambiguous question to disambiguate
        
    Returns:
        A formatted prompt for the LLM
    """
    
    prompt = f"""You are an expert at identifying ambiguity in questions and generating clarifying questions to break down the original query and resolve that ambiguity. 
Given a question, your task is as follows:
1. Decide whether the question needs disambiguation, if not, output [CONTINUE].
2. If the question needs disambiguation, output [CLARIFY] token, followed by 2-4 specific clarifying questions.

Use the following examples:

==========================
Question: When does the new bunk'd come out?
Output:[CLARIFY]
- When does episode 42 of bunk'd come out?
- When does episode 41 of bunk'd come out?
- When does episode 40 of bunk'd come out?

==========================
Question: Who won the 2016 ncaa football national championship?
Output:[CLARIFY]
- Who won the 2016 season's ncaa football national championship?
- Who won the ncaa football national championship played in 2016?

==========================
Question: When was the last time the death penalty was used in pa?
Output:[CLARIFY]
- As of 2017, when was the last time the death penalty was carried out in PA?
- As of 2016, when was the last time the death penalty was carried out in PA?
- As of 2015, when was the last time the death penalty was carried out in PA?

==========================
Question: Where does failure of the left ventricle cause increased pressure?
Output: [CONTINUE]

Now, given the following ambiguous question, generate 2-4 specific clarifying questions that would help resolve the ambiguity. Focus on the key ambiguous elements like:
- Time references (which year, season, specific dates)
- Specific entities, episodes, or versions
- Context or scope differences
- Different interpretations of the same terms

Format of your response: 
1. First generate the [CONTINUE]/[CLARIFY] token. 
2. If clarification is needed, format the clarifying questions as a simple list with each question on a new line starting with a dash (-).
3. Phrase the questions as OBJECTIVE questions starting with when, who, what, etc. Do not EXPECT a user reply using questions like: "are you refering to ..." or "what do you mean...".

Question: {ambiguous_question}
Output:"""

    return prompt


def disambiguate_question(ambiguous_question: str, api_key: str = "in-8LxOfglvSxalWFfVDbd7ug") -> List[str]:
    """
    Disambiguate an ambiguous question by generating clarifying questions.
    
    Args:
        ambiguous_question: The ambiguous question to disambiguate
        api_key: Google AI API key (default uses the same key from run_interlinked.py)
        
    Returns:
        A list of clarifying questions that help resolve the ambiguity
    """
    
    # Create the disambiguation prompt
    prompt = create_disambiguation_prompt(ambiguous_question)
    
    # Initialize the Google AI client
    client = GoogleAIClient(model_name='gemini-2.5-flash', api_key=api_key)
    
    try:
        # Call the LLM using the interlinked library
        observation = AI.ask(prompt=prompt, client=client)
        response = observation.response
        
        return response
        # Parse the response to extract clarifying questions
        clarifying_questions = []
        
        # Split response by lines and extract questions
        lines = response.strip().split('\n')
        for line in lines:
            line = line.strip()
            # Look for lines that start with - or are formatted as questions
            if line and (line.startswith('-') or line.startswith('•')):
                # Clean up the question
                question = line.lstrip('- •').strip()
                if question and (question.endswith('?') or 'when' in question.lower() or 'who' in question.lower() or 'what' in question.lower() or 'where' in question.lower() or 'how' in question.lower() or 'why' in question.lower()):
                    clarifying_questions.append(question)
        
        # If no questions found with bullet points, try to extract questions directly
        if not clarifying_questions:
            # Look for sentences ending with question marks
            sentences = re.split(r'[.!?]+', response)
            for sentence in sentences:
                sentence = sentence.strip()
                if sentence and (sentence.endswith('?') or any(word in sentence.lower() for word in ['when', 'who', 'what', 'where', 'how', 'why'])):
                    if not sentence.endswith('?'):
                        sentence += '?'
                    clarifying_questions.append(sentence)
        
        return clarifying_questions
        
    except Exception as e:
        print(f"Error calling LLM: {e}")
        return []

In [19]:
list_i = 0

item = list(dataset['dev'].values())[list_i]
print(f'ambiguous_question: {item['ambiguous_question']}')
print(f'Ground-truth Clarifying questions: ')
for q in [qa['question'] for qa in item['qa_pairs']]:
    print(f'\t{q}')
    
print('-------------------------')
print('GENERATED:')
generated_cqs = disambiguate_question(item['ambiguous_question'])
print(generated_cqs)

ambiguous_question: Who has the highest goals in world football?
Ground-truth Clarifying questions: 
	Who has the highest goals in men's world international football?
	Who has the highest goals all-time in men's football?
	Who has the highest goals in women's world international football?
-------------------------
GENERATED:
{
  "output": "[CLARIFY]\n- Who has the highest all-time career goals in men's professional football?\n- Who has the highest goals in a single calendar year in men's professional football?\n- Who has the highest all-time career goals in women's international football?"
}


In [20]:
list_i = 1

item = list(dataset['dev'].values())[list_i]
print(f'ambiguous_question: {item['ambiguous_question']}')
print(f'Ground-truth Clarifying questions: ')
for q in [qa['question'] for qa in item['qa_pairs']]:
    print(f'\t{q}')
    
print('-------------------------')
print('GENERATED:')
generated_cqs = disambiguate_question(item['ambiguous_question'])
print(generated_cqs)

ambiguous_question: Who is the original artist of sound of silence?
Ground-truth Clarifying questions: 
	Who is the original artist of sound of silence, the song, released in 1964?
	Who is the original artist of sound of silence, the album?
	Who is the original artist of sound of silence, the song, released in 2016?
-------------------------
GENERATED:
CONTINUE


In [21]:
list_i = 2

item = list(dataset['dev'].values())[list_i]
print(f'ambiguous_question: {item['ambiguous_question']}')
print(f'Ground-truth Clarifying questions: ')
for q in [qa['question'] for qa in item['qa_pairs']]:
    print(f'\t{q}')
    
print('-------------------------')
print('GENERATED:')
generated_cqs = disambiguate_question(item['ambiguous_question'])
print(generated_cqs)

ambiguous_question: When was the first apple i phone made?
Ground-truth Clarifying questions: 
	When was the first apple i phone released?
	When was the first apple i phone for beta testing made?
	When was the first apple i phone 1 made?
	When was the first apple i phone beta made?
-------------------------
GENERATED:
{
  "token": "[CLARIFY]",
  "questions": [
    "When was the first Apple iPhone announced?",
    "When was the first Apple iPhone released to the public?",
    "When did Apple begin manufacturing the first iPhone model?"
  ]
}


In [22]:
list_i = 3

item = list(dataset['dev'].values())[list_i]
print(f'ambiguous_question: {item['ambiguous_question']}')
print(f'Ground-truth Clarifying questions: ')
for q in [qa['question'] for qa in item['qa_pairs']]:
    print(f'\t{q}')
    
print('-------------------------')
print('GENERATED:')
generated_cqs = disambiguate_question(item['ambiguous_question'])
print(generated_cqs)

ambiguous_question: Who played the weasley brothers in harry potter?
Ground-truth Clarifying questions: 
	Who played  Bill weasley in Harry Potter and the Prisoner of Azkaban?
	Who played percy weasley in harry potter?
	Who played fred weasley in harry potter?
	Who played ron weasley in harry potter?
	Who played george weasley in harry potter?
	Who played  Bill weasley in harry potter (2001-2011)?
-------------------------
GENERATED:
[
  "CLARIFY",
  "- Who played Fred Weasley in Harry Potter?",
  "- Who played George Weasley in Harry Potter?",
  "- Who played Ron Weasley in Harry Potter?",
  "- Who played Percy Weasley in Harry Potter?"
]
