# Research: Extracting information from text
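This notebook demonstrates how to use the `QuestionExtract` class to return information extracted (or extrapolated) from a given text. It first uses `QuestionList` to generate a ranked list of programming languages, then passes that list as text to `QuestionExtract` to pull out specific items.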

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/expectedparrot/edsl/blob/main/docs/notebooks/question_extract_example.ipynb)

In [1]:
# Uncomment the next line to install the `edsl` package (e.g. when running on Colab).
# ! pip install edsl

In [2]:
from edsl.questions import QuestionList, QuestionExtract

In [3]:
# Display the example `QuestionList` instance returned by the class's `example()` helper.
QuestionList.example()

In [4]:
# Display the example `QuestionExtract` instance returned by the class's `example()` helper.
QuestionExtract.example()

In [5]:
# Ask for a ranked list of programming languages; the answer becomes the
# input text for the extraction question later in the notebook.
q1 = QuestionList(
    question_name="list",
    question_text="What are the most popular programming languages in order of popularity?",
)

# Run the question and keep the results for the cells that follow.
r1 = q1.run()

In [6]:
# Show only the answer to the "list" question from the results.
r1.select("list").print()

answer.list
"['JavaScript', 'Python', 'Java', 'C#', 'PHP', 'TypeScript', 'C++', 'C', 'Ruby', 'Swift']"


In [7]:
# Pull the "list" answer out of the results; the first element is the list of
# language names itself, which is reused to build the next question's text.
languages = r1.select("list").to_list()[0]
languages

['JavaScript',
 'Python',
 'Java',
 'C#',
 'PHP',
 'TypeScript',
 'C++',
 'C',
 'Ruby',
 'Swift']

In [8]:
# Feed the generated ranking back in as plain text. Each key of the answer
# template names a value to be pulled out of that text.
q2 = QuestionExtract(
    question_name="extract",
    question_text=(
        "The most popular programming languages in order of popularity are: "
        f"{', '.join(languages)}"
    ),
    answer_template={
        "second_most_popular": "name",
        "least_popular": "name",
        "most_popular": "name",
    },
)

# Run the extraction question and keep the results for inspection below.
r2 = q2.run()

In [9]:
# Show the extracted values, keyed as in the answer template.
r2.select("extract").print()

answer.extract
"{'second_most_popular': 'Python', 'least_popular': 'Swift', 'most_popular': 'JavaScript'}"


In [10]:
# Show every "prompt.*" column of the results, i.e. the user and system
# prompts recorded for the extraction question.
r2.select("prompt.*").print()

prompt.extract_user_prompt,prompt.extract_system_prompt
"{'text': 'You are given the following input: ""The most popular programming languages in order of popularity are: JavaScript, Python, Java, C#, PHP, TypeScript, C++, C, Ruby, Swift"".\nCreate an ANSWER should be formatted like this: ""{\'second_most_popular\': \'name\', \'least_popular\': \'name\', \'most_popular\': \'name\'}"",\nand it should have the same keys but values extracted from the input.\nIf the value of a key is not present in the input, fill with ""null"".\nReturn a valid JSON formatted like this:\n{""answer"": }\nONLY RETURN THE JSON, AND NOTHING ELSE.', 'class_name': 'Extract'}","{'text': 'You are answering questions as if you were a human. Do not break character. You are an agent with the following persona:\n{}', 'class_name': 'AgentInstruction'}"


In [11]:
# Unlike q2, this text names no languages itself: it asks the model to come up
# with them, which shows what QuestionExtract returns when the requested values
# are not present in the input text.
q3 = QuestionExtract(
    question_name="create",
    question_text="Create a list of the most popular programming languages to use in providing your response.",
    answer_template={
        "most_popular": "name",
        "second_most_popular": "name",
    },
)

# Run the question and keep the results for inspection below.
r3 = q3.run()

In [12]:
# Show the answer to the "create" question.
r3.select("create").print()

answer.create
"{'most_popular': 'null', 'second_most_popular': 'null'}"
