In [1]:
from BRAD import llms
llm = llms.load_nvidia()

Enter your NVIDIA API key:  ········


# PyDantic Tutorials

In [3]:
from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator


## Jokes

In [24]:
class Joke(BaseModel):
    setup: str = Field(description="question to set up a joke")
    punchline: str = Field(description="answer to resolve the joke")

    # You can add custom validation logic easily with Pydantic.
    @validator("setup")
    def question_ends_with_question_mark(cls, field):
        if field[-1] != "?":
            raise ValueError("Badly formed question!")
        return field

joke_query = "Tell me a new joke about sports"

# Set up a parser + inject instructions into the prompt template.
parser = PydanticOutputParser(pydantic_object=Joke)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser

chain.invoke({"query": joke_query})


Joke(setup='Why did the football go to the doctor?', punchline='Because it was feeling a little deflated!')

## Movies

In [27]:
# Here's another example, but with a compound typed field.
class Actor(BaseModel):
    name: str = Field(description="name of an actor")
    film_names: List[str] = Field(description="list of names of films they starred in")


actor_query = "Generate the filmography for the actor from Borat."

parser = PydanticOutputParser(pydantic_object=Actor)

prompt = PromptTemplate(
    template="Answer the user query.\n{format_instructions}\n{query}\n",
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

chain = prompt | llm | parser

chain.invoke({"query": actor_query})

Actor(name='Sacha Baron Cohen', film_names=['Borat: Cultural Learnings of America for Make Benefit Glorious Nation of Kazakhstan', 'Brüno', 'The Dictator'])

# Parsing Lists

In [32]:
from typing import List
from pydantic import BaseModel
from pydantic import RootModel

In [30]:
class Item(BaseModel):
    thing_number: int
    thing_description: str
    thing_amount: float


In [34]:
class ItemList(BaseModel):
    RootModel: List[Item]
    

In [35]:
just_data = [
    {"thing_number": 123, "thing_description": "duck", "thing_amount": 4.56},
    {"thing_number": 456, "thing_description": "cow", "thing_amount": 7.89},
]
item_list = ItemList(__root__=just_data)

a_json_duck = {"thing_number": 123, "thing_description": "duck", "thing_amount": 4.56}
item_list.__root__.append(a_json_duck)


ValidationError: 1 validation error for ItemList
RootModel
  Field required [type=missing, input_value={'__root__': [{'thing_num... 'thing_amount': 7.89}]}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.6/v/missing

In [None]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
from langchain_core.prompts.prompt import PromptTemplate

from typing import List

from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field, validator

from BRAD.promptTemplates import plannerTemplate, plannerEditingTemplate

In [3]:
prompt = """/force PLANNER write a 3 step pipeline to 1. perform a lit search of PCNA with the RAG. Use a prompt such as, "What role does PCNA play in the cell cycle and cell cycle imaging?", then 2. run the hwg analysis pipeline to find first order interactions of PCNA loaded from step 2. Next, (3.) Load the output file of the hwg codes (step 2) and perform gene enrichment and identify the pathways PCNA is involved in with enrichr. Finally, write a small report summarizing each of these findings in Step 4"""
memory = ConversationBufferMemory(ai_prefix="BRAD")

In [28]:
template = plannerTemplate()
PROMPT = PromptTemplate(input_variables=["history", "input"], template=template)
conversation = ConversationChain(prompt  = PROMPT,
                                 llm     = llm,
                                 verbose = True,
                                 memory  = memory,
                                )

In [29]:
from pydantic import BaseModel, Field
from typing import List

class PipelineStage(BaseModel):
    step_number: int
    module: str
    prompt: str = Field(..., title="Prompt")
    notes: str = Field("", title="Notes")

    class Config:
        title = "Pipeline Stage"

parser = PydanticOutputParser(pydantic_object=PipelineStage)
chain = conversation | parser

chain.invoke(prompt)



[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m**INSTRUCTIONS:**
You are planning a bioinformatics analysis pipeline to address a user's query. Your task is to outline a multi-step workflow using the available methods listed below. Each method should be used appropriately to ensure a thorough analysis. For each step in the pipeline, explain the goal of the process as well as provide a prompt to the chatbot that will execute that step. If information is passed between stages, such as from literature or code to databases, indicate the dependencies of steps in the prompt.

**Available Methods:**
1. **Retrieval Augmented Generation**(RAG): Look up literature and documents from a text database.
2. **Web Search for New Literature**(SCRAPE): Search platforms like arXiv, bioRxiv, and PubMed for the latest research.
3. **Bioinformatics Databases**(DATABASE): Utilize databases such as Gene Ontology and Enrichr to perform gene set enrichment analyses.
4.

ValidationError: 1 validation error for Generation
text
  str type expected (type=type_error.str)

In [23]:
pipeline_stages = parse_pipeline_prompt(response)

# Print parsed stages
for stage in pipeline_stages:
    print(f"Step {stage.step_number} ({stage.module}):")
    print(f"Prompt: {stage.prompt}")
    if stage.notes:
        print(f"Notes: {stage.notes}")
    print()


In [24]:
pipeline_stages

[]

In [13]:
while True:
    print('Do you want to proceed with this plan? [Y/N/edit]')
    prompt2 = input('Input >> ')
    if prompt2 == 'Y':
        break
    elif prompt2 == 'N':
        return chatstatus
    else:
        template = plannerEditingTemplate()
        template = template.format(plan=response)
        print(template)
        PROMPT   = PromptTemplate(input_variables=["user_query"], template=template)
        chain    = PROMPT | llm
        
        # Call chain
        response = chain.invoke(prompt2).content.strip() + '\n\n'
        print(response) if chatstatus['config']['debug'] else None
        
processes = response2processes(response)
print(processes) if chatstatus['config']['debug'] else None
chatstatus['planned'] = processes
chatstatus['process']['stages'] = processes




[1m> Entering new ConversationChain chain...[0m
Prompt after formatting:
[32;1m[1;3m**INSTRUCTIONS:**
You are planning a bioinformatics analysis pipeline to address a user's query. Your task is to outline a multi-step workflow using the available methods listed below. Each method should be used appropriately to ensure a thorough analysis. For each step in the pipeline, explain the goal of the process as well as provide a prompt to the chatbot that will execute that step. If information is passed between stages, such as from literature or code to databases, indicate the dependencies of steps in the prompt.

**Available Methods:**
1. **Retrieval Augmented Generation**(RAG): Look up literature and documents from a text database.
2. **Web Search for New Literature**(SCRAPE): Search platforms like arXiv, bioRxiv, and PubMed for the latest research.
3. **Bioinformatics Databases**(DATABASE): Utilize databases such as Gene Ontology and Enrichr to perform gene set enrichment analyses.
4.

SyntaxError: 'return' outside function (2988740407.py, line 17)