In [19]:
from pydantic import BaseModel, Field
from dotenv import load_dotenv
import os

# Read the local .env file first, then look the Google API key up in the
# environment; the name is bound to None when the variable is not set.
load_dotenv()
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

In [2]:
from typing import List, Optional
from langchain_core.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain.chat_models import init_chat_model
from src.pydantic_classes import StudyExtraction, Standard_gene, Cancer_gene

# Chat model used for extraction: Gemini 2.5 Flash through the google_genai
# provider, with the sampling temperature pinned to 0.0.
llm = init_chat_model(
    "gemini-2.5-flash",
    temperature=0.0,
    model_provider="google_genai",
)

# Canonical user input: one free-text sentence naming the genes of interest.
# Adjacent literals are concatenated into a single string.
paragraph = (
    "I identified these five genes to be significantly more mutated "
    "than expected by chance in my cohort of human brain cancer patients: "
    "TP53, AKT3, EGFR, ATRX and PDX1."
)

# Alternative implementation: parse the model reply into a pydantic object
# (StudyExtraction) more robustly.
parser = PydanticOutputParser(pydantic_object=StudyExtraction)
format_instructions = parser.get_format_instructions()

# The parser's format instructions are bound into the system message here,
# so {paragraph} is the only placeholder left to supply at call time.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "Extract per schema:\n{format_instructions}"),
        ("human", "{paragraph}"),
    ]
).partial(format_instructions=format_instructions)

# Pipeline: prompt -> chat model -> pydantic parser.
parsing_llm = prompt | llm | parser

# Feed the raw user text ("paragraph") through the parsing chain.
parsed_input = parsing_llm.invoke(dict(paragraph=paragraph))

# Serialise the result to a JSON string with 2-space indentation and print it.
json_output = parsed_input.model_dump_json(indent=2)
print(json_output)

#next step 

ModuleNotFoundError: No module named 'langchain_core'