# Extract Input Findings

**Goal:** Extract the abnormal findings for each organ from the user's input.

In [23]:
from typing import Optional, List
from langchain_openai import ChatOpenAI
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate

In [2]:
# Chat model used by the extraction chains built with `llm.with_structured_output(...)` in this notebook.
llm = ChatOpenAI(model="gpt-3.5-turbo")

## User Input

In [24]:
# Sample free-text request listing findings; used as the input of the extraction cells.
user_text1 = """Generate US report with these findings:
- Mild fatty liver
- 2-mm left renal stone, 5-mm right renal cyst
"""

## Schema

In [32]:
def _finding_description(organ_name: str) -> str:
    """Return the description text for the ``finding`` field of the named organ model."""
    return (
        f"Abnormal finding for the {organ_name}. "
        f"If findings about {organ_name} is not provided or {organ_name} is normal, return `None`."
    )


class Organ(BaseModel):
    """Base class for organ-related information"""
    # Free-text abnormal finding; defaults to None when nothing is supplied.
    finding: Optional[str] = Field(default=None, description=_finding_description("Organ"))

    def __init_subclass__(cls, **kwargs):
        # Set the organ-specific description once, when the subclass is created,
        # rather than on every instantiation. Mutating the class-level field info
        # from `__init__` meant that a schema generated before the first instance
        # existed still carried an empty description.
        super().__init_subclass__(**kwargs)
        finding_field = cls.__fields__.get("finding")
        if finding_field is not None:
            finding_field.field_info.description = _finding_description(cls.__name__)

    class Config:
        # Allows aliased fields to be populated by their field name as well.
        # NOTE(review): no field in this notebook declares an alias, so this
        # setting appears to have no effect here - confirm before removing.
        allow_population_by_field_name = True
        
# Liver-specific schema; it adds nothing of its own and inherits `finding` from Organ.
# NOTE(review): pydantic is expected to emit the class docstring as the schema
# description, so treat the docstring as runtime text - confirm before rewording it.
class Liver(Organ):
    """Information about Liver finding"""
 

# Kidney-specific schema; it adds nothing of its own and inherits `finding` from Organ.
# NOTE(review): pydantic is expected to emit the class docstring as the schema
# description, so treat the docstring as runtime text - confirm before rewording it.
class Kidney(Organ):
    """Information about Kidney finding"""
   
# Gallbladder-specific schema; it adds nothing of its own and inherits `finding` from Organ.
# NOTE(review): pydantic is expected to emit the class docstring as the schema
# description, so treat the docstring as runtime text - confirm before rewording it.
class GallBladder(Organ):
    """Information about GallBladder finding"""
    

class Findings(BaseModel):
    """Extracted information from each organs."""
    # One list per organ, so several findings can be captured for the same organ.
    abnormal_liver: List[Liver]
    abnormal_kidney: List[Kidney]
    abnormal_gallbladder: List[GallBladder]

    def to_dict(self):
        """Return a plain dict mapping each field name to the list of its `finding` values."""
        per_organ = (
            ("abnormal_liver", self.abnormal_liver),
            ("abnormal_kidney", self.abnormal_kidney),
            ("abnormal_gallbladder", self.abnormal_gallbladder),
        )
        flattened = {}
        for key, organs in per_organ:
            # Keep only the finding text of each entry.
            flattened[key] = [organ.finding for organ in organs]
        return flattened

# Demo: create one instance of each organ model, then print the description
# attached to its `finding` field.
liver_instance = Liver()
kidney_instance = Kidney()
gallbladder_instance = GallBladder()

for organ_instance in (liver_instance, kidney_instance, gallbladder_instance):
    print(organ_instance.__fields__["finding"].field_info.description)

Abnormal finding for the Liver. If findings about Liver is not provided or Liver is normal, return `None`.
Abnormal finding for the Kidney. If findings about Kidney is not provided or Kidney is normal, return `None`.
Abnormal finding for the GallBladder. If findings about GallBladder is not provided or GallBladder is normal, return `None`.


## Extractor

In [18]:
# Prompt for the extraction chains: a fixed system instruction followed by the
# user's text, which is supplied through the `input_text` template variable.
# Ideas for improving extraction quality:
# 1) add reference examples to the messages (e.g. a MessagesPlaceholder('examples')
#    between the system and human messages);
# 2) add template variables carrying extra context, such as metadata about the
#    document the text was extracted from.
system_instruction = (
    "You are an expert extraction algorithm. "
    "Only extract relevant information from the text. "
    "If you do not know the value of an attribute asked to extract, "
    "return `None` for the attribute's value."
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_instruction),
        ("human", "{input_text}"),
    ]
)

## Create Chain & Execute

### Single Extract 

In [20]:
# One chain per organ: the shared prompt piped into the LLM, with the output
# structured as that organ's schema.
runnable_liver, runnable_kidney, runnable_gallbladder = (
    prompt | llm.with_structured_output(schema=organ_schema)
    for organ_schema in (Liver, Kidney, GallBladder)
)

In [33]:
# Show the input text that the extraction cells run on.
print(user_text1)


Generate US report with these findings:
- Mild fatty liver
- 2-mm left renal stone, 5-mm right renal cyst



In [25]:
# Run each single-organ chain on the same input, printing each result as soon
# as it is available.
extraction_input = {"input_text": user_text1}

# Liver
liver1 = runnable_liver.invoke(extraction_input)
print(liver1)

# Kidney
kidney1 = runnable_kidney.invoke(extraction_input)
print(kidney1)

# Gallbladder
gallbladder1 = runnable_gallbladder.invoke(extraction_input)
print(gallbladder1)

finding='Mild fatty liver'
finding='2-mm left renal stone, 5-mm right renal cyst'
finding=None


### Multiple Extract

In [34]:
# Combined chain: one call whose output is structured as `Findings`, covering all organs.
runnable = prompt | llm.with_structured_output(schema=Findings)

In [35]:
# Run the combined extraction on the sample text; the bare `res` displays the result.
res = runnable.invoke({"input_text": user_text1})
res

Findings(abnormal_liver=[Liver(finding='Mild fatty liver')], abnormal_kidney=[Kidney(finding='2-mm left renal stone'), Kidney(finding='5-mm right renal cyst')], abnormal_gallbladder=[])

In [38]:
# Reduce the structured result to plain lists of finding strings, keyed by field name.
res.to_dict()

{'abnormal_liver': ['Mild fatty liver'],
 'abnormal_kidney': ['2-mm left renal stone', '5-mm right renal cyst'],
 'abnormal_gallbladder': []}

## Final Wrapper

In [14]:
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate


def get_findings(input_text,
                 llm=None):
    """Extract per-organ abnormal findings from free text.

    Args:
        input_text: Text to extract findings from; it is substituted into the
            prompt's `input_text` template variable.
        llm: Chat model to use. It must provide `with_structured_output`.
            When omitted, a `ChatOpenAI(model="gpt-3.5-turbo")` is created
            for the call.

    Returns:
        The result of invoking the chain, structured according to the
        `Findings` schema.
    """
    # Build the default model lazily. A `ChatOpenAI(...)` call in the signature
    # is evaluated once, when the function is defined, so merely defining the
    # function would construct a client and every call would share it.
    if llm is None:
        llm = ChatOpenAI(model="gpt-3.5-turbo")

    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are an expert extraction algorithm. "
                "Only extract relevant information from the text. "
                "If you do not know the value of an attribute asked to extract, "
                "return `None` for the attribute's value.",
            ),
            # Reference examples could be inserted here to improve extraction
            # quality, e.g. MessagesPlaceholder('examples').
            ("human", "{input_text}"),
        ])

    runnable = prompt | llm.with_structured_output(schema=Findings)
    return runnable.invoke({"input_text": input_text})


In [15]:
# Smoke-test the wrapper on a short input.
# NOTE(review): the saved output of this cell shows the fields `liver_findings`,
# `kidney_findings` and `gallbladder_findings`, which do not match the
# `abnormal_*` fields of the `Findings` class defined in this notebook. It looks
# like it was produced with an earlier version of the schema - re-run from a
# fresh kernel (Restart & Run All) to refresh it.
get_findings("Fatty liver, 2-cm renal cyst")

Findings(liver_findings=[Liver(finding='Fatty liver')], kidney_findings=[Kidney(finding='2-cm renal cyst')], gallbladder_findings=[])