In [115]:
# !pip install  langchain_ollama

In [116]:
from typing import List, Optional
from pydantic import BaseModel, Field
from langchain.prompts import PromptTemplate
from langchain_ollama import ChatOllama

In [117]:
with open("modified_text.txt", "r", encoding="utf-8") as f:
    text = f.read()

# 只取前5000个字符
text = text[:8000]

print("原始文本内容示例：")
print(text[:500])  # 预览前500个字符

原始文本内容示例：



ACARBOSE
(ay-kar-bose) Precose®
ORAL ANTIDIABETIC
Prescriber Highlights
Antihyperglycemic agent that reduces the rate & amount of glucose absorbed from the gut after a meal; may be useful for mild reductions in blood
glucose in dogs or cats. Unlikely to be effective when used as sole therapy.
Contraindications: Underweight animals, known hypersensitivity, diabetic ketoacidosis, inflammatory bowel disease, colonic ulceration, partial
intestinal obstruction or predisposition to obstruction, chr


In [41]:

response = multiple_entry("llama3.1",text)
print(f'\n llama:\n{response}')

ValidationError: 2 validation errors for Data
people.0
  Input should be a valid dictionary or instance of Person [type=model_type, input_value='dogs', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type
people.1
  Input should be a valid dictionary or instance of Person [type=model_type, input_value='cats', input_type=str]
    For further information visit https://errors.pydantic.dev/2.10/v/model_type

In [None]:



# The Schema

from typing import Optional
from pydantic import BaseModel, Field

class Person(BaseModel):
    """Information about a person."""

    # ^ Doc-string for the entity Person.
    # This doc-string is sent to the LLM as the description of the schema Person,
    # and it can help to improve extraction results.

    # Note that:
    # 1. Each field is an `optional` -- this allows the model to decline to extract it!
    # 2. Each field has a `description` -- this description is used by the LLM.
    # Having a good description can help improve extraction results.
    name: Optional[str] = Field(default=None, description="The name of the person")
    hair_color: Optional[str] = Field(
        default=None, description="The color of the person's hair if known"
    )
    height_in_meters: Optional[float] = Field(
        default=None, description="Height measured in meters"
    )

# The Extractor

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

# Define a custom prompt to provide instructions and any additional context.
# 1) You can add examples into the prompt template to improve extraction quality
# 2) Introduce additional parameters to take context into account (e.g., include metadata
#    about the document from which the text was extracted.)
prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm. "
            "Only extract relevant information from the text. "
            "If you do not know the value of an attribute asked to extract, "
            "return null for the attribute's value.",
        ),
        # Please see the how-to about improving performance with
        # reference examples.
        # MessagesPlaceholder('examples'),
        ("human", "{text}"),
    ]
)

from langchain_ollama import ChatOllama

def single_entry(model_name,text):

    structured_llm = ChatOllama(model=model_name,temperature=0.5,verbose=True).with_structured_output(schema=Person)

    prompt = prompt_template.invoke({"text": text})
    response = structured_llm.invoke(prompt)
    return response

from typing import List

class Data(BaseModel):
    """Extracted data about people."""

    # Creates a model so that we can extract multiple entities.
    people: List[Person]

def multiple_entry(model_name,text):
    structured_llm = ChatOllama(model=model_name,temperature=0.5,verbose=True).with_structured_output(schema=Data)
    prompt = prompt_template.invoke({"text": text})
    response = structured_llm.invoke(prompt)
    return response


if __name__ == '__main__':
    print ('--------------------llama3------------------------------')

    # llama3.1无法自动把feet转换成meter，所以我们把这个问题简化了一些，在text中直接用meter做单位。
    text = "Alan Smith is 1.83 meters tall and has blond hair."
    response = single_entry("llama3.1",text)
    print(f'\n llama3.1 response:\n{response}')

    text = "Alan Smith is 6 feet tall and has blond hair."
    response = single_entry("llama3.1",text)
    print(f'\n llama3.1 response:\n{response}')

    text = "Alan Smith is 1.83 meters tall and has blond hair. John Doe is 1.72 meters tall and has brown hair."
    response = multiple_entry("llama3.1",text)
    print(f'\n llama3.1 response:\n{response}')

    text = "Alan Smith is 1.88 meters tall and has blond hair. John Doe is 7 feet tall and has brown hair."
    response = multiple_entry("llama3.1",text)
    print(f'\n llama3.1 response:\n{response}')


    print ('---------------------deepseek------------------------------')
    
    text = "Alan Smith is 1.83 meters tall and has blond hair."
    response = single_entry("MFDoom/deepseek-r1-tool-calling:7b",text)
    print(f'\n deepseek response:\n{response}')

    text = "Alan Smith is 6 feet tall and has blond hair."
    response = multiple_entry("MFDoom/deepseek-r1-tool-calling:7b",text)
    print(f'\n deepseek response:\n{response}') 

    text = "Alan Smith is 1.83 meters tall and has blond hair. John Doe is 1.72 meters tall and has brown hair."
    response = multiple_entry("MFDoom/deepseek-r1-tool-calling:7b",text)
    print(f'\n deepseek response:\n{response}')

    text = "Alan Smith is 1.88 meters tall and has blond hair. John Doe is 7 feet tall and has brown hair."
    response = single_entry("MFDoom/deepseek-r1-tool-calling:7b",text)
    print(f'\n deepseek response:\n{response}')
    

--------------------llama3------------------------------

 llama3.1 response:
name='Alan Smith' hair_color='blond' height_in_meters=1.83

 llama3.1 response:
name='Alan Smith' hair_color='blond' height_in_meters=None

 llama3.1 response:
people=[Person(name='Alan Smith', hair_color='blond', height_in_meters=None), Person(name='John Doe', hair_color='brown', height_in_meters=None)]

 llama3.1 response:
people=[Person(name='Alan Smith', hair_color='blond', height_in_meters=None), Person(name='John Doe', hair_color='brown', height_in_meters=None)]
---------------------deepseek------------------------------

 deepseek response:
name='Alan Smith' hair_color='blond' height_in_meters=1.83


ValidationError: 1 validation error for Data
people
  Field required [type=missing, input_value={'properties': {'people':...ple'], 'type': 'object'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/missing

In [119]:
text = "Alan Smith is 1.88 meters tall and has blond hair. John Doe is 7 feet tall and has brown hair."
response = multiple_entry("llama3.1",text)
print(f'\n llama3.1 response:\n{response}')


 llama3.1 response:
people=[Person(name='Alan Smith', hair_color=None, height_in_meters=None), Person(name='John Doe', hair_color=None, height_in_meters=None)]


In [129]:
# The Schema

from typing import Optional
from pydantic import BaseModel, Field

class Medicine(BaseModel):
    """Information about a medicine."""

    name: Optional[str] = Field(default=None, description="The name of the medicine")
    use_case: Optional[str] = Field(default=[], description="The use cases for the medicine")
    contraindications: Optional[str] = Field(default=[], description="Contraindications for the medicine")
    side_effects: Optional[str]= Field(default=[], description="Side effects of the medicine")
    pharmacology: Optional[str]= Field(default=[], description="Pharmacological effects of the medicine")


# The Extractor

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field


prompt_template = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are an expert extraction algorithm for extracting relevant information about medicines. "
            "Extract the medicine's name, use cases, contraindications, side effects, and pharmacological effects. "
            "If you do not know the value of an attribute asked to extract, return null for the attribute's value."
        ),
        ("human", "{text}"),
    ]
)

from langchain_ollama import ChatOllama

def single_entry(model_name, text):
    structured_llm = ChatOllama(model=model_name, temperature=0.5, verbose=True).with_structured_output(schema=Medicine)

    prompt = prompt_template.invoke({"text": text})
    response = structured_llm.invoke(prompt)
    return response

from typing import List

class Data(BaseModel):
    """Extracted data about medicines."""
    medicines: List[Medicine]

def multiple_entry(model_name, text):
    structured_llm = ChatOllama(model=model_name, temperature=0.5, verbose=True).with_structured_output(schema=Data)
    prompt = prompt_template.invoke({"text": text})
    print(f"Prompt: {prompt}")  # 打印出 prompt 看是否符合预期
    response = structured_llm.invoke(prompt)
    print(f"Response: {response}")  # 打印出返回值
    return response


if __name__ == '__main__':
    print('--------------------llama3------------------------------')

    # 示例：单个药物信息提取
    text = "ACARBOSE is used for diabetes control. It is contraindicated in pregnant women and people allergic to the drug."
    response = single_entry("llama3.1", text)
    print(f'\n llama3.1 response:\n{response}')

    text = "Metformin is used for controlling blood sugar levels in people with type 2 diabetes. It is contraindicated in patients with kidney disease."
    response = single_entry("llama3.1", text)
    print(f'\n llama3.1 response:\n{response}')

    text = "text = Aspirin is used for pain relief and reducing inflammation. It should not be used by people with stomach ulcers or bleeding disorders. Ibuprofen is used for reducing fever and pain relief. It can cause side effects such as gastrointestinal discomfort, bloating"
    response = multiple_entry("llama3.1", text)
    print(f'\n llama3.1 response:\n{response}')

    text = "Ibuprofen is used for reducing fever and pain relief. It can cause side effects such as gastrointestinal discomfort, bloating."
    response = multiple_entry("llama3.1", text)
    print(f'\n llama3.1 response:\n{response}')

--------------------llama3------------------------------


ValidationError: 1 validation error for Medicine
side_effects
  Input should be a valid string [type=string_type, input_value=['diarrhea', 'nausea'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type

In [131]:
text = 'Metformin is commonly used for controlling blood sugar levels in people with type 2 diabetes. It is contraindicated in patients with severe kidney disease, liver disease, or those who have a history of lactic acidosis. Side effects may include gastrointestinal discomfort, bloating, and nausea. Aspirin is widely used to reduce the risk of heart attacks in individuals with cardiovascular disease, but it should be avoided by patients with peptic ulcers or gastrointestinal bleeding. Statins, such as atorvastatin, are prescribed for lowering cholesterol in patients with hyperlipidemia, although they should not be used by pregnant women or individuals with liver disease. Hypertension is treated with ACE inhibitors like enalapril, but it is contraindicated for patients with a history of angioedema or severe kidney dysfunction. Lastly, prednisone is often used to treat autoimmune diseases such as lupus, but it should be avoided by patients with a history of severe infections or osteoporosis.'

response = multiple_entry("llama3.1", text)
print(f'\n llama3.1 response:\n{response}')

Prompt: messages=[SystemMessage(content="You are an expert extraction algorithm for extracting relevant information about medicines. Extract the medicine's name, use cases, contraindications, side effects, and pharmacological effects. If you do not know the value of an attribute asked to extract, return null for the attribute's value.", additional_kwargs={}, response_metadata={}), HumanMessage(content='Metformin is commonly used for controlling blood sugar levels in people with type 2 diabetes. It is contraindicated in patients with severe kidney disease, liver disease, or those who have a history of lactic acidosis. Side effects may include gastrointestinal discomfort, bloating, and nausea. Aspirin is widely used to reduce the risk of heart attacks in individuals with cardiovascular disease, but it should be avoided by patients with peptic ulcers or gastrointestinal bleeding. Statins, such as atorvastatin, are prescribed for lowering cholesterol in patients with hyperlipidemia, althou

ValidationError: 6 validation errors for Data
medicines.0.contraindications
  Input should be a valid string [type=string_type, input_value=['severe kidney disease',...ory of lactic acidosis'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
medicines.0.side_effects
  Input should be a valid string [type=string_type, input_value=['gastrointestinal discom...', 'bloating', 'nausea'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
medicines.1.contraindications
  Input should be a valid string [type=string_type, input_value=['peptic ulcers', 'gastrointestinal bleeding'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
medicines.2.contraindications
  Input should be a valid string [type=string_type, input_value=['pregnant women', 'liver disease'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
medicines.3.contraindications
  Input should be a valid string [type=string_type, input_value=['history of angioedema',...ere kidney dysfunction'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type
medicines.4.contraindications
  Input should be a valid string [type=string_type, input_value=['history of severe infections', 'osteoporosis'], input_type=list]
    For further information visit https://errors.pydantic.dev/2.10/v/string_type

In [126]:
response

Data(medicines=[Medicine(name='Metformin', use_case=[], contraindications='severe kidney disease, liver disease, lactic acidosis', side_effects=[], pharmacology=[]), Medicine(name='Aspirin', use_case=[], contraindications='peptic ulcers, gastrointestinal bleeding', side_effects=[], pharmacology=[]), Medicine(name='Atorvastatin', use_case=[], contraindications='pregnant women, liver disease', side_effects=[], pharmacology=[]), Medicine(name='Enalapril', use_case=[], contraindications='history of angioedema, severe kidney dysfunction', side_effects=[], pharmacology=[]), Medicine(name='Prednisone', use_case=[], contraindications='history of severe infections, osteoporosis', side_effects=[], pharmacology=[])])

In [127]:
with open("modified_text.txt", "r", encoding="utf-8") as f:
    text = f.read()

# 只取前5000个字符
text = text[:8000]

print("原始文本内容示例：")
print(text[:500])  # 预览前500个字符

原始文本内容示例：



ACARBOSE
(ay-kar-bose) Precose®
ORAL ANTIDIABETIC
Prescriber Highlights
Antihyperglycemic agent that reduces the rate & amount of glucose absorbed from the gut after a meal; may be useful for mild reductions in blood
glucose in dogs or cats. Unlikely to be effective when used as sole therapy.
Contraindications: Underweight animals, known hypersensitivity, diabetic ketoacidosis, inflammatory bowel disease, colonic ulceration, partial
intestinal obstruction or predisposition to obstruction, chr


In [128]:
response = multiple_entry("llama3.1", text)
print(f'\n llama3.1 response:\n{response}')

Response: None

 llama3.1 response:
None
