In [1]:
import os
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_mistralai import ChatMistralAI

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [2]:
# Load variables from a local .env file into the process environment before the
# client is created (presumably this is where the Mistral API key lives — confirm).
load_dotenv()

# Chat-model settings, named so they are easy to find and tune in one place.
MODEL_NAME = "mistral-small-latest"
TEMPERATURE = 0.1  # low temperature: keep the extraction output stable between runs

model = ChatMistralAI(model=MODEL_NAME, temperature=TEMPERATURE)

In [3]:
# One atomic, independently checkable statement extracted from a rumor.
# Documented with comments rather than a class docstring so that the schema
# derived from this model carries only the field descriptions below
# (NOTE(review): pydantic is believed to copy class docstrings into the
# generated schema — confirm before converting these comments to a docstring).
class Claim(BaseModel):
    # The statement to be fact-checked.
    claim: str = Field(
        description="A single verifiable factual statement",
    )
    # Category label; the description lists the expected values, but the field
    # itself accepts any string.
    claim_type: str = Field(
        description="health | death | policy | event | statistic | relationship | other",
    )
    # Named entities the statement is about.
    entities: list[str] = Field(
        description="main named entities involved",
    )
    # Free-text time reference; the literal string "NAN" stands for "not given".
    time: str = Field(
        description="time mentioned or NAN",
    )
    # Free-text location reference; the literal string "NAN" stands for "not given".
    location: str = Field(
        description="location mentioned or NAN",
    )

# Top-level container used as the structured-output schema: holds every claim
# extracted from a single rumor.
class RumorSchema(BaseModel):
    claims: list[Claim]  # extracted claims, one Claim per checkable statement

In [4]:
# Bind the RumorSchema to the chat model so that invoking it yields a
# RumorSchema object (see the type printed by the final cell).
structured_llm = model.with_structured_output(RumorSchema)

In [9]:
# Instruction template for claim extraction. `{rumor}` is the only input
# variable; the doubled braces around the schema sketch are escaped literal
# braces, not template variables.
RUMOR_PROMPT_TEMPLATE = """
Given a rumor, break it into independent factual claims that can be verified true or false.

Rules:
- Each claim must be a single checkable statement
- Do NOT merge multiple facts into one claim
- Do NOT explain or summarize
- Extract even implied claims
- If time or location missing return NAN
- Return STRICT JSON only

Schema:
claims: [
    {{
        claim(string)
        claim_type(string: health | death | policy | event | statistic | relationship | other)
        entities(list of strings)
        time(string)
        location(string)
    }}
]

Rumor: {rumor}
"""

prompt = ChatPromptTemplate.from_template(RUMOR_PROMPT_TEMPLATE)


In [10]:
# Compose the pipeline: the filled-in prompt is piped into the
# structured-output model.
chain = prompt | structured_llm

In [11]:
# Run the extraction pipeline on one sample rumor.
sample_rumor = "From whatsapp doctors say drinking cold water after meals causes stomach cancer in India"
result = chain.invoke({"rumor": sample_rumor})

# Plain-dict view of the parsed model, computed once and reused below.
result_dict = result.model_dump()

# Show both representations and their types.
print(result)
print(result_dict)
print(type(result))
print(type(result_dict))
# Downstream code should return result.model_dump() (a dict). To turn that into
# JSON text use json.dumps(...) or result.model_dump_json(); json.loads goes the
# other way (JSON text -> Python objects) and does not accept a dict.

claims=[Claim(claim='doctors say drinking cold water after meals causes stomach cancer', claim_type='health', entities=['doctors'], time='NAN', location='NAN'), Claim(claim='the claim is spread from whatsapp', claim_type='other', entities=['whatsapp'], time='NAN', location='NAN'), Claim(claim='the claim is about India', claim_type='other', entities=['India'], time='NAN', location='India')]
{'claims': [{'claim': 'doctors say drinking cold water after meals causes stomach cancer', 'claim_type': 'health', 'entities': ['doctors'], 'time': 'NAN', 'location': 'NAN'}, {'claim': 'the claim is spread from whatsapp', 'claim_type': 'other', 'entities': ['whatsapp'], 'time': 'NAN', 'location': 'NAN'}, {'claim': 'the claim is about India', 'claim_type': 'other', 'entities': ['India'], 'time': 'NAN', 'location': 'India'}]}
<class '__main__.RumorSchema'>
<class 'dict'>
