In [1]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_openai import ChatOpenAI
import os
import dotenv
from articles_parser import parse_news
from typing import List
from langchain_core.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser

In [2]:
# Configuration: credentials come from the environment, model settings are fixed here.
# load_dotenv() runs first so that the environment lookup below happens after it.
dotenv.load_dotenv()

OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')  # None when the variable is unset
OPENAI_MODEL = "gpt-3.5-turbo-0125"
TEMPERATURE = 0

In [3]:
# Structured result of the extraction step: two lists of plain-text statements.
# NOTE(review): documented with `#` comments rather than a class docstring on purpose.
# The schema of this model is embedded in the prompt through
# PydanticOutputParser.get_format_instructions(), and pydantic is expected to copy a
# class docstring into that schema, which would silently change the prompt.
class ArticleSummary(BaseModel):
    # Each item: "<entity1> <relation> <entity2>". The description text is sent to the LLM.
    relations: List[str] = Field(..., description="Relations extracted from the article, must contain exactly 2 entities and a type of relation between them, each relation must be in the form of '<entity1> <relation> <entity2>'")
    # Each item: "<entity> <property>". The description text is sent to the LLM.
    properties: List[str] = Field(..., description="Properties of entity extracted from the article, must contain exactly 1 entity and one or more property of that entity, each property must be in the form of '<entity> <property>'")

class ArticleLLM():
    """Two-stage LLM pipeline that turns a news article into a Cypher query.

    Stage 1 (``context_chain``) sends the article text to the chat model and
    parses the reply into an ``ArticleSummary``. Stage 2 (``cypher_chain``)
    sends the extracted relations/properties back to the same model and asks
    for a single MERGE-based Cypher query.
    """

    def __init__(self, model, api_key, temperature):
        """Build the prompts, the chat model and the two chains.

        Args:
            model: Model name forwarded to ``ChatOpenAI``.
            api_key: API key forwarded to ``ChatOpenAI``.
            temperature: Sampling temperature forwarded to ``ChatOpenAI``.
        """
        self.parser = PydanticOutputParser(pydantic_object=ArticleSummary)

        # The worked example must show the same '<entity1> <relation> <entity2>'
        # order that the schema asks for, otherwise the model is taught to emit
        # relations in the reversed direction.
        self.class_context_template =  """
            You are a data modeling expert. You will be given an article and you will help me structure the data based on this format.
            
            Don't need to put '-' between entity and relation or property. For example, if the entities are 'Steve Jobs' and 'Apple' and the relation is 'founded', you should write 'Steve Jobs founded Apple'.
            
            Entities can only be one of people, locations, organizations, events, objects, incidents, and resolutions. Otherwise, don't consider it as an entity.
            
            Don't include any relations or properties that are not in the article.
            
            {format_instructions}
            
            Article: {article} 
        """

        self.cypher_context_template = """
            You are also familiar with Cypher Query Language and Neo4j graph database.
            
            Based on the given relations and properties, you will help me to create a graph database query using Cypher Query Language.
            
            Combine everything into a single query without using any comments using the MERGE Command. 
            
            Make sure each entity is declared before using them in the relation.
            
            Relations and properties extracted from the article: 
            {response}
        """

        # format_instructions is filled in once here; only the article varies per call.
        self.context_prompt = PromptTemplate(
            input_variables=["article"],
            template=self.class_context_template,
            partial_variables={"format_instructions": self.parser.get_format_instructions()}
        )

        self.cypher_prompt = PromptTemplate(
            input_variables=["response"],
            template=self.cypher_context_template
        )

        self.llm = ChatOpenAI(
            model=model,
            api_key=api_key,
            temperature=temperature
        )

        self.context_chain = self.context_prompt | self.llm | self.parser
        self.cypher_chain = self.cypher_prompt | self.llm
        # Backward-compatible alias for the original (misspelled) attribute name.
        self.cyper_chain = self.cypher_chain

    def __run(self, article_link):
        """Fetch the article behind ``article_link`` and run the extraction chain.

        Returns:
            Whatever ``context_chain`` produces (the parser's output for the reply).
        """
        # parse_news is a project helper; only its 'text' entry is used here.
        article = parse_news(article_link)['text']
        return self.context_chain.invoke({"article": article})

    def __format_output(self, response):
        """Flatten the extraction result into one statement per line.

        Args:
            response: Object exposing ``relations`` and ``properties`` lists.
                A missing attribute or a ``None`` value counts as an empty list.

        Returns:
            All relations followed by all properties, joined with newlines
            (an empty string when there is nothing to report).
        """
        # getattr with a default replaces the former bare `except:`: a missing
        # attribute still degrades to "no items", but unrelated errors (and
        # KeyboardInterrupt) are no longer swallowed.
        relations = getattr(response, "relations", None) or []
        properties = getattr(response, "properties", None) or []
        return '\n'.join([*relations, *properties])

    def inference(self, article_link):
        """Run the whole pipeline for one article.

        Args:
            article_link: URL handed to ``parse_news``.

        Returns:
            The ``content`` of the model's reply to the Cypher prompt.
        """
        summary = self.__run(article_link)
        statements = self.__format_output(summary)
        return self.cypher_chain.invoke({"response": statements}).content

In [4]:
# End-to-end run on a single article: the link goes in, the model's Cypher reply comes out.
article_link = "https://www.bbc.com/news/world-asia-67783043"
model = ArticleLLM(
    model=OPENAI_MODEL,
    api_key=OPENAI_API_KEY,
    temperature=TEMPERATURE,
)
response = model.inference(article_link)

In [5]:
# Inspect the generated Cypher; print() shows real line breaks instead of the escaped repr.
print(response)

MERGE (pm:PrimeMinister {name: "Fumio Kishida", approval_ratings: 17})
MERGE (cabinetMinistersResigned:Minister {number_resigned: 4})
MERGE (publicOutrage:PublicOutrage {description: "Public anger and outrage has boiled over on social media"})
MERGE (hopeForReforms:HopeForReforms {description: "Some hope this could be a tipping point for reforms in governance"})
MERGE (ldp:Party {name: "Liberal Democratic Party", ruling_since: 1955})
MERGE (votersDisillusioned:VotersDisillusioned {description: "Voters in the leading Asian democracy are feeling disillusioned and cynical"})
MERGE (ldpScandals:Scandals {description: "LDP politicians accused of pocketing excess funds received at fundraisers"})
MERGE (prosecutorsRaided:ProsecutorsRaided {description: "Prosecutors raided the offices of the Abe group and the Nikai faction"})
MERGE (underreportingInvestigation:UnderreportingInvestigation {description: "Five of the six LDP factions are under investigation for underreporting ticket funds"})
MERG

## Neo4j

In [6]:
from neo4j import GraphDatabase

In [7]:
# Neo4j connection settings, read from the environment (each value is None when unset).
URI = os.environ.get('NEO4J_URI')
AUTH = (
    os.environ.get('NEO4J_USER'),
    os.environ.get('NEO4J_PASSWORD'),
)

In [8]:
def neo4j_delete_all(db_name="neo4j"):
    """Delete every node and relationship in a Neo4j database.

    Connects with the module-level ``URI`` and ``AUTH``.

    Args:
        db_name: Name of the target database (forwarded as ``database_``).
    """
    # The context manager closes the driver even when the query raises;
    # the explicit close() used before was skipped on errors.
    with GraphDatabase.driver(URI, auth=AUTH) as driver:
        driver.execute_query(
            "MATCH (n) DETACH DELETE n",
            database_=db_name
        )
    print(f"Successfully deleted all nodes and relationships in the database ({db_name})")

def neo4j_add_query(query, db_name="neo4j"):
    """Execute one Cypher query against a Neo4j database.

    Connects with the module-level ``URI`` and ``AUTH``.

    Args:
        query: Cypher text, sent to the server exactly as given.
        db_name: Name of the target database (forwarded as ``database_``).
    """
    # SECURITY NOTE(review): `query` is executed verbatim with no validation or
    # parameterization. In this notebook it is LLM-generated text, so review it
    # before running against a database that holds anything valuable.
    # The context manager closes the driver even when the query raises.
    with GraphDatabase.driver(URI, auth=AUTH) as driver:
        driver.execute_query(
            query,
            database_=db_name
        )
    print(f"Successfully added the query to the database ({db_name})")

def neo4j_num_element(db_name="neo4j"):
    """Print the number of nodes in a Neo4j database.

    Connects with the module-level ``URI`` and ``AUTH``. The count is printed,
    not returned.

    Args:
        db_name: Name of the target database (forwarded as ``database_``).
    """
    # The context manager closes the driver even when the query raises.
    with GraphDatabase.driver(URI, auth=AUTH) as driver:
        result = driver.execute_query(
            "MATCH (n) RETURN COUNT(n)",
            database_=db_name
        )
        # Single row, single column: the COUNT(n) value.
        print(result.records[0][0])
    # This is a read-only count; the previous message was copy-pasted from
    # neo4j_add_query and wrongly claimed that a query had been added.
    print(f"Successfully counted the nodes in the database ({db_name})")

In [9]:
# Print the current node count of the default database, before the insert cell below runs.
neo4j_num_element()

36
Successfully added the query to the database (neo4j)


In [10]:
# Execute the Cypher text held in `response` (the model's reply from the pipeline cell) as-is.
neo4j_add_query(response)

Successfully added the query to the database (neo4j)
