In [None]:
from Convert_news_to_json import convert_news_to_json
from Graph_PreProcess import Pre4Graph

convert_news_to_json()
graph = Pre4Graph().preprocess_graph()

Number of articles: 38
Number of graph documents: 38
----------------------------------------------------------------------------------------------------
Extracted nodes: [Node(id='No Man’S Sky', type='Product', properties={'category': 'space exploration sim'}), Node(id='Voyagers Update', type='Product', properties={})]
Extracted relationships: []
----------------------------------------------------------------------------------------------------
Extracted nodes: [Node(id='Microsoft', type='Company', properties={'industry': 'technology'}), Node(id='Xbox Game Pass September 2025 Wave 1', type='Product', properties={'name': 'Xbox Game Pass September 2025 Wave 1'}), Node(id='Hollow Knight: Silksong', type='Product', properties={'name': 'Hollow Knight: Silksong'}), Node(id='Team Cherry', type='Company', properties={'name': 'Team Cherry'})]
Extracted relationships: [Relationship(source=Node(id='Microsoft', type='Company', properties={}), target=Node(id='Xbox Game Pass September 2025 Wave 1'

In [44]:
graph.refresh_schema()

### Create Node and Relationship Using NewsArticle Metadata

##### - Change Document Entity to NewsArticle

In [None]:
cypher_query = """  
MATCH (d:Document)
SET d:NewsArticle
REMOVE d:Document
RETURN COUNT(d) as NewsArticle_count
"""
graph.query(cypher_query)

[{'NewsArticle_count': 38}]

##### - Set Media as Company Entity

In [9]:
cypher_query = """
MATCH (n:NewsArticle)
MERGE (c:Company {name: n.source})
RETURN COUNT(c) as Company_count
"""
company_count = graph.query(cypher_query)
print(f"Number of Media Company: {company_count}")

cypher_query = """  
MATCH (n:NewsArticle), (c:Company)
WHERE n.source = c.name
MERGE (n) -[:PUBLISHED_BY]->(c)
RETURN c.name as Company_name, COUNT(n) as NewsArticle_count
"""
company_news_count = graph.query(cypher_query)
print(company_news_count)

Number of Media Company: [{'Company_count': 38}]
[{'Company_name': 'Kotaku', 'NewsArticle_count': 2}, {'Company_name': 'IGN', 'NewsArticle_count': 1}, {'Company_name': '9to5Mac', 'NewsArticle_count': 2}, {'Company_name': 'Ars Technica', 'NewsArticle_count': 2}, {'Company_name': 'The Verge', 'NewsArticle_count': 1}, {'Company_name': 'MacRumors', 'NewsArticle_count': 2}, {'Company_name': 'Bloomberg', 'NewsArticle_count': 1}, {'Company_name': 'Nintendo Life', 'NewsArticle_count': 1}, {'Company_name': 'Windows Central', 'NewsArticle_count': 1}, {'Company_name': 'TechCrunch', 'NewsArticle_count': 1}, {'Company_name': '9to5google.com', 'NewsArticle_count': 1}, {'Company_name': 'Bethesda.net', 'NewsArticle_count': 2}, {'Company_name': 'Blizzard.com', 'NewsArticle_count': 1}, {'Company_name': 'Servethehome.com', 'NewsArticle_count': 1}, {'Company_name': 'Rudebaguette.com', 'NewsArticle_count': 1}, {'Company_name': 'Gizmodo.com', 'NewsArticle_count': 2}, {'Company_name': 'Foxweather.com', 'News

In [13]:
query = """
MATCH (n:NewsArticle)

WITH DISTINCT n.author AS author, n.source AS source, n.id AS articleId

// Create Person Node
MERGE (p:Person {name: author})

WITH p, source, articleId

MERGE (c:Company {name: source})

// Create Works_For Relationship between Person and Company
MERGE (p)-[:WORKS_FOR]->(c)

WITH p, articleId

// Create Wrote Relationship between Person and NewsArticle
MATCH (a:NewsArticle {id: articleId})

// Create Wrote Relationship between Person and NewsArticle
MERGE (p)-[:WROTE]->(a)

// Return the number of unique Person Node
RETURN count(DISTINCT p) AS count
"""

print(f"Number of unique Person Node: {graph.query(query)[0]['count']}")

Number of unique Person Node: 32


In [None]:
graph.refresh_schema()
print(graph.schema)

Node properties:
- **Person**
  - `name`: STRING Example: "Claire Jackson"
- **NewsArticle**
  - `id`: STRING Example: "tech_0"
  - `date`: STRING Example: "2025-09-02T19:53:27Z"
  - `title`: STRING Example: "No Man’s Sky Fans Are Doing Wild Stuff As The Game"
  - `text`: STRING Example: "With the Voyagers update, it seems clear that No M"
  - `source`: STRING Example: "Kotaku"
  - `author`: STRING Example: "Claire Jackson"
- **Company**
  - `id`: STRING Available options: ['Microsoft', 'Team Cherry', 'Apple', 'Google', 'Crystal Dynamics', 'Take-Two', 'Wordpress', 'Samsung', 'Nasa', 'Silicon Valley']
  - `industry`: STRING Available options: ['technology', 'space']
  - `name`: STRING Example: "Team Cherry"
  - `type`: STRING Available options: ['Media']
- **Product**
  - `id`: STRING Example: "No Man’S Sky"
  - `category`: STRING Available options: ['space exploration sim', 'operating system', 'film', 'gaming console', 'AI tool', 'NVMe SSD', 'asteroid', 'planet', 'satellite', 'plastic'

In [None]:
# In DB, Attribute name is id, but it is not used.
['Company', 'Product', 'Technology']
for entity in ['Company', 'Product', 'Technology']:
    cypher_query = f"""
    MATCH ({entity}:{entity})
    WHERE {entity}.id IS NOT NULL AND {entity}.name IS NULL
    SET {entity}.name = {entity}.id
    REMOVE {entity}.id
    """
graph.query(cypher_query)

[]

### Generate vector index and Neo4j Vector

In [28]:
graph.refresh_schema()

In [39]:
import importlib
import Generate_VectorIndex as GVI
import KG_RAG_Chain
importlib.reload(GVI)
importlib.reload(KG_RAG_Chain)

vector_store = GVI.VectorIndex(graph).connect_vector_index()
kg_rag_chain = KG_RAG_Chain.kg_enhanced_rag_chain(vector_store, graph)

Vector Index Generated: 1~10 / 38, Updated: 10
----------------------------------------------------------------------------------------------------
Vector Index Generated: 11~20 / 38, Updated: 10
----------------------------------------------------------------------------------------------------
Vector Index Generated: 21~30 / 38, Updated: 10
----------------------------------------------------------------------------------------------------
Vector Index Generated: 31~38 / 38, Updated: 8
----------------------------------------------------------------------------------------------------


In [46]:
example_question = ["What is the latest news about Apple?", 
                    "What is the latest product from Samsung? and what is this product based on?", 
                    "What is the most mentioned company in the news? and what is this company based on?"]

for question in example_question:
    result = kg_rag_chain(question)
    print(result)
    print("--------------------------------")


The latest news about Apple includes two major updates:

1. Apple has released the ninth developer betas of iOS 26 and iPadOS 26 for testing purposes, continuing the development of its next-generation operating systems.
2. Apple is about to launch the AirPods Pro 3, and reports indicate the company is following a similar upgrade strategy as it did with the AirPods Pro 2, aiming to build on the previous model's success.

These updates highlight Apple's ongoing focus on both software and hardware innovation.
--------------------------------
Based on the provided news article content and knowledge graph information, there is no mention of any Samsung product or related news. Therefore, I don't know what the latest product from Samsung is or what it is based on.
--------------------------------
The most mentioned company in the news is Apple. Based on the information provided, Apple is a technology company known for releasing products such as the AirPods Pro 3, AirPods Pro 2, Vision Pro, a