In [24]:
import os

from dotenv import load_dotenv

# Load environment variables via python-dotenv before any cell reads them:
# the Azure OpenAI settings used below are all looked up with os.getenv(...).
load_dotenv()

True

In [25]:
from langchain_openai import AzureChatOpenAI

# Chat model client used by the summarisation chain further down.
# Every connection setting is read from an environment variable, so no
# credential or endpoint is written into the notebook itself.
model = AzureChatOpenAI(
    api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
    openai_api_version=os.environ.get("OPENAI_API_VERSION"),
    deployment_name=os.environ.get("DEPLOYED_MODEL_NAME"),
)

In [26]:
from langchain_community.document_loaders import JSONLoader

# Path to the JSON file to load. Set the NYT_JSON_PATH environment variable to
# point at your own copy; when it is unset, the original machine-specific
# location is used so existing behaviour is unchanged.
local_file_path = os.getenv(
    "NYT_JSON_PATH",
    r"C:\Users\goura\OneDrive\Desktop\GenAI\Project3-Cognizant\nytworld.json",
)

# The jq expression selects every entry of rss -> channel -> item, yielding one
# Document per entry. text_content=False allows each selected entry to be a
# JSON object instead of a plain string.
loader = JSONLoader(
    file_path=local_file_path,
    jq_schema='.rss.channel.item[]',
    text_content=False,
)
docs = loader.load()
print(len(docs))

60


In [27]:
# ChatPromptTemplate is defined in langchain_core; importing it from there
# avoids relying on the legacy `langchain.prompts` re-export.
from langchain_core.prompts import ChatPromptTemplate

# Two-message chat prompt:
#   * system - instructs the model to keep only articles about CXO changes,
#     mergers/acquisitions or product/service launches, lists the topics to
#     exclude, fixes the Title/Link/Publication Date/Summary output layout,
#     and tells it to return nothing when no article qualifies.
#   * user   - carries the `{context}` placeholder, filled in at invoke time
#     with the article data to be filtered.
prompt_template = ChatPromptTemplate.from_messages([
    ("system",
     "You are a highly analytical assistant that summarizes business news articles from structured data.\n\n"
     "Your task is to extract ONLY the **relevant articles** that meet one of the following criteria:\n"
     "- CEO or CXO changes\n"
     "- Mergers or acquisitions\n"
     "- New product or service launches\n\n"

     "**IMPORTANT**: Do **NOT include articles** that are about:\n"
     "- Criminal cases (e.g., arson, fraud, sexual assault, theft)\n"
     "- Political events or incidents involving politicians\n"
     "- General news summaries, live feeds or generic news broadcasts\n"
     "- Personal scandals\n"
     "- Celebrity gossip or entertainment news\n"
     "- Placeholder articles or pages with no concrete business developments\n"
     "- Any other non-business-related topics\n\n"

     "Present ONLY the relevant articles in the following exact format:\n"
     "Title: <title>\n"
     "Link: <link>\n"
     "Publication Date: <pubDate> (in the format: Day, DD Month YYYY — exclude time and timezone)\n"
     "Summary: <brief 1-2 sentence summary>\n\n"

     "IMPORTANT RULES:\n"
     "- DO NOT write anything else.\n"
     "- DO NOT say 'No relevant articles found'.\n"
     "- DO NOT mention irrelevance, absence, or summaries.\n"
     "- If there are no relevant articles, RETURN NOTHING. Leave the output completely empty.\n"
     "- Use **human-readable characters** only. Do NOT return escaped Unicode (e.g., '\\u00e4'); write full characters (e.g., 'ä').\n"
    ),

    ("user", "Context:\n{context}")
])


#### Set N, the maximum number of chunks to split the documents into (each chunk holds ceil(len(docs) / N) documents; the last chunk may be smaller):

In [28]:
import math

# Maximum number of chunks to split the loaded documents into.
N = 6
if N < 1:
    raise ValueError("N must be a positive integer")

# Documents per chunk. Rounding up keeps the chunk count at or below N.
# The max(1, ...) guard matters when `docs` is empty: the size would otherwise
# be 0 and range() rejects a step of zero.
k = max(1, math.ceil(len(docs) / N))

# One context string per chunk: the page_content of each document in the
# chunk, separated by blank lines. Empty `docs` yields an empty list.
context_result = [
    "\n\n".join(doc.page_content for doc in docs[i:i + k])
    for i in range(0, len(docs), k)
]

context_result

['{"title": "Live Updates: Trump Addresses Saudi Investment Forum as He Begins Gulf Tour", "link": "https://www.nytimes.com/live/2025/05/13/us/trump-news-saudi-arabia", "guid": {"@isPermaLink": "true", "#text": "https://www.nytimes.com/live/2025/05/13/us/trump-news-saudi-arabia"}, "atom:link": {"@href": "https://www.nytimes.com/live/2025/05/13/us/trump-news-saudi-arabia", "@rel": "standout"}, "description": "The president, who will also visit Qatar and the United Arab Emirates, is making the first major international trip of his second term. He was set to speak at a meeting of business leaders, where many top U.S. executives are seeking deals.", "dc:creator": "The New York Times", "pubDate": "Tue, 13 May 2025 15:33:34 +0000", "media:content": {"@height": "1798", "@medium": "image", "@url": "https://static01.nyt.com/images/2025/05/13/multimedia/13trump-news-promo-11am-gkvp/13trump-news-promo-11am-gkvp-mediumSquareAt3X.jpg", "@width": "1800"}, "media:credit": "Doug Mills/The New York Tim

In [29]:
# Pipe the prompt into the chat model so each invoke() formats the prompt and
# sends it in one step.
chain = prompt_template | model

# Run the chain once per chunk and keep the raw text of every reply.
final_response = []
for chunk in context_result:
    final_response.append(chain.invoke({"context": chunk}).content)

# The prompt instructs the model to return nothing for a chunk with no relevant
# articles, so some replies may be empty or whitespace only. Skip those when
# assembling the report; otherwise they add stray blank lines between sections.
final_result = "\n\n".join(
    reply.strip() for reply in final_response if reply.strip()
)
print(final_result)

Title: Tariffs Push Honda to Move Production From Canada to U.S.
Link: https://www.nytimes.com/2025/05/13/world/americas/honda-trump-tariffs-us-canada.html
Publication Date: Tue, 13 May 2025
Summary: Honda is transferring production from Canada to the U.S. due to President Trump's trade war, impacting plans for an electric vehicle factory in Canada.

Title: Kate Middleton Presents 2025 Queen Elizabeth II Award for Design
Link: https://www.nytimes.com/2025/05/13/style/kate-middleton-queen-elizabeth-ii-award-design.html
Publication Date: Tue, 13 May 2025
Summary: The British royal made a surprise appearance on Tuesday to present a fashion award named for Queen Elizabeth II.

Title: Avelo Airlines Faces Backlash for Aiding Trump’s Deportation Campaign
Link: https://www.nytimes.com/2025/05/12/business/trump-deportation-flights-avelo-airlines.html
Publication Date: Tue, 13 May 2025
Summary: Avelo Airlines is under scrutiny for agreeing to operate chartered flights for Immigration and Custom