In [18]:
from newsapi import NewsApiClient
from langchain.document_loaders import JSONLoader
from dotenv import load_dotenv
import os
import json
load_dotenv()



client = NewsApiClient( api_key = os.getenv("NEWS_API") )
results = client.get_everything( q = "cricket" )
all_news = []
for article in results.get("articles"):
    source = article.get("source").get("name")
    published = article.get("publishedAt")
    title = article.get("title")
    content = article.get("content")
    all_news.append({
        "title" : title , 
        "source" : source , 
        "published" : published , 
        "content" : content
    })

    
    


In [19]:
len(all_news)

98

In [20]:
with open( "results.json" , "w" ) as f :
    json.dump( all_news , f )

In [23]:
loader = JSONLoader( "results.json" , jq_schema = ".[].title, .[].source, .[].published, .[].content" )
data = loader.load()

In [27]:
data

[Document(metadata={'source': 'C:\\Users\\hp\\Desktop\\python practice\\Explore\\day-15\\results.json', 'seq_num': 1}, page_content="England mishandled 'pin-up girl' Wong - Robinson"),
 Document(metadata={'source': 'C:\\Users\\hp\\Desktop\\python practice\\Explore\\day-15\\results.json', 'seq_num': 2}, page_content="India v Pakistan: Cricket's ultimate grudge match in the desert"),
 Document(metadata={'source': 'C:\\Users\\hp\\Desktop\\python practice\\Explore\\day-15\\results.json', 'seq_num': 3}, page_content="England seamer Cross out of Women's Ashes Test"),
 Document(metadata={'source': 'C:\\Users\\hp\\Desktop\\python practice\\Explore\\day-15\\results.json', 'seq_num': 4}, page_content='How to Watch 2025 ICC Champions Trophy Cricket Live From Anywhere'),
 Document(metadata={'source': 'C:\\Users\\hp\\Desktop\\python practice\\Explore\\day-15\\results.json', 'seq_num': 5}, page_content='England did not lose because of athleticism - Lewis'),
 Document(metadata={'source': 'C:\\Users\\

In [30]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_ollama.embeddings import OllamaEmbeddings
from langchain_google_genai.llms import GoogleGenerativeAI
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain


In [29]:
text_splitter = RecursiveCharacterTextSplitter( chunk_size = 1000 , chunk_overlap = 200 )
splits = text_splitter.split_documents(data)

In [31]:
embeddings_model = OllamaEmbeddings( model = "nomic-embed-text:latest" )
db = Chroma.from_documents( splits , embeddings_model )
retriever = db.as_retriever()

In [33]:
prompt = ChatPromptTemplate.from_template(
    """
You are an expert at providing news in a documented manner,
Based on the given context give the news to the user with respected details.
Context:
{context}
Question:
{input}
"""
)
llm = GoogleGenerativeAI( model = "gemini-2.0-flash" )

combine_chain = create_stuff_documents_chain( llm = llm , prompt = prompt )
retrieval_chain = create_retrieval_chain( retriever = retriever , combine_docs_chain = combine_chain )

In [36]:
res = retrieval_chain.invoke({"input":"Give me news related to the champions trophy"})

In [37]:
res["answer"]

"Here's a news report summarizing the key details about the upcoming ICC Champions Trophy:\n\n**ICC Champions Trophy Returns with Increased Prize Money**\n\nThe ICC Champions Trophy is set to begin on Wednesday in Pakistan and Dubai, United Arab Emirates, featuring the world's top eight cricket teams vying for the title. The International Cricket Council (ICC) has announced a significant increase in the prize pool for this year's tournament, raising it to $6.9 million. This represents a 53% jump from the 2017 edition. The winning team will receive $2.24 million.\n"

"Here's a news report summarizing the key details about the upcoming ICC Champions Trophy:\n\n**ICC Champions Trophy Returns with Increased Prize Money**\n\nThe ICC Champions Trophy is set to begin on Wednesday in Pakistan and Dubai, United Arab Emirates, featuring the world's top eight cricket teams vying for the title. The International Cricket Council (ICC) has announced a significant increase in the prize pool for this year's tournament, raising it to $6.9 million. This represents a 53% jump from the 2017 edition. The winning team will receive $2.24 million.\n"