## **Final Project Code**



## Setting up the environment



In [None]:
# Install the third-party packages used by the cells below.
# NOTE(review): no versions are pinned, so a fresh run may resolve newer releases
# than the ones this notebook was written against.
# NOTE(review): llama-index is installed here but is not imported in the visible cells.
!pip install llama-index
!pip install newsapi-python
!pip install langchain
!pip install gradio
!pip install langchain_community
!pip install -qU chromadb langchain-chroma
!pip install langchain-groq
!pip install -qU langchain-huggingface
!pip install huggingface_hub

Collecting llama-index
  Downloading llama_index-0.12.1-py3-none-any.whl.metadata (11 kB)
Collecting llama-index-agent-openai<0.5.0,>=0.4.0 (from llama-index)
  Downloading llama_index_agent_openai-0.4.0-py3-none-any.whl.metadata (726 bytes)
Collecting llama-index-cli<0.5.0,>=0.4.0 (from llama-index)
  Downloading llama_index_cli-0.4.0-py3-none-any.whl.metadata (1.5 kB)
Collecting llama-index-core<0.13.0,>=0.12.1 (from llama-index)
  Downloading llama_index_core-0.12.1-py3-none-any.whl.metadata (2.5 kB)
Collecting llama-index-embeddings-openai<0.4.0,>=0.3.0 (from llama-index)
  Downloading llama_index_embeddings_openai-0.3.0-py3-none-any.whl.metadata (684 bytes)
Collecting llama-index-indices-managed-llama-cloud>=0.4.0 (from llama-index)
  Downloading llama_index_indices_managed_llama_cloud-0.6.2-py3-none-any.whl.metadata (3.8 kB)
Collecting llama-index-legacy<0.10.0,>=0.9.48 (from llama-index)
  Downloading llama_index_legacy-0.9.48.post4-py3-none-any.whl.metadata (8.5 kB)
Collecting 

In [None]:
from newsapi import NewsApiClient
import gradio
from langchain.chains import LLMChain
from langchain_chroma import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains.summarize import load_summarize_chain
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from huggingface_hub import InferenceClient


In [None]:
import os

# Groq credentials: keep a GROQ_API_KEY that is already present in the environment
# and only fall back to the "GroqApi" placeholder when none is set, so a real key
# supplied outside the notebook is never overwritten.
os.environ.setdefault("GROQ_API_KEY", "GroqApi")

# Shared chat model; the summarisation and image-prompt functions below read this global.
llm = ChatGroq(model="mixtral-8x7b-32768")


# Fetching news articles using the News API


In [None]:

def fetch_news(topics, page=5, api_key=None):
    """Fetch news articles for each topic through the NewsAPI client.

    Args:
        topics (list[str]): Search queries. Blank or whitespace-only entries
            are skipped instead of being sent as empty queries.
        page (int): Result page requested for every topic. Defaults to 5,
            the value this function previously hard-coded.
            NOTE(review): confirm page 5 is intended; a page size of 5 may
            have been meant.
        api_key (str | None): NewsAPI key. When omitted, the NEWS_API_KEY
            environment variable is used, falling back to the original
            'NewsAPI' placeholder.

    Returns:
        list[dict]: Article dicts of all topics, concatenated in topic order.
    """
    if api_key is None:
        api_key = os.environ.get("NEWS_API_KEY", "NewsAPI")
    newsapi = NewsApiClient(api_key=api_key)

    articles = []
    for topic in topics:
        # An empty query cannot match anything useful, so do not spend a request on it.
        if not topic or not topic.strip():
            continue
        response = newsapi.get_everything(
            q=topic,
            language='en',
            sort_by='relevancy',
            page=page
        )
        # Tolerate a response whose 'articles' entry is missing or None.
        articles.extend(response.get('articles') or [])

    return articles


# Splitting fetched articles into chunks using CharacterTextSplitter

In [None]:

def split_articles(articles):
    """Split the text of each article into chunks.

    The article's 'content' is used when it holds text; otherwise its 'title'
    is used. Articles with neither are skipped.

    Args:
        articles (list[dict]): Article dicts, read through their 'content'
            and 'title' entries.

    Returns:
        list[str]: All chunks from all articles, in article order.
    """
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200
    )

    all_chunks = []
    for article in articles:
        # Test the value, not key presence: a 'content' key holding None or ""
        # must fall through to the title rather than reach split_text.
        text = article.get('content') or article.get('title')
        if text:
            all_chunks.extend(text_splitter.split_text(text))
    return all_chunks


# Creating embeddings from the chunks and storing them in a Chroma DB


In [None]:

def add_chunks_to_vector_db(all_chunks, persist_directory="./chroma_data"):
    """
    Embeds article chunks and adds them to a Chroma vector database.

    Args:
        all_chunks (list[str]): List of article text chunks to embed and store.
        persist_directory (str): Directory handed to Chroma as its persistence
            location. This argument is now honoured; it was previously ignored
            in favour of a hard-coded path.

    Returns:
        Chroma: The vector database the chunks were added to.

    Raises:
        Exception: Any error raised while building the store or adding the
            documents is printed and then re-raised unchanged.
    """
    try:
        embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

        db = Chroma(
            collection_name="article_chunks",
            embedding_function=embeddings_model,
            persist_directory=persist_directory,
        )

        documents = [Document(page_content=chunk) for chunk in all_chunks]

        # IDs restart at doc_0 on every call.
        # NOTE(review): with a persisted collection, a later call reuses the IDs of
        # an earlier one; confirm that this is the intended behaviour.
        ids = [f"doc_{i}" for i in range(len(documents))]
        db.add_documents(documents=documents, ids=ids)
        print("Your news is on its way! :)")
        return db

    except Exception as e:
        print(f"An error occurred: {e}")
        raise




# Retrieving the most relevant articles

In [None]:
def retrieve_top_articles(topic, db, k=10):
    """Retrieve the k most relevant stored documents for a topic.

    Args:
        topic (str): Query passed to the retriever.
        db: Vector store exposing `as_retriever(search_kwargs=...)`.
        k (int): Number of documents to request. This argument is now honoured;
            it was previously ignored in favour of a hard-coded 10.

    Returns:
        Whatever the retriever's `invoke` returns for the topic.
    """
    retriever = db.as_retriever(search_kwargs={"k": k})
    return retriever.invoke(topic)

# Generating individual article summaries, then a final comprehensive summary of all relevant articles


In [None]:

def generate_comprehensive_summary_refine(relevant_articles):
    """Summarise every article, then merge the summaries into one narrative.

    Each article is summarised individually with the module-level `llm`.
    The individual summaries are then joined with blank lines and passed to a
    second prompt that produces the final summary.

    Args:
        relevant_articles: Iterable of objects exposing `page_content`.

    Returns:
        str: The final combined summary, or "" when there are no articles
        (no model call is made in that case).
    """
    if not relevant_articles:
        return ""

    # Build the per-article chain once; it does not depend on the article.
    map_prompt = ChatPromptTemplate.from_messages([("system", "Write very high-quality summary that captures the key details, main arguments, and critical insights, including factual details:\n\n{article}")])
    map_chain = map_prompt | llm | StrOutputParser()
    individual_summaries = [
        map_chain.invoke({"article": article.page_content})
        for article in relevant_articles
    ]

    refine_template = """
    The following is a set of summaries:
    {docs}
    You are an expert summary synthesizer. Your task is to take these summaries, which each provide a snapshot of key insights, arguments, and important details, and refine them into one concise, coherent, comprehensive, and polished narrative. Your goal is to ensure that the final summary is logically structured, seamlessly integrates the individual pieces, and provides a clear and fluid overall narrative. Avoid repetition and ensure smooth transitions between points. The result should read like a well-written article that incorporates the most important aspects of the individual summaries, with clarity and conciseness.."""

    # The refine step runs once, after ALL articles have been summarised.
    refine_prompt = ChatPromptTemplate.from_messages([("human", refine_template)])
    refine_chain = refine_prompt | llm | StrOutputParser()
    # Pass readable text under the template's variable name, not a list repr.
    return refine_chain.invoke({"docs": "\n\n".join(individual_summaries)})


## Generating image prompts based on the final summary


In [None]:
def generate_image_prompts(final_summary):
    """
    Generate an image prompt from a text summary using the module-level `llm`.

    Args:
        final_summary (str): Summary text inserted into the prompt template.

    Returns:
        The `content` attribute of the model's response.
    """

    # Define the prompt template with placeholders
    prompt_template = """
    You are a highly skilled AI model designed to generate detailed and creative image prompts based on concise text summaries.

    Please generate one high-quality, comprehensive image prompt based on the following summary:

    {final_summary}

    The image prompt should be:
    - Evocative and visually rich, describing the key visual elements, colors, lighting, and environment.
    - Focused on the mood and atmosphere implied in the summary, whether it's peaceful, dramatic, surreal, etc.
    - Precise in its details, ensuring it translates well for an image generation model like Stable Diffusion or DALL-E.
    """

    prompt = ChatPromptTemplate.from_template(prompt_template)
    messages = prompt.format_messages(final_summary=final_summary)
    # Use .invoke(): calling the chat model object directly is the deprecated form.
    response = llm.invoke(messages)
    return response.content

# Image generation

In [None]:
# Text-to-image inference client. The token is taken from the HF_TOKEN environment
# variable when it is set; otherwise the original "HFtoken2" placeholder is used.
client = InferenceClient("stabilityai/stable-diffusion-3.5-large", token=os.environ.get("HF_TOKEN", "HFtoken2"))

def generate_image_from_huggingface(prompt):
    """Send `prompt` to the module-level `client` and return the image it produces."""
    return client.text_to_image(prompt)

# Generating the newsletter

In [None]:
def generate_newsletter_with_summary(user_interests):
    """Generate a newsletter with a summary and AI-generated images per topic.

    For every topic the pipeline is: fetch articles, split them into chunks,
    store the chunks in the vector database, retrieve the most relevant ones,
    summarise them, derive image prompts from the summary and generate one
    image per non-empty prompt line.

    Args:
        user_interests (str): Comma-separated topics. Blank entries (for
            example from a trailing comma) are ignored.

    Returns:
        str: One "## topic" section per topic, followed by "Image N:" entries.
        Each image (or image error message) is interpolated into the text
        as-is, so the entry shows that object's string form.
    """
    # Drop blank entries so a stray comma does not run the whole pipeline on "".
    topics = [topic.strip() for topic in user_interests.split(",") if topic.strip()]
    sections = []

    for topic in topics:
        # Fetch articles for this specific topic
        articles = fetch_news([topic])

        # Split articles into chunks
        all_chunks = split_articles(articles)

        # Add chunks to vector database
        db = add_chunks_to_vector_db(all_chunks, persist_directory="./chroma_data")

        # Retrieve relevant articles for this topic
        relevant_articles = retrieve_top_articles(topic, db, k=10)

        # Generate a comprehensive summary for this topic
        topic_summary = generate_comprehensive_summary_refine(relevant_articles)

        # Generate image prompts from the summary
        image_prompts = generate_image_prompts(topic_summary)

        # One image per non-empty line of the model's answer
        generated_images = []
        for prompt in image_prompts.split('\n'):
            if not prompt.strip():
                continue
            try:
                # Go through the shared helper rather than the client directly.
                generated_images.append(generate_image_from_huggingface(prompt))
            except Exception as e:
                # Best effort: keep the newsletter and record the failure in place.
                print(f"Error generating image for prompt '{prompt}': {e}")
                generated_images.append(f"Error generating image: {e}")

        # Add topic summary and images to the newsletter content
        sections.append(f"## {topic}\n{topic_summary}\n\n")
        sections.extend(
            f"Image {number}:\n{image}\n\n"
            for number, image in enumerate(generated_images, start=1)
        )

    return "".join(sections)


## Deploying the model using a Gradio **UI**


In [None]:

# Gradio front end: one text box in (comma-separated topics), one text box out
# (the generated newsletter), wired to generate_newsletter_with_summary.
interface = gradio.Interface(
    title="Your Personalised Newsletter Generator",
    description="Enter topics you're interested in from the domains of Business, Entertainment, Health, Science, Sports, Technology, and this tool will generate a newsletter summarizing the latest articles on your topics of interest.",
    fn=generate_newsletter_with_summary,
    inputs=gradio.Textbox(label="Enter Topics of Interest:(comma-separated)"),
    outputs=gradio.Textbox(label="Generated Comprehensive Newsletter"),
)

# Start the app only when this code runs as the main module.
if __name__ == "__main__":
    interface.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://c57f0cbd2b9ca4dedf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://c57f0cbd2b9ca4dedf.gradio.live
