## **Final Project Code**



## Setting up the environment



In [16]:
# Install the third-party packages for this notebook. No versions are pinned,
# so every run installs whatever release is current at that moment.
# NOTE(review): llama-index is installed here but is not imported in any of the
# visible cells -- confirm it is still needed.
!pip install llama-index
!pip install newsapi-python
!pip install langchain
!pip install gradio
!pip install langchain_community
!pip install -qU chromadb langchain-chroma
!pip install langchain-groq
!pip install -qU langchain-huggingface
!pip install huggingface_hub



In [28]:
from newsapi import NewsApiClient
import gradio
from langchain.chains import LLMChain
from langchain_chroma import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.chains.summarize import load_summarize_chain
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from huggingface_hub import InferenceClient
import os


In [18]:
# API credentials are taken from the environment. The literals below are only
# placeholders used when a variable is not already set; `setdefault` keeps a
# real key that was exported before the notebook started instead of
# overwriting it with the placeholder.
os.environ.setdefault('NewsAPI', "NEWSAPI")
os.environ.setdefault("GROQ_API_KEY", 'GROQAPI')

# Shared chat model used by the summarisation and image-prompt cells below.
# NOTE(review): confirm this model id is still served by Groq.
llm = ChatGroq(model="mixtral-8x7b-32768")



# Fetching news articles using NewsAPI


In [19]:

def fetch_news(topics, page=5, language='en', sort_by='relevancy'):
    """Fetch news articles for each topic using the NewsAPI.

    Args:
        topics (Iterable[str]): Search queries; one request is sent per topic.
        page (int): Result page requested for every topic. Defaults to 5, the
            value this notebook previously hard-coded.
            NOTE(review): this is a page index, not a number of articles. If
            the intent was "five articles per topic", the client's
            ``page_size`` argument is the one to use -- confirm.
        language (str): Language filter passed to the API. Defaults to 'en'.
        sort_by (str): Sort order passed to the API. Defaults to 'relevancy'.

    Returns:
        list[dict]: The article dicts of all topics, concatenated in topic order.
    """
    newsapi = NewsApiClient(api_key=os.environ['NewsAPI'])

    articles = []
    for topic in topics:
        response = newsapi.get_everything(
            q=topic,
            language=language,
            sort_by=sort_by,
            page=page,
        )
        # A response whose 'articles' entry is missing or None contributes
        # nothing instead of crashing `extend`.
        articles.extend(response.get('articles') or [])

    return articles


# Splitting fetched articles into chunks using CharacterTextSplitter

In [20]:

def split_articles(articles):
    """Split fetched articles into text chunks.

    For every article the 'content' value is used; when it is missing, None or
    empty, the 'title' value is used instead. Articles with neither are skipped.

    Args:
        articles (Iterable[dict]): Article dicts; only the 'content' and
            'title' keys are read.

    Returns:
        list[str]: All chunks of all articles, in article order.
    """
    text_splitter = CharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    )

    all_chunks = []
    for article in articles:
        # Check the value, not just the key: a key can be present but hold
        # None, which the splitter cannot process and which previously
        # prevented the title fallback from ever being reached.
        text = article.get('content') or article.get('title')
        if text:
            all_chunks.extend(text_splitter.split_text(text))
    return all_chunks


# Creating embeddings from the chunks and storing them in a Chroma vector database


In [21]:

def add_chunks_to_vector_db(all_chunks, persist_directory="./chroma_data"):
    """
    Embeds article chunks and adds them to a persisted Chroma collection.

    Args:
        all_chunks (list[str]): List of article text chunks to embed and store.
        persist_directory (str): Path to the directory where the Chroma database will be persisted.

    Returns:
        Chroma: The "article_chunks" vector store with the chunks added. When
        `all_chunks` is empty nothing is added and the store is returned as is.

    Raises:
        Exception: Any error raised while embedding or storing is printed and
        then re-raised unchanged.
    """
    import hashlib

    try:
        embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

        # NOTE(review): this file imports `Chroma` twice (langchain_chroma and
        # langchain.vectorstores); the name resolves to whichever import ran last.
        db = Chroma(
            collection_name="article_chunks",
            embedding_function=embeddings_model,
            # Honour the caller's directory; it used to be ignored in favour
            # of a hard-coded path.
            persist_directory=persist_directory,
        )

        # Ids are derived from the chunk text rather than from its position.
        # Positional ids (doc_0, doc_1, ...) repeated on every call, so the
        # chunks of one topic collided with those stored for an earlier topic.
        # Keying by id also removes duplicate chunks within this batch.
        unique_chunks = {}
        for chunk in all_chunks:
            chunk_id = "doc_" + hashlib.sha256(chunk.encode("utf-8")).hexdigest()
            unique_chunks.setdefault(chunk_id, chunk)

        # Skip the write entirely when there is nothing to store.
        if unique_chunks:
            documents = [Document(page_content=chunk) for chunk in unique_chunks.values()]
            db.add_documents(documents=documents, ids=list(unique_chunks))

        print("Your news is on its way! :)")
        return db

    except Exception as e:
        print(f"An error occurred: {e}")
        raise




# Retrieving the most relevant articles

In [22]:
def retrieve_top_articles(topic, db, k=10):
    """Retrieve the top k most relevant stored chunks for a topic.

    Args:
        topic (str): Query passed to the retriever.
        db: Vector store exposing `as_retriever(search_kwargs=...)`.
        k (int): Number of results to request. Defaults to 10.

    Returns:
        Whatever the retriever's `invoke` returns for the topic.
    """
    # Pass the caller's k through; it used to be ignored in favour of a
    # hard-coded 10.
    retriever = db.as_retriever(search_kwargs={"k": k})
    return retriever.invoke(topic)

# Generating individual article summaries, then a final comprehensive summary of all relevant articles


In [23]:

def generate_comprehensive_summary_refine(relevant_articles):
    """Summarise every article, then merge the summaries into one narrative.

    Each article's `page_content` is summarised on its own with the shared
    `llm`; the individual summaries are then joined and sent through a second
    prompt that produces the final combined summary.

    Args:
        relevant_articles: Iterable of objects exposing `page_content` (str).

    Returns:
        str: The combined summary, or an empty string when there are no
        articles (no model call is made in that case).
    """
    if not relevant_articles:
        return ""

    # Build the per-article chain once instead of on every loop iteration.
    # The prompt uses real newlines; the doubled backslashes used before sent
    # the characters "\n" literally to the model.
    map_prompt = ChatPromptTemplate.from_messages([("system", "Write very high-quality summary that captures the key details, main arguments, and critical insights, including factual details:\n\n{article}")])
    map_chain = map_prompt | llm | StrOutputParser()
    individual_summaries = [
        map_chain.invoke({"article": article.page_content})
        for article in relevant_articles
    ]

    refine_template = """
    The following is a set of summaries:
    {docs}
    You are an expert summary synthesizer. Your task is to take these summaries, which each provide a snapshot of key insights, arguments, and important details, and refine them into one concise, coherent, comprehensive, and polished narrative. Your goal is to ensure that the final summary is logically structured, seamlessly integrates the individual pieces, and provides a clear and fluid overall narrative. Avoid repetition and ensure smooth transitions between points. The result should read like a well-written article that incorporates the most important aspects of the individual summaries, with clarity and conciseness.."""

    # The refine step runs once, after ALL articles are summarised. It used to
    # sit inside the loop and returned after the first article.
    refine_prompt = ChatPromptTemplate.from_messages([("human", refine_template)])
    refine_chain = refine_prompt | llm | StrOutputParser()
    # Pass the summaries as readable text under the template's variable name
    # rather than as a raw Python list.
    return refine_chain.invoke({"docs": "\n\n".join(individual_summaries)})


## Generating image prompts based on the final summary


In [24]:
def generate_image_prompts(final_summary):
    """
    Generate one image prompt from a text summary using the shared `llm`.

    Args:
        final_summary (str): Summary text inserted into the prompt template.

    Returns:
        The `content` of the model's response (the generated image prompt).
    """

    # Define the prompt template with placeholders
    prompt_template = """
    You are a highly skilled AI model designed to generate detailed and creative image prompts based on concise text summaries.

    Please generate one high-quality, comprehensive image prompt based on the following summary:

    {final_summary}

    The image prompt should be:
    - Evocative and visually rich, describing the key visual elements, colors, lighting, and environment.
    - Focused on the mood and atmosphere implied in the summary, whether it's peaceful, dramatic, surreal, etc.
    - Precise in its details, ensuring it translates well for an image generation model like Stable Diffusion or DALL-E.
    """

    prompt = ChatPromptTemplate.from_template(prompt_template)
    messages = prompt.format_messages(final_summary=final_summary)
    # Use the Runnable `invoke` API; calling the chat model object directly
    # (`llm(messages)`) is the deprecated entry point.
    response = llm.invoke(messages)
    return response.content

# Image generation

In [34]:
# Hugging Face inference client used for text-to-image generation.
# The token is read from the HF_TOKEN environment variable when it is set; the
# literal is only a placeholder fallback so a real token never has to be
# written into the notebook.
client = InferenceClient(
    "stabilityai/stable-diffusion-3.5-large",
    token=os.environ.get("HF_TOKEN", "HFTOKEN"),
)

def generate_image_from_huggingface(prompt, output_dir="images"):
    """Generate an image for `prompt`, save a 400x400 JPEG copy, and return it.

    Args:
        prompt (str): Text prompt sent to the module-level `client`.
        output_dir (str): Directory the JPEG is written to; created if missing.

    Returns:
        The image resized to 400x400 pixels.
    """
    import re

    image = client.text_to_image(prompt)

    # Resize the image to 400x400 pixels
    resized_image = image.resize((400, 400))

    # Ensure output directory exists
    os.makedirs(output_dir, exist_ok=True)

    # Build the file name from the first 50 prompt characters, keeping only
    # characters that are safe in a file name. Raw prompt text can contain
    # '/', ':', quotes or newlines, which would yield an invalid or nested path.
    file_stem = re.sub(r"[^A-Za-z0-9_-]+", "_", prompt[:50]).strip("_") or "image"
    image_path = os.path.join(output_dir, f"{file_stem}.jpg")

    # JPEG has no alpha channel or palette support, so convert other modes to
    # RGB for the saved copy only; the returned image is left untouched.
    jpeg_image = resized_image if resized_image.mode == "RGB" else resized_image.convert("RGB")
    jpeg_image.save(image_path, format="JPEG")

    return resized_image

# Generating the newsletter

In [36]:
def generate_newsletter_with_summary_and_images(user_interests):
    """Generate a newsletter with one summary section per requested topic.

    Args:
        user_interests (str): Comma-separated topics, e.g. "Technology, AI".
            Blank entries (such as those produced by a trailing comma) are
            ignored.

    Returns:
        tuple: `(newsletter_content, image)`. `newsletter_content` is the
        concatenated "## topic" sections. `image` is the image generated for
        the last topic that produced one, or None when no image was generated.
    """
    # Drop blank entries so empty queries are never sent to the news API.
    raw_topics = (user_interests or "").split(",")
    topics = [topic.strip() for topic in raw_topics if topic.strip()]

    newsletter_content = ""
    # Defined up front so the return below works even when no topic yields
    # an image.
    image = None

    for topic in topics:
        # Fetch articles for this specific topic
        articles = fetch_news([topic])

        # Split articles into chunks
        all_chunks = split_articles(articles)
        if not all_chunks:
            # Nothing to embed or summarise for this topic; say so and move on.
            newsletter_content += f"## {topic}\nNo articles were found for this topic.\n\n"
            continue

        # Add chunks to vector database
        db = add_chunks_to_vector_db(all_chunks, persist_directory="./chroma_data")

        # Retrieve relevant articles for this topic
        relevant_articles = retrieve_top_articles(topic, db, k=10)

        # Generate a comprehensive summary for this topic
        topic_summary = generate_comprehensive_summary_refine(relevant_articles)

        # Generate an image prompt using the summary
        image_prompt = generate_image_prompts(topic_summary)

        # Generate an image using the prompt
        image = generate_image_from_huggingface(image_prompt)

        # Add topic summary to the newsletter content
        newsletter_content += f"## {topic}\n{topic_summary}\n\n"

    return newsletter_content, image


## Deploying Model using a Gradio **UI**


In [37]:

# Input widget: a single text box for the comma-separated topics.
topics_box = gradio.Textbox(
    label="Enter your topic of interest",
    placeholder="e.g., Technology, AI, Health",
)

# Output widgets: the newsletter text and the generated image.
newsletter_box = gradio.Textbox(label="Newsletter Content")
image_panel = gradio.Image(label="Generated Image")

# Wire the newsletter generator to the widgets.
interface = gradio.Interface(
    fn=generate_newsletter_with_summary_and_images,
    inputs=topics_box,
    outputs=[newsletter_box, image_panel],
    title="AI-Powered Newsletter Generator",
    description="Enter topics to generate a newsletter with summaries and images.",
)

# debug=True keeps the cell running so errors and logs stay visible.
interface.launch(debug=True)

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://0381b324581c35ec9f.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Your news is on its way! :)
Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://0381b324581c35ec9f.gradio.live


