In [None]:
# Install the Python packages this notebook imports: LangChain (plus its core
# and community packages) and the Ollama Python client.
# NOTE(review): none of these installs pin a version — consider pinning for reproducibility.
!pip install langchain
!pip install langchain-core
!pip install langchain-community
!pip install ollama
# colab-xterm is loaded further down with `%load_ext colabxterm` to open a terminal in the notebook.
!pip install colab-xterm
# Download the Ollama install script and pipe it straight into sh.
# NOTE(review): this runs a remote script without verification — confirm that is acceptable here.
!curl -fsSL https://ollama.com/install.sh | sh
# Presumably provides the `newsapi` module imported in a later cell — confirm.
!pip install newsapi-python
# Presumably required by JSONLoader for its jq_schema argument — confirm.
!pip install jq
!pip install chromadb

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import JSONLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
import ollama

# Load documents from a JSON file
def load_documents(file_path):
    """
    Reads article records from a JSON file into documents.

    The jq expression handed to JSONLoader walks every element of the
    top-level array and keeps only its ``description`` and ``url`` fields.

    Parameters:
    file_path (str): Location of the JSON file to read.

    Returns:
    list: The result of ``JSONLoader.load()`` for that file.
    """
    # The jq expression produces objects rather than plain strings, hence text_content=False.
    return JSONLoader(
        jq_schema='.[] | { description: .description, url: .url}',
        text_content=False,
        file_path=file_path,
    ).load()

# Split documents into manageable chunks
def split_documents(documents, chunk_size=1000, chunk_overlap=200):
    """
    Splits documents into smaller chunks to manage processing load.

    Parameters:
    documents (list): A list of documents to be split.
    chunk_size (int): Chunk size forwarded to RecursiveCharacterTextSplitter.
        Defaults to 1000, the value that was previously hard-coded.
    chunk_overlap (int): Overlap between consecutive chunks, forwarded to
        RecursiveCharacterTextSplitter. Defaults to 200, the value that was
        previously hard-coded.

    Returns:
    list: A list of split document chunks.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(documents)

# Create embeddings and vector store
def create_vector_store(documents, model="llama3"):
    """
    Embeds the documents into a Chroma vector store and returns a retriever over it.

    Parameters:
    documents (list): A list of documents or text chunks.
    model (str): Name of the Ollama model used to compute embeddings.
        Defaults to "llama3", the value that was previously hard-coded.

    Returns:
    The object produced by ``vector_store.as_retriever()`` — note that this is
    the retriever, not the vector store itself, despite the function's name.
    """
    embedding_model = OllamaEmbeddings(model=model)
    vector_store = Chroma.from_documents(documents=documents, embedding=embedding_model)
    # Callers (e.g. generate_newsletter) call .invoke() on the result, so hand back the retriever.
    return vector_store.as_retriever()


In [None]:
import json
import os

from newsapi import NewsApiClient

# Supply your NewsAPI key through the NEWSAPI_KEY environment variable rather
# than pasting it into the notebook, so the secret is not saved or shared with
# the file. The empty-string fallback keeps the original placeholder behaviour
# when the variable is not set.
newsapi = NewsApiClient(api_key=os.environ.get('NEWSAPI_KEY', ''))

def latest_news(data, max_articles=9):
    """
    Fetches recent English-language articles for a query and saves them to 'news.json'.

    Parameters:
    data (str): Search query passed to ``newsapi.get_everything``.
    max_articles (int): Maximum number of articles to keep (expected to be
        non-negative). Defaults to 9, the cap the original ``k > 8`` check was
        written to enforce.

    Returns:
    list or None: The list of ``{'description', 'url'}`` dicts written to
    'news.json', or None if fetching or writing failed (the error is printed).
    """
    try:
        all_articles = newsapi.get_everything(q=data, language='en', sort_by='publishedAt')
        # Slice instead of counting by hand so the cap is always applied.
        selected = all_articles['articles'][:max_articles]
        extracted_data = [
            {
                # `or` also replaces values that are present but null/empty,
                # which the default argument of dict.get would let through.
                'description': article.get('description') or 'No description available',
                'url': article.get('url') or 'No Url',
            }
            for article in selected
        ]
        with open('news.json', 'w', encoding='utf-8') as news_file:
            json.dump(extracted_data, news_file)
        return extracted_data
    except Exception as e:
        # Best-effort: report the failure and signal it with None instead of raising.
        print(f"Failed to fetch news articles: {e}")
        return None

In [None]:
# Load the colab-xterm extension (installed in the first cell); the %xterm cells below rely on it.
%load_ext colabxterm

In [None]:
# Enter the "ollama serve" command in the terminal below.
# This will start an Ollama server.
%xterm

In [None]:
# Enter the "ollama run llama3" command in the terminal below.
# This will download llama3 and run it on the Ollama server.
%xterm

In [None]:
from datetime import date
def generate_newsletter(topic):
    """
    Builds a newsletter for a topic from freshly fetched news articles.

    Refreshes the news file through latest_news, indexes its contents via
    load_documents, split_documents and create_vector_store, retrieves the
    entries matching the topic, and asks the llama3 model (through ollama) to
    fill in the newsletter template using those entries as context.

    Parameters:
    topic (str): The subject to fetch news for and to write about.

    Returns:
    str: The content of the model's reply message.
    """
    # Refresh the news file first; the call's return value is not used here.
    latest_news(topic)
    newsletter_template = f"""
        # Your Daily Digest: {date.today()}

        Welcome to your curated news update, bringing you the latest and most relevant headlines directly to your inbox.

        ## Today's Top Story
        ### [Title of the Main News Article](URL_to_article)
        Provide a brief introduction to the top story of the day, emphasizing the main points succinctly.

        ---

        ## More News

        ### [Second News Article Title](URL_to_second_article)
        **Summary**: Offer a concise summary of the second most important news of the day.

        ### [Third News Article Title](URL_to_third_article)
        **Summary**: Summarize this article, highlighting key details that inform the reader effectively.

        ### [Fourth News Article Title](URL_to_fourth_article)
        **Summary**: Briefly cover the fourth article, focusing on crucial points.

        ### [Fifth News Article Title](URL_to_fifth_article)
        **Summary**: Sum up the fifth article, ensuring to pinpoint essential information.

        ---

        **Instructions**:
        - Write a news summary for the topic: '{topic}'.
        - Ensure the news summaries do not repeat information.
        - Follow the structure provided above as a template for the news summary.
        """
    # Load -> split -> index, then pull the entries relevant to the topic.
    chunks = split_documents(load_documents('news.json'))
    matches = create_vector_store(chunks).invoke(topic)

    context_parts = [match.page_content for match in matches]
    context_text = "\n\n".join(context_parts)
    prompt = "Question: " + newsletter_template + "\n\nContext: " + context_text
    messages = [{'role': 'user', 'content': prompt}]
    reply = ollama.chat(model='llama3', messages=messages)
    return reply['message']['content']

In [None]:
# Enter the topic for the news below.
# Runs the whole generate_newsletter pipeline and keeps the model's reply in
# `newsletter` so the next cell can display it.
newsletter = generate_newsletter('World News')

In [None]:
from IPython.display import display, Markdown
# Render the generated newsletter text as Markdown in the cell output.
display(Markdown(newsletter))