In [3]:
%%capture --no-stderr
%pip install -U --quiet langchain-community tiktoken langchain-openai langchainhub chromadb langchain langgraph langchain-text-splitters beautifulsoup4

In [None]:
%%capture --no-stderr
%pip install pandas 

In [13]:
%%capture --no-stderr
%pip install langchain transformers sentence-transformers

In [18]:
%%capture --no-stderr
%pip install hf_xet

In [1]:
import getpass
import os
import pandas as pd

from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.schema import Document
from langchain_huggingface import HuggingFaceEmbeddings



In [2]:
df = pd.read_csv("AAPL.csv")
df.head()

Unnamed: 0,Date,Article_title,Stock_symbol,Url,Publisher,Author,Article,Lsa_summary,Luhn_summary,Textrank_summary,Lexrank_summary
0,2023-12-16 22:00:00 UTC,My 6 Largest Portfolio Holdings Heading Into 2...,AAPL,https://www.nasdaq.com/articles/my-6-largest-p...,,,"After an absolute disaster of a year in 2022, ...",3: Apple There's little question that Apple (N...,3: Apple There's little question that Apple (N...,3: Apple There's little question that Apple (N...,3: Apple There's little question that Apple (N...
1,2023-12-16 22:00:00 UTC,Brokers Suggest Investing in Apple (AAPL): Rea...,AAPL,https://www.nasdaq.com/articles/brokers-sugges...,,,"When deciding whether to buy, sell, or hold a ...",Let's take a look at what these Wall Street he...,Click to get this free report Apple Inc. (AAPL...,Let's take a look at what these Wall Street he...,Brokerage Recommendation Trends for AAPL Let's...
2,2023-12-16 21:00:00 UTC,"Company News for Dec 19, 2023",AAPL,https://www.nasdaq.com/articles/company-news-f...,,,Shares of Apple Inc. AAPL lost 0.9% on China’s...,Shares of Apple Inc. AAPL lost 0.9% on China’s...,Click to get this free report Apple Inc. (AAPL...,Click to get this free report Apple Inc. (AAPL...,Click to get this free report Apple Inc. (AAPL...
3,2023-12-16 21:00:00 UTC,NVIDIA (NVDA) Up 243% YTD: Will It Carry Momen...,AAPL,https://www.nasdaq.com/articles/nvidia-nvda-up...,,,NVIDIA Corporation NVDA has witnessed a remark...,Other Stocks in the $1T Club Apart from NVIDIA...,Other Stocks in the $1T Club Apart from NVIDIA...,Other Stocks in the $1T Club Apart from NVIDIA...,Other Stocks in the $1T Club Apart from NVIDIA...
4,2023-12-16 21:00:00 UTC,"Pre-Market Most Active for Dec 19, 2023 : BMY,...",AAPL,https://www.nasdaq.com/articles/pre-market-mos...,,,The NASDAQ 100 Pre-Market Indicator is up 10.1...,"Apple Inc. (AAPL) is +0.86 at $196.75, with 1,...","Apple Inc. (AAPL) is +0.86 at $196.75, with 1,...","Apple Inc. (AAPL) is +0.86 at $196.75, with 1,...","Apple Inc. (AAPL) is +0.86 at $196.75, with 1,..."


In [3]:
print("NaNs in Publisher:", df['Publisher'].isna().sum())
print("NaNs in Author:", df['Author'].isna().sum())

NaNs in Publisher: 8865
NaNs in Author: 9338


In [4]:
df.drop(['Publisher', 'Author'], axis=1, inplace=True)

In [5]:
df_sample = df.head(10)

df_sample.to_csv("AAPL_sample.csv", index=False)

In [6]:
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': False}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)

In [7]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
import chromadb

In [8]:
print(chromadb.__version__)

1.0.4


In [24]:
from langchain.schema import Document  
import os

db_location = "./chrome_sample"
add_documents = not os.path.exists(db_location)

if add_documents:
    documents = []
    ids = []

    for i, row in df_sample.iterrows():
        page_content = (
            f"{row['Article_title']}\n\n"
            f"{row['Article']}\n\n"
            f"Lsa: {row['Lsa_summary']}\n"
            f"Luhn: {row['Luhn_summary']}\n"
            f"TextRank: {row['Textrank_summary']}\n"
            f"LexRank: {row['Lexrank_summary']}"
        )

        document = Document(
            page_content=page_content,
            metadata={"date": row["Date"], "stock": row["Stock_symbol"], "url": row["Url"]},
            id=str(i)
        )
        ids.append(str(i))
        documents.append(document)

    
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=100,
        chunk_overlap=50
    )
    doc_splits = text_splitter.split_documents(documents)

    
    vectorstore = Chroma.from_documents(
        documents=doc_splits,
        persist_directory=db_location,
        collection_name="rag-chroma",
        embedding=hf
    )


retriever = vectorstore.as_retriever()



In [None]:
import shutil
import os

db_location = "./chrome_sample"

if os.path.exists(db_location):
    shutil.rmtree(db_location)
    print("Chroma DB deleted.")
else:
    print("Chroma DB not found.")


Chroma DB not found.


In [25]:
from langchain.tools.retriever import create_retriever_tool

retriever_tool = create_retriever_tool(
    retriever,
    "retrieve_aapl_articles",
    "Search and return information about Apple (AAPL) articles, stock updates, and summaries.",
)

tools = [retriever_tool]

In [26]:
### LLM
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate

local_llm = "llama3"
llm = ChatOllama(model=local_llm, temperature=0)
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")


In [28]:
template = """
You are an exeprt in answering questions about Apple (AAPL) articles, stock updates, and summaries

Here are some relevant articles: {articles}

Here is the question to answer: {question}
"""
prompt = ChatPromptTemplate.from_template(template)
chain = prompt | llm

while True:
    print("\n\n-------------------------------")
    question = input("Ask your question (q to quit): ")
    print("\n\n")
    if question == "q":
        break
    
    articles = retriever.invoke(question)
    result = chain.invoke({"articles": articles, "question": question})
    print(result)



-------------------------------



content='I\'m happy to help!\n\nSince you asked about NVIDIA (NVDA), I\'ll focus on the relevant articles. Here are some key points:\n\n1. **Generative AI investments**: The article "NVIDIA Corporation Price and Consensus" suggests that NVDA\'s surge is driven by hopes that the company will be a prime beneficiary of growing investments in generative AI. This could carry momentum into 2024.\n2. **CEO Jensen Huang\'s vision**: In the article "My 6 Largest Portfolio Holdings Heading Into 2024 And The Important Investing Lesson I", it\'s mentioned that CEO Jensen Huang has a knack for recognizing technology trends and adapting Nvidia\'s processors to meet those needs. This is evident in Nvidia\'s dominance in the discrete desktop GPU space, controlling roughly 70% of the market.\n3. **Robust financial performance**: According to the article "NVIDIA Corporation NVDA has witnessed a remarkable run", NVIDIA achieved a massive milestone by joining the exclu