In [None]:
!pip install -q --upgrade google-generativeai langchain-google-genai chromadb pypdf

# **ALL THE NECESSARY IMPORTS FOR TEXT FORMATTING**

In [None]:
from IPython.display import display
from IPython.display import Markdown
import textwrap


def to_markdown(text):
  """Render ``text`` as a Markdown block quote.

  Bullet characters ('•') are rewritten as indented Markdown list markers and
  every line, including blank ones, is prefixed with '> '.
  """
  bulleted = text.replace('•', '  *')
  # The always-true predicate makes textwrap.indent prefix blank lines as well.
  quoted = textwrap.indent(bulleted, '> ', predicate=lambda _line: True)
  return Markdown(quoted)

In [None]:
import google.generativeai as genai
from google.colab import userdata

# **INITIALIZE THE API**

In [None]:
# SECURITY: never paste a literal API key into the notebook, not even in a comment.
# Any key that was ever committed here must be treated as leaked and rotated.
import os

# Read the key from Colab's secret store.
GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
if not GOOGLE_API_KEY:
    # Fail early with a clear message instead of an opaque auth error on the first API call.
    raise ValueError("GOOGLE_API_KEY is empty; add it to the Colab secrets panel.")
genai.configure(api_key=GOOGLE_API_KEY)

In [None]:
# Native Gemini client handle. Note that `model` is rebound to a LangChain
# chat model in a later cell.
model = genai.GenerativeModel("gemini-pro")

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [None]:
# LangChain chat wrapper for Gemini Pro, authenticated with GOOGLE_API_KEY.
llm = ChatGoogleGenerativeAI(
    google_api_key=GOOGLE_API_KEY,
    model="gemini-pro",
)

In [None]:
!pip install langchain

# **INITIALIZING MODEL AND LOADING THE QA CHAIN**

In [None]:
import urllib
import warnings
from pathlib import Path as p
from pprint import pprint

import pandas as pd
from langchain import PromptTemplate
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

# Suppress every warning category for the remainder of the session.
warnings.filterwarnings("ignore")
# Restart the Python kernel if there are issues with the langchain import.

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
# Chat model used by the QA chains below: Gemini Pro at temperature 0.2 with
# convert_system_message_to_human enabled. This rebinds the earlier `model`.
model = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key=GOOGLE_API_KEY,
    temperature=0.2,
    convert_system_message_to_human=True,
)

# **DYNAMICALLY LOADING PDF FILES FROM GOOGLE DRIVE**

In [None]:
from google.colab import drive
# Mount Google Drive (forcing a remount) and point root_dir at the PDF folder.
drive.mount(
    '/content/gdrive',
    force_remount=True,
)
root_dir = "/content/gdrive/My Drive/StockgroPDF"

Mounted at /content/gdrive


In [None]:
!pip install PyPDF2
from PyPDF2 import PdfReader
def load_pdf_content(root_dir):
    """Concatenate the extracted text of every ``.pdf`` file directly inside ``root_dir``.

    Args:
        root_dir: Path of the directory to scan (not recursive).

    Returns:
        One string holding the text of all PDFs, joined in sorted filename
        order. An empty string is returned when the directory has no PDFs.
    """
    pdf_texts = []
    # Sort so the concatenation order is reproducible; os.listdir order is arbitrary.
    for filename in sorted(os.listdir(root_dir)):
        if not filename.endswith('.pdf'):
            # Skip non-PDF entries. Previously the accumulation ran for every
            # directory entry, which re-appended the last PDF's text (or raised
            # UnboundLocalError when no PDF had been read yet).
            continue
        print(filename)
        filepath = os.path.join(root_dir, filename)
        with open(filepath, 'rb') as f:
            reader = PdfReader(f)
            # Guard against a page yielding no text so the join cannot fail.
            pdf_texts.append(''.join((page.extract_text() or '') for page in reader.pages))
    return ''.join(pdf_texts)
# Extract and concatenate the text of all PDFs found in root_dir.
full_data = load_pdf_content(root_dir)

In [None]:
# Sanity check: length of the concatenated extracted text.
len(full_data)

547217

# **FAISS VECTOR STORE SETUP**

In [None]:
!pip install faiss-cpu

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain.vectorstores import FAISS
# full_data is one raw string, not a list of Document objects, so
# FAISS.from_documents cannot consume it. Split it into chunks and build the
# index from plain texts instead.
faiss_chunks = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64).split_text(full_data)
vectorstore = FAISS.from_texts(
    texts=faiss_chunks,
    embedding=GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY),
)

# **CREATING VECTOR EMBEDDINGS**

In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Split the corpus into 1024-character chunks with a 64-character overlap.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
texts = text_splitter.split_text(full_data)

# Convert text data into embeddings using GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001", google_api_key=GOOGLE_API_KEY)
# Index the chunks in Chroma and expose a retriever returning the top 5 matches.
vector_index = Chroma.from_texts(texts, embeddings).as_retriever(search_kwargs={"k":5})
# The former `vector_index.storage_context.save_local("vector_index")` call was
# removed: a LangChain retriever has no `storage_context` attribute, so the cell
# raised AttributeError. To persist the index, pass `persist_directory=` to
# Chroma.from_texts instead.

In [None]:
# Retrieval QA chain using the default prompt; the retrieved source documents
# are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=vector_index,
    return_source_documents=True,
)

# **PROMPT SETUP AND LOAD QA CHAIN**

In [None]:
# Prompt with two placeholders, {context} and {question}, filled in by the chain.
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer.
And also add follow-up questions based on the question and answer given
{context}
Question: {question}

Follow-up Question:"""

QA_CHAIN_PROMPT = PromptTemplate.from_template(template)

# Rebuild the retrieval chain so that it uses the custom prompt above.
qa_chain = RetrievalQA.from_chain_type(
    llm=model,
    retriever=vector_index,
    return_source_documents=True,
    chain_type_kwargs=dict(prompt=QA_CHAIN_PROMPT),
)

# **SAMPLE QUERY AND RESPONSES**

In [None]:
question = "What is the per share revenue for Meta during 2022?"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
result["result"]

"I'm sorry, but I cannot find the answer to your question in the context provided."

In [None]:
# Re-display the most recent answer, this time rendered as Markdown.
Markdown(result["result"])

I'm sorry, but I cannot find the answer to your question in the context provided.

In [None]:
question = "Provide the Second Quarter 2023 Financial Highlights of Meta"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

I'm sorry, but the context you provided does not contain the Second Quarter 2023 Financial Highlights of Meta. Therefore, I cannot answer your question.

In [None]:
# Re-display the most recent answer rendered as Markdown.
Markdown(result["result"])

In [None]:
question = "Provide the Second Quarter 2023 table of Financial Highlights of Meta"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

| Three Months Ended June 30, | In millions, except percentages and per share amounts | 2023 | 2022 | % Change |
|---|---|---|---|---|
| Revenue | $ 31,999 | $ 28,822 | 11% |
| Costs and expenses | 22,607 | 20,464 | 10% |
| Income from operations | $ 9,392 | $ 8,358 | 12% |
| Operating margin | 29 % | 29 % | - |
| Provision for income taxes | $ 1,505 | $ 1,499 | -% |
| Effective tax rate | 16 % | 18 % | - |
| Net income | $ 7,788 | $ 6,687 | 16% |
| Diluted earnings per share (EPS) | $ 2.98 | $ 2.46 | 21% |

In [None]:
question = "give the net income of nvidia  for april 30 ,2023"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

In [None]:
question = "give the net income of meta and tesla  for Three Months Ended June 30,2023"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

Meta: $7,788 million
Tesla: $2,703 million

In [None]:
question = "according to the net income of meta and tesla  for Three Months Ended June 30,2023 which is better?"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

The provided context does not contain information about Meta's net income for the Three Months Ended June 30, 2023, so I cannot answer this question from the provided context.

In [None]:
question = "Which company do you think managed to keep their return on capital (ROE) over the next decade (2005-2014), in line or higher than the previous10 years? i.e. retained their moat"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

I'm sorry, I do not have access to the context you provided, so I cannot answer this question.

In [None]:
question = "Comparative Stock Charts of Walt Disney vs. Nintendo"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

Walt Disney went from 12.6% ROE (1995-2004) to 13.01% (2005-2014) while Nintendo went from 15.1% ROE (1995-2004) to 9.4% (2005-2014).  The stock prices? Disney was up 298% and Nintendo was down 11.5%.  
Rationale: Walt Disney Corporation has a strong brand of characters and theme parks; not to mention that ESPN is the most profitable television stations in the world.  Their brands are about as strong as they come, just ask any kid under the age of 10.  Nintendo, on the other hand, does have a decent brand, but video games are a hit and miss business.  Nintendo actually lost money in 2011 and 2013 because of poor sales.     

Follow up questions:

1. What were the main things that happened to Disney and Nintendo during the next decade (2005-2014)?
2. What could you identify as Disney’s moat?  What about Nintendo’s?  
3. What is it about that businesses that makes them sticky?  Is one “stickier” than the other?

In [None]:
question = "What were the main things that happened to Disney and Nintendo during the next decade (2005-2014)?"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

Disney increased from 13.7% to 30.9% return on equity (ROE) compared with HP whose ROE decreased from 12.2% to 10%.  People simply like iPhones more than PCs, and Apple has a much stronger brand image, ecosystem around its products, and quality of design than HP.   Specifically, Apple benefits from the economic moats of 1) a strong brand (they price their products at a premium) and 2) high switching costs (the Apple ecosystem makes it costly to switch to Android or Windows once you’ve bought movies, music and apps on iTunes).  HP really just makes another run of the mill computer for cheap.  And their stock prices?  Stock performance follows company performance, so Apple’s stock increased 975% while HP increased by a measly 40% over the time period.

In [None]:
question = "compare the cost and expenses for three months ended june 30,2023 and Six Months Ended June 30, for Meta with year 2022"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

| Period | Cost and Expenses | Change |
|---|---|---|
| Three Months Ended June 30, 2023 | $22,607 million | 10% |
| Six Months Ended June 30, 2023 | $44,026 million | 9% |
| Three Months Ended June 30, 2022 | $20,464 million | - |
| Six Months Ended June 30, 2022 | $39,848 million | - |

In [None]:
question = "What were the main things that happened to Disney and Nintendo during the next decade (2005-2014)?"
# Use .invoke(); calling the chain object directly is deprecated in LangChain.
result = qa_chain.invoke({"query": question})
Markdown(result["result"])

Disney increased from 13.7% to 30.9% return on equity (ROE) compared with HP whose ROE decreased from 12.2% to 10%.  People simply like iPhones more than PCs, and Apple has a much stronger brand image, ecosystem around its products, and quality of design than HP .   Specifically, Apple benefits from the economic moats of 1) a strong brand (they price their products at a premium) and 2) high switching costs (the Apple ecosystem makes it costly to switch to Android or Windows once you’ve bought movies, music and apps on iTunes).  HP really just makes another run of the mill computer for cheap.  And their stock prices?  Stock performance follows company performance, so Apple’s stock increased 975% while HP increased by a measly 40% over the time period.