In [114]:
!pip install -q langchain
!pip install -q langchain-community
!pip install -q faiss-cpu
!pip install -q pypdf
!pip install -q langchain-openai
!pip install -q tiktoken

In [115]:
from google.colab import userdata
# Read the OpenAI key from the Colab secrets store so it is never hardcoded in the notebook.
OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")

In [117]:
import os
# Export the key through the process environment under the OPENAI_API_KEY variable.
os.environ.update({"OPENAI_API_KEY": OPENAI_API_KEY})

In [196]:
from langchain_openai import ChatOpenAI

In [199]:
# Chat model reused by the retriever and answer chains built later in this notebook.
# No model name is passed, so the library default is used.
llm = ChatOpenAI(temperature=0.6)

In [200]:
# Smoke test: confirm the model is reachable before building the retrieval pipeline.
greeting = "Hello how are you?"
llm.invoke(greeting)

AIMessage(content="Hello! I'm just a computer program, so I don't have feelings, but I'm here and ready to help you with anything you need. How can I assist you today?", additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 37, 'prompt_tokens': 12, 'total_tokens': 49, 'completion_tokens_details': {'audio_tokens': None, 'reasoning_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': None, 'cached_tokens': 0}}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-d597875c-d0eb-4400-99a9-164cb2023bd4-0', usage_metadata={'input_tokens': 12, 'output_tokens': 37, 'total_tokens': 49, 'input_token_details': {'cache_read': 0}, 'output_token_details': {'reasoning': 0}})

In [153]:
from langchain_community.document_loaders import PyPDFLoader

In [154]:
# Location of the PDF to index. This is an absolute path under /content, so the
# file must already exist there before the loader cell runs.
file_path = "/content/GE_Vernova_Sustainability_Report_2023.pdf"

In [155]:
# PDF loader bound to the report configured in `file_path`.
loader = PyPDFLoader(file_path=file_path)

In [156]:
# Load the PDF via the loader; `docs` is what gets passed to the text splitter below.
docs = loader.load()

In [157]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [158]:
# Splitter settings: chunks of up to 1000 units with a 200-unit overlap, where
# length is measured with `len` (i.e. characters); separators are treated as
# plain strings rather than regular expressions.
splitter_settings = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

In [159]:
# Break the loaded documents into overlapping chunks for embedding.
text_chunks = text_splitter.split_documents(documents=docs)

In [160]:
# Preview only the first two chunks: displaying the full list floods the
# notebook output with the text of the entire PDF.
text_chunks[:2]

[Document(metadata={'source': '/content/GE_Vernova_Sustainability_Report_2023.pdf', 'page': 0}, page_content='SUSTAINABILITY\nREPORT 2023GE Vernova Sustainability\nBuilding a more sustainable \nelectric power system'),
 Document(metadata={'source': '/content/GE_Vernova_Sustainability_Report_2023.pdf', 'page': 1}, page_content='THE CHALLENGES  WE CONFRONTELECTRIFY DECARBONIZE\nDEMAND FOR ELECTRICITY IS \nEXPECTED TO GROW BY MORE THAN \n50% IN THE NEXT 20 YEARS\nEnergy security and growing demand \non the grid, including from other sectors \nlooking to electrify, are driving the need \nfor new generation TODAY .ELECTRIC POWER SECTOR  \nEMISSIONS ACCOUNT FOR  \n~40% OF ALL HUMAN-MADE CO₂\nWe need to GROW RENEWABLES AND \nNUCLEAR AS QUICKLY AS POSSIBLE  \nthis decade  and beyond.GROWING INDUSTRIALIZATION TRENDS, \nINCLUDING DEMAND FROM DATA CENTERS, \nMUST BE MET WHILE DECARBONIZING\nWe must INNOVATE TODAY THE \nBREAKTHROUGH TECHNOLOGIES  the \nworld needs tomorrow to simultaneously \nelec

In [161]:
from langchain_openai import OpenAIEmbeddings
# Embedding model used to vectorise the chunks for the FAISS index.
EMBEDDING_MODEL = "text-embedding-3-large"
embeddings = OpenAIEmbeddings(model=EMBEDDING_MODEL)

In [162]:
from langchain_community.vectorstores import FAISS
# Build the FAISS vector store from the chunks, embedding each with `embeddings`.
vectordb = FAISS.from_documents(documents=text_chunks, embedding=embeddings)

In [163]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import MessagesPlaceholder
from langchain.chains import create_history_aware_retriever 

In [164]:
# Retriever view over the vector store, using the store's default search settings.
retriever = vectordb.as_retriever()

In [165]:
# System instruction for the question-rewriting step: turn a follow-up question
# into a standalone one. Adjacent string literals are concatenated verbatim, so
# each fragment ends with an explicit space to keep the sentences separated.
retriever_prompt = (
    "Given a chat history and the latest user question which might reference context in the chat history, "
    "formulate a standalone question which can be understood without the chat history. "
    "Do NOT answer the question, just reformulate it if needed and otherwise return it as is."
)

In [166]:
from langchain_core.prompts import ChatPromptTemplate

## History-Aware Retriever

In [167]:
# Prompt for the question-rewriting step: the system instruction, then the prior
# conversation (injected under "chat_history"), then the latest user input.
contextualize_messages = [
    ("system", retriever_prompt),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)
contextualize_q_prompt

ChatPromptTemplate(input_variables=['chat_history', 'input'], input_types={'chat_history': list[typing.Annotated[typing.Union[typing.Annotated[langchain_core.messages.ai.AIMessage, Tag(tag='ai')], typing.Annotated[langchain_core.messages.human.HumanMessage, Tag(tag='human')], typing.Annotated[langchain_core.messages.chat.ChatMessage, Tag(tag='chat')], typing.Annotated[langchain_core.messages.system.SystemMessage, Tag(tag='system')], typing.Annotated[langchain_core.messages.function.FunctionMessage, Tag(tag='function')], typing.Annotated[langchain_core.messages.tool.ToolMessage, Tag(tag='tool')], typing.Annotated[langchain_core.messages.ai.AIMessageChunk, Tag(tag='AIMessageChunk')], typing.Annotated[langchain_core.messages.human.HumanMessageChunk, Tag(tag='HumanMessageChunk')], typing.Annotated[langchain_core.messages.chat.ChatMessageChunk, Tag(tag='ChatMessageChunk')], typing.Annotated[langchain_core.messages.system.SystemMessageChunk, Tag(tag='SystemMessageChunk')], typing.Annotated[l

In [201]:
# Retriever that first uses the LLM and `contextualize_q_prompt` to rewrite the
# question in light of the chat history, then queries `retriever`.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [202]:
# System prompt for the answering step. It exposes two template variables:
# ``{context}`` and ``{input}``.
# The greeting instruction is stated once; the original repeated it on a second line.
# NOTE(review): qa_prompt also sends ``{input}`` as the human message, so the
# question reaches the model twice. Consider dropping the QUESTION / YOUR ANSWER
# lines here after confirming nothing else formats this template with ``input``.
CHAT_BOT_TEMPLATE = """
     You are a PDF document assistant bot, specialized in providing answers based on the contents of the PDF file.
     If someone requests a summary, you will summarize the entire document. Use your knowledge to enhance the answers
     based on the context. Ensure your responses are relevant to the question and concise yet informative. Please avoid
     repeating sentences or phrases. If someone greets you, make sure to greet them back.
     CONTEXT:
     {context}

     QUESTION: {input}

     YOUR ANSWER:

    """

In [170]:
# Prompt for the answering step: the assistant instructions, then the prior
# conversation (injected under "chat_history"), then the user's question.
qa_messages = [
    ("system", CHAT_BOT_TEMPLATE),
    MessagesPlaceholder(variable_name="chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

In [203]:
# Chain that feeds the retrieved documents plus the question through `qa_prompt` to the LLM.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)

In [204]:
# Full RAG pipeline: history-aware retrieval followed by the answering chain.
chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [205]:
# Empty list placeholder for the conversation history.
# NOTE(review): nothing in the visible cells reads this variable; the per-session
# history is kept in `store` instead. Confirm before removing.
chat_history = list()

In [206]:
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

In [207]:
# In-memory registry of chat histories, keyed by session id (filled lazily by
# get_session_history). Contents are lost when the kernel restarts.
store: dict = dict()

In [208]:
def get_session_history(session_id: str) -> BaseChatMessageHistory:
    """Return the chat history registered for ``session_id``.

    Histories live in the module-level ``store`` dict. The first call for a
    given id creates an empty ``ChatMessageHistory`` and registers it; later
    calls with the same id return that same object.
    """
    if session_id in store:
        return store[session_id]
    history = ChatMessageHistory()
    store[session_id] = history
    return history

In [209]:
# Wrap the RAG chain with per-session message history. The keys tell the wrapper
# which input field holds the user message ("input"), under which name the
# history is passed to the chain ("chat_history"), and which output field
# holds the reply ("answer").
conversational_rag_chain = RunnableWithMessageHistory(
    runnable=chain,
    get_session_history=get_session_history,
    history_messages_key="chat_history",
    input_messages_key="input",
    output_messages_key="answer",
)

In [210]:
# First turn of the conversation in session "abc123"; display only the answer text.
session_config = {"configurable": {"session_id": "abc123"}}
first_response = conversational_rag_chain.invoke(
    {"input": "Tell me about the ge vernova??"},
    config=session_config,
)
first_response["answer"]

'GE Vernova, an independent publicly traded company, emerged from a spin-off from GE on April 2, 2024. With around 75,000 team members worldwide, GE Vernova focuses on electrifying and decarbonizing the world. They offer solutions in Power, Wind, and Electrification segments, supported by Accelerators that drive innovation with a yearly investment of approximately $1 billion in research and development. GE Vernova aims to enhance electricity systems globally, increase power accessibility, and reduce carbon emissions, contributing to a sustainable energy transition.'

In [211]:
# Follow-up turn in the same session ("abc123"), so the stored history from the
# previous turn is available to the chain; display only the answer text.
followup_config = {"configurable": {"session_id": "abc123"}}
followup_response = conversational_rag_chain.invoke(
    {"input": "tell me somethig more??"},
    config=followup_config,
)
followup_response["answer"]

"GE Vernova is purpose-built to address the challenges of the energy transition by providing innovative solutions across its Power, Wind, and Electrification segments. The company emphasizes collaboration within its various business units to deliver reliable, affordable, and secure electricity systems while also focusing on expanding access to power and reducing carbon emissions. GE Vernova's commitment to sustainability and technological advancement positions it as a key player in the global effort to address energy needs while mitigating environmental impact."