In [11]:
import sqlite3
conn = sqlite3.connect("storage/file_monitor.db")
cursor =conn.cursor()

cursor.execute("""
CREATE TABLE IF NOT EXISTS files (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    file_name TEXT NOT NULL
);
""")
conn.commit()

In [1]:
from langchain_community.document_loaders import PyPDFLoader

file_paths = [
    'storage/2022 M&I Shutdown closure report.pdf',
    'storage/FCC INDMAX OPERATING MANUAL 2020.pdf',
    'storage/FINAL DOCUMENT DATA BOOK.pdf'
]

pages = []

for file_path in file_paths:
    loader = PyPDFLoader(file_path)
    for page in loader.lazy_load():
        pages.append(page)

In [2]:
import getpass
from dotenv import load_dotenv
from config import get_config

key=get_config()






In [4]:
import os
os.environ["OPENAI_API_KEY"] = key['KEY']

from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

In [5]:
import faiss
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)
batch_size = 100  # you can tune this depending on GPU/CPU RAM

for i in range(0, len(pages), batch_size):
    batch = pages[i:i + batch_size]
    vector_store.add_documents(batch)

In [None]:
index_dir = "faiss_indexes" 
vector_store= FAISS.load_local('faiss',embeddings,

In [6]:
data=vector_store.similarity_search('''The primary gas vents contain orifices that keep the gas film between the faces.
Ensure that they are installed before the initial compressor startup or after turnaround. Without those
orifices, the primary seal will fail. The location and elevation of those orifices are important compared
to the compressor shaft center. GP''')



In [None]:
from llm.bot import load_gpt_llm
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from loging import setup_logger
from config import load_open_ai_api_config

config_values = load_open_ai_api_config()
key = config_values["api_key"]
logger = setup_logger('faiss_maker')
embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=key)

llm = load_gpt_llm()

# Define State
class ChatState(TypedDict):
    messages: Annotated[list, operator.add]
    context: str
    sources: list

# Node function
def retrieve_and_generate(state: ChatState):
    messages = state["messages"]
    last_message = messages[-1].content
    
    # Retrieve from FAISS
    index_dir = "faiss_indexes"
    vector_store = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
    docs = vector_store.similarity_search(last_message, k=3)
    
    # Extract context and sources
    context = "\n\n".join([doc.page_content for doc in docs])
    sources = [doc.metadata.get('filename', 'Unknown') for doc in docs]
    unique_sources = list(set(sources))
    
    # Build prompt with history
    system_msg = SystemMessage(content="You are to generate the relevant answer based on the context for the input question")
    context_msg = SystemMessage(content=f"Context:\n{context}")
    
    # Prepare messages for LLM (include history)
    llm_messages = [system_msg, context_msg] + messages
    

    final_answer= llm.invoke(llm_messages)
    
    # Add sources to response
    response_with_sources = f"{final_answer.content}\n\n**Sources:** {', '.join(unique_sources)}"
    
    return {
        "messages": [AIMessage(content=response_with_sources)],
        "context": context,
        "sources": unique_sources
    }

# Build graph
workflow = StateGraph(ChatState)
workflow.add_node("rag_agent", retrieve_and_generate)
workflow.set_entry_point("rag_agent")
workflow.add_edge("rag_agent", END)

# Compile with memory
memory = InMemorySaver()
app = workflow.compile(checkpointer=memory)

# Usage function
def context_aware_chatbot_stream(question, thread_id="default"):
    config = {"configurable": {"thread_id": thread_id}}
    
    # Use stream_mode="messages" for token-by-token streaming
    for message_chunk, metadata in app.stream(
        {"messages": [HumanMessage(content=question)]}, 
        config,
        stream_mode="messages"  # ✅ This enables token streaming
    ):
        # Only yield content chunks (skip empty ones)
        if message_chunk.content:
            yield message_chunk.content


# # Streaming version
# def context_aware_chatbot_stream(question, thread_id="default"):
#     """
#     Streaming version - yields chunks
#     """
#     index_dir = "faiss_indexes"
#     vector_store = FAISS.load_local(index_dir, embeddings, allow_dangerous_deserialization=True)
#     docs = vector_store.similarity_search(question, k=3)
    
#     context = "\n\n".join([doc.page_content for doc in docs])
#     sources = [doc.metadata.get('filename', 'Unknown') for doc in docs]
#     unique_sources = list(set(sources))
    
#     # Get conversation history
#     config = {"configurable": {"thread_id": thread_id}}
#     state = app.get_state(config)
#     history_messages = state.values.get("messages", []) if state.values else []
    
#     # Build prompt  with history
#     system_msg = f"""You are to generate the relevant answer based on the context for the input question
    
# Context:
# {context}"""
    
#     messages = [SystemMessage(content=system_msg)] + history_messages + [HumanMessage(content=question)]
    
#     # Stream response
#     full_response = ""
#     for chunk in llm.stream(messages):
#         full_response += chunk.content
#         yield chunk.content
    
#     # Yield sources
#     sources_text = f"\n\n**Sources:** {', '.join(unique_sources)}"
#     yield sources_text
    
#     # Save to memory after streaming
#     app.update_state(
#         config,
#         {
#             "messages": [
#                 HumanMessage(content=question),
#                 AIMessage(content=full_response + sources_text)
#             ]
#         }
#     )

In [53]:
answer=context_aware_chatbot_stream("hello can you tell me about CRITICAL JOB: REVISIONING OF REMOSA VALVES ")

In [45]:
config = {"configurable": {"thread_id": "default"}}
a=app.invoke(
        {"messages": [HumanMessage(content="hello can you tell me about CRITICAL JOB: REVISIONING OF REMOSA VALVES ")]}, 
        config,

    )

In [51]:
a['messages'][-1].content

'Here’s a concise overview of the CRITICAL JOB: REVISIONING OF REMOSA VALVES.\n\nWhat it was\n- A critical refurbishment/revisioning of REMOSA valves conducted during the IOCL Paradip Refinery M&I shutdown in 2022.\n- Carried out in the presence of the OEM, M/s REMOSA SRL.\n- A detailed OEM report was provided; the summary focuses on the eight valves touched and the spare parts replaced.\n\nValves touched and spares replaced\n1) Tag 23-LV-41 – Spent Catalyst Slide Valve\n   - Replaced: Orifice plate, packing gland, etc.\n2) Tag 23-TV-03 – Regenerated Catalyst Slide Valve\n   - Replaced: Orifice plate, disc, stem, packing gland, etc.\n3) Tag 23-XZV-42 – Diverter Valve\n   - Replaced: 36” SPWD gasket\n4) Tag 23-HV-17 – Double Disc Slide Valve\n   - Replaced: Stem, packing ring, bolts, lantern ring, etc.\n5) Tag 23-XV-2013 – PRT Flue gas Inlet Isolation Valve\n   - Remarks: No spare listed\n6) Tag 23-PV-96 – Flue gas to PRT inlet Butterfly valve\n   - Replaced: Gland packing, lantern ring

In [29]:
app.get_state(config={"configurable": {"thread_id":"default"}})

StateSnapshot(values={'messages': [HumanMessage(content='hello can you tell me about CRITICAL JOB: REVISIONING OF REMOSA VALVES ', additional_kwargs={}, response_metadata={}), AIMessage(content='Here’s a concise summary of the CRITICAL JOB: REVISIONING OF REMOSA VALVES from the IOCL Paradip PDR M&I shutdown 2022 document you provided.\n\nWhat it was\n- A critical refurbishment/revisioning task of REMOSA valves conducted during the IOCL Paradip Refinery shutdown in 2022.\n- Performed in the presence of the OEM M/s REMOSA SRL.\n- A detailed report was provided by the OEM; the message here is the summarized tabulated data of the job and the components involved.\n\nValves and replacements (spares replaced)\n1) Tag 23-LV-41 – Spent Catalyst Slide Valve\n   - Replaced: Orifice plate, packing gland, etc.\n2) Tag 23-TV-03 – Regenerated Catalyst Slide Valve\n   - Replaced: Orifice plate, disc, stem, packing gland, etc.\n3) Tag 23-XZV-42 – Diverter Valve\n   - Replaced: 36” SPWD gasket\n4) Tag 2

In [54]:
for chunk in answer:
    print(chunk)

content='' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content='Here' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content='’s' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content=' a' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content=' concise' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content=' summary' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content=' of' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content=' the' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content=' “' additional_kwargs={} response_metadata={} id='run--39047bcb-291f-4ccc-afa7-acdc6255030d'
content='CR' additional_kwargs={} response_metadata={} id='run--390

In [13]:
answer

<generator object context_aware_chatbot_stream at 0x7f6ce3296340>

In [22]:
answer2=context_aware_chatbot_stream('what was the previos answer about')

In [23]:
for chunk in answer2:
    print(chunk)

It
 was
 a
 concise
 summary
 of
 the
 “
CR
ITICAL
 JOB
:
 REV
ISION
ING
 OF
 REM
OSA
 VAL
VES
”
 from
 the
 IO
CL
 Parad
ip
 P
DR
 M
&I
 shutdown
 
202
2
.
 Specifically
,
 it
 described
:


-
 The
 task
 as
 a
 critical
 refurbishment
/re
vision
ing
 of
 REM
OSA
 valves
 during
 the
 Parad
ip
 refinery
 shutdown
,
 with
 OEM
 involvement
 (
M
/s
 REM
OSA
 SR
L
).

-
 The
 eight
 valves
 touched
 and
 the
 spare
 parts
 replaced
 for
 each
 (
e
.g
.,
 
23
-L
V
-
41
,
 
23
-TV
-
03
,
 
23
-X
Z
V
-
42
,
 etc
.),
 plus
 notes
 where
 no
 spare
 was
 listed
.

-
 Lessons
 learned
,
 mainly
 about
 essential
 tools
 and
 consum
ables
 needed
 for
 removing
 components
.

-
 A
 note
 that
 a
 detailed
 OEM
 report
 exists
,
 and
 the
 sources
 of
 the
 information
.


If
 you
 want
,
 I
 can
 pull
 out
 details
 for
 a
 specific
 valve
 or
 summarize
 the
 OEM
 findings
 more
 granular
ly
.
It was a concise summary of the “CRITICAL JOB: REVISIONING OF REMOSA VALVES” from the IOCL Paradip PD