# Installing Libraries

In [31]:
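# One-time setup (uncomment to run). %pip installs into the kernel's own environment.
# %pip install faiss-gpu  # use faiss-cpu instead on machines without a CUDA GPU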

In [32]:
# One-time setup (uncomment to run). %pip installs into the kernel's own environment.
# %pip install -q langchain
# %pip install -q langchain_community
# %pip install -q sentence_transformers
# %pip install -q bitsandbytes
# %pip install -q accelerate
# %pip install -q ctransformers
# %pip install -q pypdf

# Libraries

In [33]:
import os
import warnings
warnings.filterwarnings("ignore") #ignores all warnings
from langchain.llms import CTransformers #used for quantised model
from sentence_transformers import SentenceTransformer #embeddings
from langchain.embeddings import HuggingFaceEmbeddings #embeddings for sentence and token level
from langchain.document_loaders import PyPDFLoader #to import PDFs
from langchain.text_splitter import RecursiveCharacterTextSplitter #converting text to tokens
from langchain.vectorstores import FAISS #vectorDB
from langchain.chains import ConversationalRetrievalChain #retrieves the relevant documents and stores the history
from langchain.prompts import PromptTemplate
import pickle #store the model


# LLM

In [34]:
# Model weights file handed to CTransformers below. The path is relative, so it is
# resolved against the notebook's working directory.
# Judging by the file name this is a 4-bit (q4_0) GGML build of Llama-2-7B-chat.
model_path = "llama-2-7b-chat.ggmlv3.q4_0.bin"

In [35]:
# Local quantised LLM served through ctransformers.
llm = CTransformers(
    model=model_path,
    model_type="llama",
    config={
        # Upper bound on the number of *tokens* (not words) generated per answer.
        "max_new_tokens": 600,
        # Near-zero temperature keeps answers as repeatable as possible.
        "temperature": 0.01,
        # Maximum prompt + completion length in tokens. Llama 2 was trained with a
        # 4096-token window; asking for more (previously 5000) goes beyond what the
        # model supports and degrades output once the prompt grows past 4096.
        "context_length": 4096,
    },
)

In [36]:
# Load the BAAI/bge-base-en-v1.5 sentence-embedding model, using the current
# working directory as the model cache folder.
embedding_model = SentenceTransformer(
    "BAAI/bge-base-en-v1.5",
    cache_folder=".",
)

In [37]:
# Directory of the locally cached BAAI/bge-base-en-v1.5 snapshot.
# The default is the original author's machine-specific location; set the
# OIC_EMBEDDING_MODEL_PATH environment variable to run the notebook elsewhere
# without editing this cell.
embedding_model_path = os.environ.get("OIC_EMBEDDING_MODEL_PATH") or (
    "C:/Users/snehsrin/Desktop/All/Self/OIC/oic/oic/spiders/"
    "models--BAAI--bge-base-en-v1.5/snapshots/a5beb1e3e68b9ab74eb54cfd186867f64f240e1a"
)

In [38]:
# LangChain embedding wrapper around the local model snapshot; this is the
# embedding function later handed to the FAISS vector store.
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_path,
    model_kwargs={"device": "cpu"},  # run the encoder on the CPU
    encode_kwargs={"normalize_embeddings": True},  # ask the encoder for normalised vectors
)

# Text Splitter

In [46]:
# Folder holding the source PDFs (1.pdf, 2.pdf, ...).
# The default is the original author's machine-specific location; set the
# OIC_PDF_DIR environment variable to point the notebook at another folder.
base_path = (
    os.environ.get("OIC_PDF_DIR")
    or "C:/Users/snehsrin/Desktop/All/Self/OIC/oic/oic/spiders/pdfs/"
)
# The loading loop builds paths as f"{base_path}{i}.pdf", so the folder must end
# with a path separator.
if not base_path.endswith(("/", "\\")):
    base_path += "/"

In [47]:
# Splitter that cuts each page into overlapping chunks. The chunk lengths printed
# further down are character counts and never exceed chunk_size.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # maximum chunk length
    chunk_overlap=200,  # overlap between consecutive chunks
)

In [51]:
# Number of source PDFs; they are named 1.pdf ... NUM_PDFS.pdf inside base_path.
NUM_PDFS = 107

# Accumulates the chunks of every PDF; this list is what gets embedded later.
all_chunks = []

for i in range(1, NUM_PDFS + 1):
    print(i)  # progress indicator
    pdf_path = f"{base_path}{i}.pdf"
    pdf_reader = PyPDFLoader(pdf_path)

    # The loader yields one Document per PDF page (see the 'page' metadata key
    # in the chunk preview below).
    document = pdf_reader.load()

    # Split the pages into overlapping chunks and collect them.
    chunks = text_splitter.split_documents(document)

    all_chunks.extend(chunks)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107


In [52]:
# `document` is reassigned on every pass of the loading loop, so this prints the
# number of page-level Documents in the LAST PDF loaded only, not in the whole corpus.
print(len(document))

29


In [43]:
# Preview only the first few chunks; echoing the entire list floods the notebook
# output and bloats the saved file.
all_chunks[:3]

[Document(metadata={'source': 'C:/Users/snehsrin/Desktop/All/Self/OIC/oic/oic/spiders/pdfs/1.pdf', 'page': 0}, page_content='Oracle® Cloud\nGetting Started with Oracle Integration\nGeneration 2\nF31221-33\nOctober 2022'),
 Document(metadata={'source': 'C:/Users/snehsrin/Desktop/All/Self/OIC/oic/oic/spiders/pdfs/1.pdf', 'page': 1}, page_content='Oracle Cloud Getting Started with Oracle Integration Generation 2, \nF31221-33\nCopyright © 2020, 2022, Oracle and/or its affiliates.\nPrimary Author: Oracle Corporation\nThis software and related documentation are provided under a license agreement containing restrictions on\nuse and disclosure and are protected by intellectual property laws. Except as expressly permitted in your\nlicense agreement or allowed by law, you may not use, copy, reproduce, translate, broadcast, modify, license,\ntransmit, distribute, exhibit, perform, publish, or display any part, in any form, or by any means. Reverse\nengineering, disassembly, or decompilation of th

In [44]:
# Print the character length of every chunk, in order, as a sanity check on the
# splitter settings.
for chunk in all_chunks:
    print(len(chunk.page_content))

89
999
947
903
975
772
958
951
781
963
943
964
989
398
967
824
443
950
834
698
999
346
978
856
965
829
983
242
934
471
996
199
971
539
950
999
557
136
517
995
937
949
386
281
994
339
97
91
839
765
113
955
975
991
374
881
881
960
955
974
955
816
988
994
907
918
962
347
801
951
972
246
317
973
773
989
539
994
791
966
608
998
875
959
966
692
997
673
964
374
950
940
943
681
995
994
435
957
989
187
940
923
765
987
997
225
987
991
531
978
859
983
981
545
940
921
488
973
966
586
960
956
282
987
963
412
948
954
379
947
946
334
964
982
240
946
985
520
974
981
219
945
927
818
996
992
201
968
989
658
981
963
515
998
993
806
934
986
981
950
701
966
819
944
998
495
956
972
532
926
959
590
976
962
980
997
431
998
994
438
977
970
484
995
955
975
837
957
994
622
948
993
435
952
927
947
964
535
984
868
934
984
735
1000
964
582
943
851
960
985
453
939
962
249
983
966
307
995
848
958
980
315
919
919
805
931
960
248
943
969
494
932
987
276
981
911
926
968
312
545
771
915
767
918
940
730
471
928
858
922
99

# Storing in Database

In [53]:
# Build the FAISS vector store from all collected chunks, embedding them with
# the `embeddings` model defined above.
vector_db = FAISS.from_documents(
    documents=all_chunks,
    embedding=embeddings,
)

In [54]:
# Storing vector index create in local
# Persist the freshly built vector store to a local pickle file.
# NOTE(review): LangChain's FAISS store also offers save_local()/load_local();
# consider switching both this cell and the loading cell together.
file_path = "oic_llama.pkl"
with open(file_path, mode="wb") as index_file:
    pickle.dump(vector_db, index_file)

In [55]:
# Reload the pickled vector store from disk, if the file is present.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load pickle files that this notebook wrote itself.
# NOTE(review): the loaded object is bound to `vectorIndex`, but the retriever in
# the next cell is built from `vector_db`, so this reloaded copy is not used by
# any cell visible here. If the file is missing, `vectorIndex` is simply left
# undefined.
if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)

In [56]:
# Expose the in-memory vector store as a retriever (no search options are passed,
# so the library defaults apply); the conversational chain queries it for context.
retriever = vector_db.as_retriever()  # retrieves the stored chunks relevant to a query

# Prompt and Retrieval

In [59]:
# Prompt that turns a follow-up question plus the chat history into a single
# standalone question, which is then used for retrieval.
# Placeholders: {chat_history} and {question}.
template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow up Input: {question}
Standalone question: """

# Example of the filled-in prompt and the kind of completion expected from the model:
#
# Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
# Chat History:
# User: How do I reset my password?
# Bot: You can reset it by clicking 'Forgot Password' on the login page.
# Follow up Input: What if I don't receive the reset email?
# Standalone question: What should I do if I don't receive the password reset email after clicking 'Forgot Password' on the login page?

In [60]:
# Wrap the raw template string in a PromptTemplate so the chain can fill in
# {chat_history} and {question} when condensing a follow-up question.
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(template)  # template string -> prompt object

In [61]:
# Conversational RAG chain: condenses the question with the chat history, fetches
# the relevant documents through the retriever and has the LLM answer from them.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    return_source_documents=True,  # also return the documents used to generate the answer
    verbose=True,  # log each sub-chain and its formatted prompt
)

# Asking the Query

In [62]:
# Start a new conversation (re-running this cell clears any earlier history)
# and ask the first question.
chat_history = []
query = "Explain about alert notification in OIC2?"

result = chain.invoke(
    {
        "question": query,
        "chat_history": chat_history,
    }
)
print(result["answer"])



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

• Configure the email for the alert notification:
– To: Enter one or more individual or group email addresses, separated by
commas.
– From: Select a sender from the dropdown list, which is populated by the
configuration on the Notifications page in Oracle Integration. See Send
Service Failure Alerts, System Status Reports, and Integration Error
Reports by Notification Emails in Using Integrations in Oracle Integration.
– Subject and Body: Enter text you want to provide for the notification. The
body text supports HTML markup tags. Type $ to select and insert the
unique instance identifier, indicators, or a link to the Business Transaction
Details dashboard.
Note:
To be successfully extracted,

In [63]:
from langchain_core.messages import HumanMessage,AIMessage

In [64]:
# Record the finished turn (user question, then model answer) so that a follow-up
# question can be condensed against it.
# Note: running this cell again appends the same turn a second time.
chat_history += [
    HumanMessage(content=query),
    AIMessage(content=result["answer"]),
]

In [65]:
# Display the accumulated conversation (alternating HumanMessage / AIMessage entries).
chat_history

[HumanMessage(content='Explain about alert notification in OIC2?', additional_kwargs={}, response_metadata={}),
 AIMessage(content=' In Oracle Integration Cloud (OIC), an alert notification is a way to notify users of important events or issues related to their processes. The email contains information about the event, such as the name of the process that failed, the status of the failure, and any other relevant details. Users can customize the email content by adding their own text or using predefined placeholders for dynamic data. Additionally, users can configure the email to be sent only to specific recipients based on their role in the process.', additional_kwargs={}, response_metadata={})]