In [1]:
# import Libraries
import os

import langchain
import pinecone
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Pinecone
from langchain.llms import CTransformers
from langchain.chains.question_answering import load_qa_chain

  from tqdm.autonotebook import tqdm


In [2]:
def read_doc(directory):
    """Load the contents of ``directory`` through ``PyPDFDirectoryLoader``.

    Args:
        directory: Path handed unchanged to ``PyPDFDirectoryLoader``.

    Returns:
        Whatever the loader's ``load()`` call returns (the notebook output
        shows a list of ``Document`` objects carrying ``page_content`` and
        ``metadata``).
    """
    return PyPDFDirectoryLoader(directory).load()

In [3]:
# Load everything under documents/ and echo the result as the cell output.
# `doc` is reused by the chunking cell further down.
doc=read_doc('documents/')
doc

[Document(page_content='TO BE APPEAR AT: IEEE COMMUNICATIONS SURVEYS & TUTORIALS, VOL. XX, NO. X, XX 2019 1\nSecurity of the Internet of Things:\nVulnerabilities, Attacks and Countermeasures\nIsmail Butun, Member, IEEE, Patrik ¨Osterberg, Member, IEEE,\nand Houbing Song, Senior Member, IEEE\nAbstract —Wireless Sensor Networks (WSNs) constitute one of the most promising third-millennium technologies and have wide range\nof applications in our surrounding environment. The reason behind the vast adoption of WSNs in various applications is that they have\ntremendously appealing features, e.g., low production cost, low installation cost, unattended network operation, autonomous and\nlongtime operation. WSNs have started to merge with the Internet of Things (IoT) through the introduction of Internet access capability\nin sensor nodes and sensing ability in Internet-connected devices. Thereby, the IoT is providing access to huge amount of data,\ncollected by the WSNs, over the Internet. Howev

In [4]:
# Report how many documents were loaded (the recorded output of this cell is
# a count, which a bare `doc` expression cannot produce).
len(doc)

25

In [5]:
## Dividing the doc into chunks
def chunk_data(docs,chunk_size=800,chunk_overlap=50):
    """Split ``docs`` into smaller pieces with ``RecursiveCharacterTextSplitter``.

    Args:
        docs: Documents to split; passed straight to ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size``.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.

    Returns:
        The result of the splitter's ``split_documents`` call.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(docs)

In [6]:
# Split the loaded documents with the default chunk settings, then show how
# many chunks were produced.
documents=chunk_data(docs=doc)
len(documents)

216

In [8]:
# Preview the first 100 chunks (the same slice that is later passed to
# Pinecone.from_documents).
documents[:100]

[Document(page_content='TO BE APPEAR AT: IEEE COMMUNICATIONS SURVEYS & TUTORIALS, VOL. XX, NO. X, XX 2019 1\nSecurity of the Internet of Things:\nVulnerabilities, Attacks and Countermeasures\nIsmail Butun, Member, IEEE, Patrik ¨Osterberg, Member, IEEE,\nand Houbing Song, Senior Member, IEEE\nAbstract —Wireless Sensor Networks (WSNs) constitute one of the most promising third-millennium technologies and have wide range\nof applications in our surrounding environment. The reason behind the vast adoption of WSNs in various applications is that they have\ntremendously appealing features, e.g., low production cost, low installation cost, unattended network operation, autonomous and\nlongtime operation. WSNs have started to merge with the Internet of Things (IoT) through the introduction of Internet access capability', metadata={'source': 'documents\\cyber attack.pdf', 'page': 0}),
 Document(page_content='in sensor nodes and sensing ability in Internet-connected devices. Thereby, the IoT is 

In [10]:
# all-mpnet-base-v2 is a general sentence-transformers model, not a BGE model.
# HuggingFaceBgeEmbeddings prepends a BGE retrieval instruction to every query
# by default, which would make query vectors inconsistent with the document
# vectors for this model. Passing an empty instruction disables that prefix.
# NOTE(review): the generic HuggingFaceEmbeddings wrapper would be the more
# natural choice if the import cell is updated.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    query_instruction="",
)

In [11]:
# Sanity check: embed a short query and show the length of the resulting vector.
vectors=embeddings.embed_query("How are you")
len(vectors)

768

In [12]:
## vector search DB
# Credentials are read from the environment instead of being written into the
# notebook: a key saved in the notebook is visible to anyone who can read the
# file (the previously embedded key should be revoked). A missing
# PINECONE_API_KEY raises KeyError here rather than failing later.
pinecone.init(
    api_key=os.environ["PINECONE_API_KEY"],
    environment=os.environ.get("PINECONE_ENVIRONMENT", "gcp-starter"),
)
index_name='langchainvector'
# Only the first 100 chunks are embedded and sent to the index, so similarity
# search cannot return anything from the remaining chunks.
index=Pinecone.from_documents(documents[:100],embeddings, index_name=index_name)

In [10]:
#cosine similarity
def retrieve_query(query,k=2):
    """Return the ``k`` best matches for ``query`` from the notebook's vector index.

    Args:
        query: Search text handed to ``index.similarity_search``.
        k: Number of matches to request (forwarded as ``k``).

    Returns:
        The result of ``index.similarity_search(query, k=k)``, where ``index``
        is the notebook-level vector store.
    """
    return index.similarity_search(query, k=k)

In [11]:
# Local Llama-2 chat model loaded through CTransformers, with the generation
# settings passed via `config`.
llm = CTransformers(
    config=dict(max_new_tokens=256, context_length=4096, temperature=0.01),
    model_type="llama",
    model="llama-2-7b-chat.ggmlv3.q8_0.bin",
)

# Question-answering chain of type "stuff" built on that model.
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [12]:
## search answers from vectorDB

def retrieve_answers(query, k=2):
    """Answer ``query`` using the chunks retrieved from the vector index.

    Args:
        query: Question text; used both for retrieval and as the chain's
            ``question`` input.
        k: Number of chunks to retrieve via ``retrieve_query``. Defaults to 2,
            matching ``retrieve_query``'s own default, so existing calls
            behave as before.

    Returns:
        The value returned by ``chain.run`` for the retrieved chunks.
    """
    doc_search = retrieve_query(query, k=k)
    # Echo the retrieved chunks so the context behind the answer is visible.
    print(doc_search)
    return chain.run(input_documents=doc_search, question=query)

In [13]:
# Ask a sample question. retrieve_answers prints the retrieved chunks itself,
# then the answer is printed below them.
our_query="what is Active attack "
ans=retrieve_answers(our_query)
print(ans)

[Document(page_content='8 TO BE APPEAR AT: IEEE COMMUNICATIONS SURVEYS & TUTORIALS, VOL. XX, NO. X, XX 2019\nof a previously hacked nodes [69]. Like the Sybil attack, the\nnode-replication (clone) attack also can enable attackers to\nsubvert data aggregation, misbehavior detection, and voting\nprotocols by injecting false data or suppressing legitimate\ndata [70].\n2.2.3.4 Routing Attacks:\n•Misdirection: In misdirection attack, an attacker for-\nwards ongoing messages to the wrong paths inten-\ntionally. This can be achieved by fabricating false\nrouting advertisements and causing routing tables of\nthe neighboring nodes’ to update these false informa-\ntion [57]. This attack is also categorized as DoS attack,\nhence targeted nodes are blacked out completely\nand do not receive any further packets after the\nadvertisement of the false routing information.\n•Network Partitioning: A fully connected network\nis portioned to sub-networks in which the nodes\nin different sub-networks canno