In [1]:
!pip --quiet install langchain-openai langchain langchainhub openai chromadb tiktoken pypdf tavily-python urllib3==1.26.15
import os
import chromadb

# prefer this is set already in OS level...
os.environ.setdefault('OPENAI_API_KEY', '<put your key here if not in env>')

from langchain_openai import OpenAI, ChatOpenAI, OpenAIEmbeddings
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.document_loaders import PyPDFLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m23.3.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [2]:
# Build a Chroma-backed index in one step: the PDF is parsed, its text is chunked,
# each chunk is embedded, and the resulting vectors are kept in memory.
loader = PyPDFLoader("https://www.security.ntt/reports/Cyber-Security-Reports-2023-01-01.pdf")

index_creator = VectorstoreIndexCreator(
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),  # how documents are chunked
    embedding=OpenAIEmbeddings(),  # embedding model applied to the chunks
    vectorstore_cls=Chroma,  # vector store that indexes and searches the embeddings
)
index = index_creator.from_loaders([loader])

# Fetch the "langchain" collection through a chromadb client and report how many items it holds.
client = chromadb.Client()
collection = client.get_collection(name="langchain")
print(f"Total items in collection:{collection.count()}")

Total items in collection:18


In [3]:
# Single question string reused by every chain invocation below, so the
# answers and timings are all for the same input.
question = "How can ChatGPT be used in cyber attacks?"

## Testing run with LCEL chain / chain_type=stuff

In [4]:
## with LCEL chain
# Open the "langchain" collection by name and expose it as a retriever.
db = Chroma(collection_name="langchain",embedding_function=OpenAIEmbeddings())

retriever = db.as_retriever()


def format_docs(docs):
    """Join the text of retrieved documents into a single context string.

    Args:
        docs: iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' ``page_content`` values separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = ChatPromptTemplate.from_template(template)

model = ChatOpenAI()

chain = (
    # Pipe the retriever through format_docs so {context} receives plain text
    # instead of the string form of a list of Document objects (which would
    # also drag metadata and repr noise into the prompt).
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

chain.invoke(question)

'ChatGPT can be used in cyberattacks for various purposes, such as generating text for phishing emails and social hacking. It can interact with humans and impersonate specific individuals, making it useful for scams and other fraudulent activities. Additionally, ChatGPT can store information about program development, which could be exploited for the development of malware. However, OpenAI, the developer of ChatGPT, has banned antisocial use and prevents the AI from answering questions that could lead to abuse or cyberattacks.'

## Testing 4 different chain_types and their execution times.

In [5]:
# Time the shared question against the index with chain_type="map_reduce" (a new OpenAI() LLM is built for this call).
%time index.query(llm=OpenAI(), question=question, chain_type="map_reduce")

CPU times: user 125 ms, sys: 36.5 ms, total: 162 ms
Wall time: 17 s


' It is believed that ChatGPT can be used in cyberattacks, and its developer OpenAI has taken steps to prevent its misuse. However, as the use of generative AI becomes more prevalent, unexpected uses may be discovered in the realm of cybersecurity.'

In [6]:
# Time the shared question against the index with chain_type="stuff" (a new OpenAI() LLM is built for this call).
%time index.query(llm=OpenAI(), question=question, chain_type="stuff")

CPU times: user 49.5 ms, sys: 5 ms, total: 54.5 ms
Wall time: 8.81 s


' ChatGPT can be used in cyber attacks by generating text that can be used for phishing emails or developing malware.'

In [7]:
# Time the shared question against the index with chain_type="refine" (a new OpenAI() LLM is built for this call).
%time index.query(llm=OpenAI(), question=question, chain_type="refine")

CPU times: user 75.2 ms, sys: 6.4 ms, total: 81.6 ms
Wall time: 34.1 s


"\n\nChatGPT can be used in cyber attacks by generating email drafts and refining them to deceive recipients, leading them to open attachments that contain malicious VBA code. This has already been seen in real-world attacks, such as the Royal Mail cyber attack in 2022, where hackers used ChatGPT to craft convincing emails containing ransomware. This trend has raised concerns about the potential for AI-generated content to be used for malicious purposes, as it can bypass traditional email security measures. Additionally, the use of ChatGPT in cyber attacks has been observed by security researchers and companies, such as Kaspersky and Check Point Research, who have warned about the potential threats posed by this technology.\n\nMoreover, ChatGPT has the capability to generate text that is convincing to humans and can be used for social hacking, such as phishing emails. With the rapid development and adoption of generative AI, ChatGPT and other similar tools may hold the key to future cy

In [8]:
# Time the shared question against the index with chain_type="map_rerank".
# NOTE: this call sometimes fails; see https://github.com/langchain-ai/langchain/issues/12459
# Re-running the cell may succeed -- TODO confirm against the linked issue.
%time index.query(llm=OpenAI(), question=question, chain_type="map_rerank")



CPU times: user 52.6 ms, sys: 5.26 ms, total: 57.8 ms
Wall time: 11.4 s


' ChatGPT can be used to generate email drafts for phishing attacks and develop VBA code embedded in Excel macro files.'