In [1]:
from datasets import load_dataset

# Fetch only the "train" split of the ATT&CK vectors dataset from the Hugging Face Hub.
dataset = load_dataset(
    "jiandong/crimson-attck-vectors",
    split="train",
)
def format_func(data):
    """Render a record as one line of comma-separated ``label: value`` pairs.

    Args:
        data: A mapping that provides the keys ``id``, ``attck_id``,
            ``attck_name``, ``description``, ``kill_chain_phases``,
            ``domains`` and ``tactic_type``. Any other keys are ignored.

    Returns:
        A string such as ``"id: ..., attck_id: ..., attck_name: ..."``.
        Each value is interpolated with its default string form, so list
        values appear as Python list literals.

    Raises:
        KeyError: If ``data`` lacks one of the keys listed above.
    """
    # Every field uses the same "label: value" shape; the earlier template
    # fused labels and values together from the third field onwards.
    field_names = (
        "id",
        "attck_id",
        "attck_name",
        "description",
        "kill_chain_phases",
        "domains",
        "tactic_type",
    )
    return ", ".join(f"{name}: {data[name]}" for name in field_names)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Show the dataset schema (column names and feature types) before building texts from it.
print(dataset.features)

{'id': Value(dtype='string', id=None), 'attck_id': Value(dtype='string', id=None), 'attck_name': Value(dtype='string', id=None), 'description': Value(dtype='string', id=None), 'kill_chain_phases': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None), 'domains': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None), 'tactic_type': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None), 'embedding': Sequence(feature=Value(dtype='float32', id=None), length=-1, id=None)}


In [4]:
from dotenv import load_dotenv
import os 
# Load variables from a local .env file into the process environment (python-dotenv).
# NOTE(review): no visible cell reads os.environ directly; presumably the values are
# consumed by the libraries imported below — confirm.
load_dotenv()


from langchain_community.vectorstores.faiss import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunks are at most 1000 characters, with 200 characters of overlap between neighbours.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)

# Format and split ONE RECORD AT A TIME. Indexing the whole dataset by column name
# (which is what passing `dataset` itself to format_func does) yields entire columns,
# so every id, then every name, etc. would be dumped into one giant string and the
# resulting chunks would no longer tie a description to its technique.
record_texts = [format_func(record) for record in dataset]
splits = [
    chunk
    for record_text in record_texts
    for chunk in text_splitter.split_text(record_text)
]

# Embed every chunk and index it in an in-memory FAISS store. OllamaEmbeddings() is
# created without arguments, so it relies on the library's default model and server.
vectorstore = FAISS.from_texts(texts=splits, embedding=OllamaEmbeddings())
retriever = vectorstore.as_retriever()

In [None]:
# Run a retrieval query against the FAISS-backed retriever.
# NOTE(review): the query string is empty and `docs` is not used by any later visible
# cell — this looks like a placeholder smoke test; confirm or supply a real query.
docs = retriever.invoke("")

In [1]:
from langchain import hub
# Pull the "rlm/rag-prompt" prompt from the LangChain Hub; later cells feed it
# `context` and `question` inputs.
prompt = hub.pull("rlm/rag-prompt")

In [None]:
# Exploratory: list the attributes available on the pulled prompt object.
dir(prompt)

In [None]:
# Render the RAG prompt with placeholder inputs to see the exact text the model receives.
example_messages = prompt.invoke(
    {"context": "filler context", "question": "filler question"}
).to_messages()
# Jupyter only echoes the LAST expression of a cell, so a bare `example_messages`
# line placed mid-cell displays nothing; the first message is printed explicitly.
print(example_messages[0].content)

In [6]:
from langchain_community.llms.ollama import Ollama

# Create the Ollama LLM wrapper with no arguments, i.e. with the library's default
# model and server settings.
llm = Ollama()
# Smoke test: send a trivial prompt to check that the model responds.
llm.invoke("test")

'Hello! How can I assist you today? Is there something specific you would like to know or discuss?'

In [7]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Concatenate the ``page_content`` of each document into one string.

    Args:
        docs: An iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The contents in their original order, separated by one blank line
        (``"\\n\\n"``); an empty string when ``docs`` yields nothing.
    """
    blank_line = "\n\n"
    contents = [document.page_content for document in docs]
    return blank_line.join(contents)


# RAG pipeline: the incoming question is used twice — once to retrieve documents,
# which format_docs merges into the "context" text, and once passed through
# unchanged as "question". The filled prompt goes to the LLM and the reply is
# parsed to a plain string.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)

In [None]:
""" ref
T1651	Cloud Administration Command	Adversaries may abuse cloud management services to execute commands within virtual machines. Resources such as AWS Systems Manager, Azure RunCommand, and Runbooks allow users to remotely run scripts in virtual machines by leveraging installed virtual machine agents.
"""

"""
What are cloud management services, and how can they be abused by adversaries?
How do resources like AWS Systems Manager, Azure RunCommand, and Runbooks allow users to execute commands within virtual machines?
What are the potential risks of adversaries leveraging cloud management services to execute commands?
How can one identify and prevent adversaries from abusing cloud management services for command execution?
Do cloud providers offer security measures to prevent adversaries from abusing these management services?
"""

# Ask the RAG chain the first of the candidate questions listed above and keep the
# answer in `ans` for the translation cell that follows.
ans = rag_chain.invoke("What are cloud management services, and how can they be abused by adversaries?")

In [None]:
import translators as ts
# Translate the English answer into Traditional Chinese (zh-TW) using the Google
# backend of the `translators` package, then print the result.
print(
    ts.translate_text(
        query_text=ans,
        translator='google',
        from_language='en',
        to_language='zh-TW',
    )
)