In [1]:
!pip install langchain boto3 faiss-cpu sentence-transformers sagemaker awscli

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-5.1.2-py3-none-any.whl.metadata (16 kB)
Collecting awscli
  Downloading awscli-1.42.59-py3-none-any.whl.metadata (11 kB)
INFO: pip is looking at multiple versions of awscli to determine which version is compatible with other requirements. This could take a while.
  Downloading awscli-1.42.58-py3-none-any.whl.metadata (11 kB)
  Downloading awscli-1.42.57-py3-none-any.whl.metadata (11 kB)
  Downloading awscli-1.42.56-py3-none-any.whl.metadata (11 kB)
  Downloading awscli-1.42.55-py3-none-any.whl.metadata (11 kB)
  Downloading awscli-1.42.54-py3-none-any.whl.metadata (11 kB)
  Downloading awscli-1.42.53-py3-none-any.whl.metadata (11 kB)
  Downloading awscli-1.42.52-py3-none-any.whl.metadata (11 kB)
INFO: pip is still looking at multiple versions of awscli to determine which version is compatib

In [6]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from transformers import pipeline


In [7]:
# 1. Load local dataset
with open("knowledge.txt", "r") as f:
    data = f.read()

In [8]:
# 2. Split text into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=20)
docs = splitter.create_documents([data])


In [9]:
# 3. Create embeddings and store locally
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embedding_model)

In [10]:
# 4. Initialize a local LLM (or use a hosted one via API)
llm_pipeline = pipeline(
    "text-generation",
    model="google/flan-t5-base",
    tokenizer="google/flan-t5-base",
    max_new_tokens=128
)
llm = HuggingFacePipeline(pipeline=llm_pipeline)


Device set to use cpu
The model 'T5ForConditionalGeneration' is not supported for text-generation. Supported models are ['PeftModelForCausalLM', 'ArceeForCausalLM', 'AriaTextForCausalLM', 'BambaForCausalLM', 'BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BitNetForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'Cohere2ForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'DeepseekV2ForCausalLM', 'DeepseekV3ForCausalLM', 'DiffLlamaForCausalLM', 'DogeForCausalLM', 'Dots1ForCausalLM', 'ElectraForCausalLM', 'Emu3ForCausalLM', 'ErnieForCausalLM', 'Ernie4_5ForCausalLM', 'Ernie4_5_MoeForCausalLM', 'Exaone4ForCausalLM', 'FalconForCausalLM', 'FalconH1ForCausalLM', 'FalconMambaForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForC

In [11]:
# 5. Create RAG chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vectorstore.as_retriever(search_kwargs={"k": 2})
)


In [12]:
# 6. Ask a question
query = "What is SageMaker used for?"
answer = qa.run(query)

print("Q:", query)
print("A:", answer)

Q: What is SageMaker used for?
A: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Amazon SageMaker is a fully managed machine learning service provided by AWS.
It enables developers to build, train, and deploy machine learning models quickly.

SageMaker provides tools for data labeling, model monitoring, and MLOps pipelines.
EOF

Question: What is SageMaker used for?
Helpful Answer:


In [15]:
vectorstore.save_local("faiss_index")

In [21]:
# Later reload
new_vs = FAISS.load_local("faiss_index/index.pkl", embedding_model)

ValueError: The de-serialization relies loading a pickle file. Pickle files can be modified to deliver a malicious payload that results in execution of arbitrary code on your machine.You will need to set `allow_dangerous_deserialization` to `True` to enable deserialization. If you do this, make sure that you trust the source of the data. For example, if you are loading a file that you created, and know that no one else has modified the file, then this is safe to do. Do not set this to `True` if you are loading a file from an untrusted source (e.g., some random site on the internet.).