In [2]:
!pip install -qU \
  sagemaker==2.173.0 --no-deps \
  pinecone-client==2.2.1 --no-deps \
  ipywidgets==8.0.0 --no-deps \
  PyPDF2 --no-deps \
  faiss-cpu --no-deps \
  langchain --no-deps \
  numpy==1.26.4 --no-deps \
  pandas==2.2.3 --no-deps \
  ipython --no-deps \
  ipython_genutils --no-deps \
  jupyterlab_widgets --no-deps \
  widgetsnbextension --no-deps \
  sentence-transformers --no-deps


In [4]:
import sagemaker 
from sagemaker.huggingface import(
HuggingFaceModel,
get_huggingface_llm_image_uri
)

role = sagemaker.get_execution_role()

hub_config = {
    'HF_MODEL_ID':'google/flan-t5-large',
    'HF_TASK':'text-generation'
}

llm_image = get_huggingface_llm_image_uri(
    "huggingface",
    version="0.8.2"
)
huggingface_model = HuggingFaceModel(
    env=hub_config,
    role=role,
    image_uri=llm_image
)

In [7]:
llm = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.m5.2xlarge",
    endpoint_name="flan-t5-demo2"
)

---------------!

In [8]:
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

# 👇 Rebuild your predictor properly
llm = Predictor(
    endpoint_name="flan-t5-demo2",
    serializer=JSONSerializer(),           # sends JSON with correct Content-Type
    deserializer=JSONDeserializer()        # parses JSON response
)


In [9]:
question = "What is bubble sort"
response = llm.predict({"inputs": question})

print(response)


[{'generated_text': 'bubbles'}]


In [10]:
context = """Managed Spot training can be used with all instances supported in Amazon SageMaker. Managed Spot Training is supported in all AWS Regions where Amazon SageMaker is currently available"""

In [11]:
prompt_template = """Answer the following QUESTION based on the CONTEXT given. If you do not know the answer and the CONTEXT doesn't contain the answer truthfully say "I don't know".
CONTEXT:
{context}

QUESTION:
{question}

ANSWER:
"""

text_input = prompt_template.replace("{context}",context).replace("{question}",question) 
out = llm.predict({"inputs":text_input})
generated_text = out[0]["generated_text"]
print(f"[Input]: {question}\n[Output]: {generated_text}")

[Input]: What is bubble sort
[Output]: I don't know


In [12]:
unanswerable_question = "What color is my desk?"

text_input = prompt_template.replace("{context}",context).replace("{question}",unanswerable_question) 
out = llm.predict({"inputs":text_input})
generated_text = out[0]["generated_text"]
print(f"[Input]: {unanswerable_question}\n[Output]: {generated_text}")


[Input]: What color is my desk?
[Output]: I don't know


In [13]:
hub_config = {
    'HF_MODEL_ID': 'sentence-transformers/all-MiniLM-L6-v2', 
    'HF_TASK':'feature-extraction'
}

huggingface_model = HuggingFaceModel(
    env=hub_config,
    role = role,
    transformers_version="4.6",
    pytorch_version="1.7",
    py_version="py36",
)

In [14]:
encoder = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type="ml.t2.large",
    endpoint_name="minilm-demo"
)

--------!

In [16]:
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
import faiss
import numpy as np

In [17]:
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
import faiss
import numpy as np
from sagemaker import Session
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer
import time  # Import the time module

# --- Set up SageMaker Predictor ---
endpoint_name = "minilm-demo"  # 🔁 Replace with your actual endpoint name
session = Session()

encoder = Predictor(
    endpoint_name=endpoint_name,
    serializer=JSONSerializer(),
    deserializer=JSONDeserializer(),
    sagemaker_session=session
)

# --- Load PDF from same folder ---
pdf_filename = "24042020_Transaction Management in DBMS.pdf"  # Update to the path of the sample PDF
pdf_path = os.path.join(os.getcwd(), pdf_filename)

reader = PdfReader(pdf_path)
pdf_text = "\n\n".join([page.extract_text() for page in reader.pages if page.extract_text()])

splitter = CharacterTextSplitter(separator="\n", chunk_size=500, chunk_overlap=50)
chunks = splitter.split_text(pdf_text)

# --- Generate Embeddings ---
valid_embeddings = []
valid_chunks = []
invalid_chunks = []

for idx, chunk in enumerate(chunks):
    if chunk.strip() == "":
        continue

    try:
        result = encoder.predict({"inputs": [chunk]})

        # Unwrap double list layer
        if isinstance(result, list) and isinstance(result[0], list):
            embedding = result[0][0]
        else:
            print(f"❌ Unexpected embedding format for chunk {idx + 1}: {result}")
            invalid_chunks.append(chunk)
            continue

        # Debug partial print
        print(f"✅ Result for chunk {idx + 1}: {embedding[:5]}...")

        if all(isinstance(x, (int, float)) for x in embedding):
            valid_embeddings.append(embedding)
            valid_chunks.append(chunk)
        else:
            print(f"❌ Invalid embedding values for chunk {idx + 1}")
            invalid_chunks.append(chunk)

    except Exception as e:
        print(f"❌ Error generating embedding for chunk {idx + 1}: {e}")
        invalid_chunks.append(chunk)

if not valid_embeddings:
    print(f"❌ No valid embeddings generated. Invalid chunks: {len(invalid_chunks)}")
    raise ValueError("No valid embeddings generated.")

# --- Build FAISS index ---
dimension = len(valid_embeddings[0])
index = faiss.IndexFlatL2(dimension)
index.add(np.array(valid_embeddings).astype("float32"))
print(f"✅ FAISS index built with {len(valid_embeddings)} chunks.")

# --- Context Retrieval ---
def retrieve_context(question):
    result = encoder.predict({"inputs": [question]})
    question_embedding = result[0][0]  # Same unwrapping here
    D, I = index.search(np.array([question_embedding]).astype("float32"), k=1)
    return valid_chunks[I[0][0]]

# --- Answer Generation ---
def generate_answer(question):
    # Start the timer when the function is called
    start_time = time.time()

    context = retrieve_context(question)
    text_input = prompt_template.replace("{context}", context).replace("{question}", question)
    out = llm.predict({"inputs": text_input})
    answer = out[0]["generated_text"]

    # Stop the timer and calculate elapsed time
    end_time = time.time()
    elapsed_time = end_time - start_time  # in seconds

    # Print the time taken for processing
    print(f"⏱️ Time taken for processing: {elapsed_time:.2f} seconds")

    return answer

# --- Sample Questions ---
questions = [
    "What is rollback in ACID property"
]

print("\n📋 Answering questions:")
for q in questions:
    ans = generate_answer(q)
    print(f"\nQ: {q}\nA: {ans}")


✅ Result for chunk 1: [0.26777422428131104, -0.12856808304786682, -0.25632742047309875, 0.005600318778306246, -0.46683451533317566]...
✅ Result for chunk 2: [0.07167105376720428, -0.015892410650849342, -0.16532552242279053, -0.02619650401175022, -0.4145868122577667]...
✅ Result for chunk 3: [-0.058464471250772476, 0.0032196284737437963, -0.3308429718017578, 0.2719138562679291, -0.19384631514549255]...
✅ Result for chunk 4: [0.05875478684902191, 0.016153700649738312, -0.08368296176195145, 0.038199614733457565, -0.6158127188682556]...
✅ Result for chunk 5: [0.34844034910202026, -0.1921585202217102, -0.15035594999790192, 0.05665930360555649, -0.341560423374176]...
✅ Result for chunk 6: [0.3743951916694641, -0.2476978749036789, -0.14139395952224731, 0.31055647134780884, -0.1806740164756775]...
✅ Result for chunk 7: [0.2918042242527008, -0.09482786804437637, -0.03357619047164917, -0.21713602542877197, 0.05217383801937103]...
✅ Result for chunk 8: [0.18471424281597137, -0.2693515121936798, -