### PDF Extraction

In [1]:
import pdfplumber
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import os
from PIL import Image
import requests
import json
import base64
import pandas as pd
from dotenv import load_dotenv
import openai
from openai import OpenAI
from groq import Groq

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Load .env file
load_dotenv()

True

In [3]:
JINA_API_KEY = os.getenv("JINA_API_KEY")  # Correct
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

In [7]:
output_dir = "../extracted_images"
os.makedirs(output_dir, exist_ok=True)

def extract_images_from_pdf(pdf_path, output_dir):
    image_paths = []
    with pdfplumber.open(pdf_path) as pdf:
        for page_number, page in enumerate(pdf.pages):
            for img_index, img in enumerate(page.images):
                image_data = img["stream"].get_data()
                image_format = "jpg"
                image_path = f"{output_dir}/page_{page_number+1}_img_{img_index}.{image_format}"
                
                with open(image_path, "wb") as f:
                    f.write(image_data)
                
                image_paths.append(image_path)
    
    return image_paths

# Extract images
pdf_path = "../SmartCleanX5_User_Manual_Indo_Updated.pdf"
image_paths = extract_images_from_pdf(pdf_path, output_dir)

# Print extracted image paths
print("Extracted images:", image_paths)

In [6]:
pdf_path = "../SmartCleanX5_User_Manual_Indo_Updated.pdf"


In [4]:
# Set up your Jina API Key
JINA_API_URL = "https://api.jina.ai/v1/embeddings"

def get_jina_embeddings(sentences):
    """
    Fetches embeddings from Jina's API for a list of sentences.
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {JINA_API_KEY}"
    }

    payload = {
        "input": sentences,
        "model": "jina-embeddings-v3"
    }

    response = requests.post(JINA_API_URL, headers=headers, data=json.dumps(payload))

    if response.status_code == 200:
        return [item["embedding"] for item in response.json()["data"]]  # Extract only embeddings
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None


In [3]:
def extract_text_from_pdf(pdf_path):
    """
    Extracts text from a PDF file using pdfplumber.
    """
    text_chunks = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:  # Only add non-empty pages
                text_chunks.append(text.strip())
    return text_chunks

In [4]:
def semantic_chunking(text_chunks, threshold=0.6):
    """
    Splits extracted text into meaningful chunks based on cosine similarity using Jina Embeddings API.
    """
    processed_chunks = []
    
    for text in text_chunks:
        sentences = text.split(". ")  # Basic sentence splitting

        # Get embeddings from Jina API
        embeddings = get_jina_embeddings(sentences)
        if embeddings is None:
            continue  # Skip if API call fails

        current_chunk = [sentences[0]]  # Start with first sentence
        for i in range(1, len(sentences)):
            similarity = cosine_similarity([embeddings[i]], [embeddings[i - 1]])[0][0]

            # Print similarity scores for debugging
            print(f"Similarity between:\n[{sentences[i - 1]}]\nand\n[{sentences[i]}] = {similarity:.4f}")

            if similarity > threshold:
                current_chunk.append(sentences[i])  # Merge into the same chunk
            else:
                processed_chunks.append(". ".join(current_chunk))  # Save chunk
                current_chunk = [sentences[i]]  # Start new chunk

        if current_chunk:
            processed_chunks.append(". ".join(current_chunk))  # Save last chunk

    return processed_chunks

In [7]:
extracted_text_chunks = extract_text_from_pdf(pdf_path)

# Apply semantic chunking
processed_semantic_chunks = semantic_chunking(extracted_text_chunks)

Similarity between:
[Halaman Sampul
Bagian 1: Pengantar Halaman Sampul
Selamat datang di bab 'Halaman Sampul' dalam buku petunjuk pengguna robot
vakum SmartClean X5 AI]
and
[Kami memahami pentingnya kesederhanaan dan
pengalaman pengguna yang lancar, dan kami bertujuan untuk memberikan kualitas
tersebut pada produk kami] = 0.4457
Similarity between:
[Kami memahami pentingnya kesederhanaan dan
pengalaman pengguna yang lancar, dan kami bertujuan untuk memberikan kualitas
tersebut pada produk kami]
and
[Bacalah petunjuk ini untuk memastikan bahwa Anda
menikmati interaksi yang lancar dan layanan yang lama dari vakum robot AI Anda.
'Halaman Sampul' adalah antarmuka pertama yang Anda temui saat memulai atau
mencerminkan menu digital SmartClean X5] = 0.5137
Similarity between:
[Bacalah petunjuk ini untuk memastikan bahwa Anda
menikmati interaksi yang lancar dan layanan yang lama dari vakum robot AI Anda.
'Halaman Sampul' adalah antarmuka pertama yang Anda temui saat memulai atau
mencerminkan m

In [33]:
# 🔹 Print first 5 chunks to verify
for i, chunk in enumerate(processed_semantic_chunks[:5]):
    print(f"Chunk {i+1}:\n{chunk}\n")

Chunk 1:
Halaman Sampul
Bagian 1: Pengantar Halaman Sampul
Selamat datang di bab 'Halaman Sampul' dalam buku petunjuk pengguna robot
vakum SmartClean X5 AI

Chunk 2:
Kami memahami pentingnya kesederhanaan dan
pengalaman pengguna yang lancar, dan kami bertujuan untuk memberikan kualitas
tersebut pada produk kami

Chunk 3:
Bacalah petunjuk ini untuk memastikan bahwa Anda
menikmati interaksi yang lancar dan layanan yang lama dari vakum robot AI Anda.
'Halaman Sampul' adalah antarmuka pertama yang Anda temui saat memulai atau
mencerminkan menu digital SmartClean X5

Chunk 4:
Halaman ini berisi statistik operasional
yang penting, tampilan status, dan tombol navigasi penting yang mengarah ke menu
fungsionalitas tambahan

Chunk 5:
Anda dapat melihat halaman sampul pada aplikasi seluler
SmartClean X5, layar sentuh robot, atau portal web



### Generate descriptions for images

In [44]:
client = OpenAI()
openai.api_key = os.getenv("OPENAI_API_KEY")

In [45]:
def encode_image(image_path):
    """Reads and encodes an image to a Base64 string."""
    with open(image_path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode("utf-8")

def get_mime_type(filename):
    """Returns the MIME type based on the file extension."""
    if filename.lower().endswith((".jpg", ".jpeg")):
        return "image/jpeg"
    elif filename.lower().endswith(".png"):
        return "image/png"
    else:
        return "application/octet-stream"

In [46]:
def generate_description(image_path):
    """
    Encodes the image, builds a data URI, and sends a prompt with the image_url
    to the OpenAI API to generate a description.
    """
    filename = os.path.basename(image_path)
    mime_type = get_mime_type(filename)
    base64_image = encode_image(image_path)
    
    # Build the data URI for the image
    data_uri = f"data:{mime_type};base64,{base64_image}"
    
    # Prepare the message payload
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # Change to your desired model identifier
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "What is in this image?"},
                    {"type": "image_url", "image_url": {"url": data_uri}},
                ],
            }
        ],
    )
    
    # Extract and return the description from the response.
    # Adjust the extraction based on the actual response structure.
    return response.choices[0]

In [47]:
def process_images_in_folder(folder_path):
    """
    Processes all images in the specified folder and collects their descriptions.
    Returns a list of dictionaries with image filename and generated description.
    """
    results = []
    for filename in os.listdir(folder_path):
        if filename.lower().endswith((".jpg", ".jpeg", ".png")):
            image_path = os.path.join(folder_path, filename)
            print(f"Processing: {image_path}")
            description = generate_description(image_path)
            results.append({
                "image": filename,
                "description": description
            })
    return results

In [63]:
folder_path = "../extracted_images"
image_descriptions = process_images_in_folder(folder_path)

# Print data structure
for item in image_descriptions:
    print(type(item), item)

def extract_text(choice_obj):
    """Extracts the content string from the Choice-like object dynamically."""
    if hasattr(choice_obj, "message") and hasattr(choice_obj.message, "content"):
        return choice_obj.message.content  # Extract text description
    return choice_obj  # Return as is if not a Choice-like object

# Convert each item's description to a serializable format
for item in image_descriptions:
    item["description"] = extract_text(item["description"])  # Fix the description field

# Now, JSON serialization should work
with open("../image_descriptions.json", "w") as json_file:
    json.dump(image_descriptions, json_file, indent=4)

print("Descriptions saved to image_descriptions.json")

Processing: ../extracted_images/page_27_img_0.png
Processing: ../extracted_images/page_13_img_0.png
Processing: ../extracted_images/page_1_img_0.png
<class 'dict'> {'image': 'page_27_img_0.png', 'description': Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The image is a flowchart titled "Pohon Keputusan Pemecahan Masalah," which translates to "Decision Tree for Problem Solving." It outlines a decision-making process regarding a robot that is not charging correctly. \n\nHere’s a brief overview of its structure:\n\n- It begins with "Mulai" (Start).\n- The first decision question is about whether the robot is charging ("Robot tidak mengisi daya?").\n  - If "Yes," it suggests checking the power doc ("Periksa daya dok").\n  - If "No," it checks if the power is good ("Daya baik?").\n    - If "Yes," the process concludes with "Selesai" (Finished).\n    - If "No," it prompts to check the battery health ("Periksa kesehatan baterai").\n      - If the

### Handle csv

In [8]:
def process_csv(csv_path):
    df = pd.read_csv(csv_path)
    text_chunks = []
    for _, row in df.iterrows():
        text = (f"Pada {row['Waktu_Mulai']}, SmartClean X5 beroperasi dalam mode {row['Mode_Pembersihan']}, "
                f"mencakup area seluas {row['Luas_Area_yang_Tercakup_m2']} m² di lantai {row['Tipe_Lantai']}. "
                f"Baterai digunakan: {row['Penggunaan_Baterai']}%. Menghadapi {row['Kendala_Pertemuan']} kendala. "
                f"Tempat sampah penuh: {row['Tempat Sampah_Penuh']}. Versi firmware: {row['Versi_Firmware']}.")
        text_chunks.append(text)

    return text_chunks

In [9]:
csv_path = "../synthetic_smartclean_data_indo.csv"
csv_chunks = process_csv(csv_path)

### Embedding 

In [76]:
from PIL import Image
import io

def get_jina_image_embedding(image_path, max_size=(256, 256), quality=70):
    """
    Fetches compressed image embeddings from Jina API.
    - Further reduces image size to 256x256
    - Lowers JPEG quality to 70 for more compression
    - Ensures Base64 encoding remains within token limits
    """
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {JINA_API_KEY}"
    }

    # Open and resize image
    with Image.open(image_path) as img:
        img = img.convert("RGB")  # Convert to RGB (to avoid transparency issues)
        img.thumbnail(max_size)  # Resize while maintaining aspect ratio

        # Convert to JPEG in memory with lower quality
        img_bytes = io.BytesIO()
        img.save(img_bytes, format="JPEG", quality=quality)
        encoded_image = base64.b64encode(img_bytes.getvalue()).decode("utf-8")

    # Ensure Base64 string is within limit
    if len(encoded_image) > 8194 * 4:  # Base64 expands data ~4x, so we check against 4x the limit
        print(f"Skipping {image_path}: Base64 encoding still too large ({len(encoded_image)} tokens).")
        return None

    payload = {
        "input": [encoded_image],  
        "model": "jina-embeddings-v3"
    }

    response = requests.post(JINA_API_URL, headers=headers, data=json.dumps(payload))

    if response.status_code == 200:
        return response.json()["data"][0]["embedding"]  # Return image embedding
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None


### Initialize database

In [5]:
import chromadb

# Initialize ChromaDB Client
db = chromadb.PersistentClient(path="../multi_modal_db")

# Create collections for text and images
text_collection = db.get_or_create_collection("text_embeddings")
image_collection = db.get_or_create_collection("image_descriptions")
csv_collection = db.get_or_create_collection("csv_data")

In [53]:
def store_text(text_chunks):
    """
    Stores text document chunks as searchable embeddings.
    """
    for i, text in enumerate(text_chunks):
        embedding = get_jina_embeddings([text])[0]
        text_collection.add(
            ids=[str(i)],
            embeddings=[embedding],
            metadatas=[{"text": text}]
        )


In [58]:
store_text(processed_semantic_chunks)

Insert of existing embedding ID: 0
Add of existing embedding ID: 0
Insert of existing embedding ID: 1
Add of existing embedding ID: 1
Insert of existing embedding ID: 2
Add of existing embedding ID: 2
Insert of existing embedding ID: 3
Add of existing embedding ID: 3
Insert of existing embedding ID: 4
Add of existing embedding ID: 4
Insert of existing embedding ID: 5
Add of existing embedding ID: 5
Insert of existing embedding ID: 6
Add of existing embedding ID: 6
Insert of existing embedding ID: 7
Add of existing embedding ID: 7
Insert of existing embedding ID: 8
Add of existing embedding ID: 8
Insert of existing embedding ID: 9
Add of existing embedding ID: 9
Insert of existing embedding ID: 10
Add of existing embedding ID: 10
Insert of existing embedding ID: 11
Add of existing embedding ID: 11
Insert of existing embedding ID: 12
Add of existing embedding ID: 12
Insert of existing embedding ID: 13
Add of existing embedding ID: 13
Insert of existing embedding ID: 14
Add of existing em

In [65]:
json_path = "../image_descriptions.json"
# Load Image Descriptions from JSON
def load_image_descriptions(json_path):
    with open(json_path, "r") as file:
        return json.load(file)

# Example Usage
image_descriptions = load_image_descriptions(json_path)

print("Loaded Image Descriptions:", image_descriptions[:3])  # Print sample


Loaded Image Descriptions: [{'image': 'page_27_img_0.png', 'description': 'The image is a flowchart titled "Pohon Keputusan Pemecahan Masalah," which translates to "Decision Tree for Problem Solving." It outlines a decision-making process regarding a robot that is not charging correctly. \n\nHere’s a brief overview of its structure:\n\n- It begins with "Mulai" (Start).\n- The first decision question is about whether the robot is charging ("Robot tidak mengisi daya?").\n  - If "Yes," it suggests checking the power doc ("Periksa daya dok").\n  - If "No," it checks if the power is good ("Daya baik?").\n    - If "Yes," the process concludes with "Selesai" (Finished).\n    - If "No," it prompts to check the battery health ("Periksa kesehatan baterai").\n      - If the battery needs changing ("Baterai diganti?"), it leads to that action.\n- There are also steps to contact support if needed.\n\nOverall, it provides a systematic approach to troubleshoot issues related to the robot\'s charging.

In [78]:
import os

def store_images_with_metadata(image_descriptions, image_folder="../extracted_images"):
    """
    Stores image embeddings along with descriptions as metadata in ChromaDB.
    """
    for entry in image_descriptions:
        image_name = entry["image"]  # Extract image filename
        image_path = os.path.join(image_folder, image_name)  # Construct full path
        description = entry["description"]

        # Ensure the image file exists before processing
        if not os.path.exists(image_path):
            print(f"Skipping {image_name}: Image file not found at {image_path}")
            continue

        # Generate embeddings
        image_embedding = get_jina_image_embedding(image_path)  
        text_embedding = get_jina_embeddings([description]) 

        # Handle API failures
        if image_embedding is None:
            print(f"Skipping {image_name}: Failed to generate image embedding.")
            continue
        if text_embedding is None or len(text_embedding) == 0:
            print(f"Skipping {image_name}: Failed to generate text embedding.")
            continue

        # Store image and text embeddings in ChromaDB
        image_collection.add(
            ids=[image_name],  
            embeddings=[image_embedding],  # Store image embedding
            metadatas=[
                {
                    "image_path": image_path, 
                    "description": description,
                }
            ]
        )

    print("All valid images stored in ChromaDB with descriptions!")


In [79]:
store_images_with_metadata(image_descriptions)

All valid images stored in ChromaDB with descriptions!


In [80]:
def store_csv_chunks(csv_chunks, csv_collection):
    """
    Stores processed CSV sentences as embeddings in ChromaDB.

    Args:
        csv_chunks (list): List of structured sentences from CSV.
        csv_collection: ChromaDB collection for storing CSV embeddings.
    """
    for index, text in enumerate(csv_chunks):
        # Get text embedding
        text_embedding = get_jina_embeddings([text])

        if text_embedding is None:
            print(f"Skipping entry {index}: Failed to generate embedding.")
            continue

        # Store in ChromaDB
        csv_collection.add(
            ids=[f"csv_{index}"],  # Unique ID for each CSV row
            embeddings=[text_embedding[0]],  # Store the embedding
            metadatas=[{"sentence": text}]  # Store original sentence as metadata
        )

In [81]:
store_csv_chunks(csv_chunks, csv_collection)

### Query

In [6]:
query_examples = {
    "text": [
        "What are the troubleshooting steps for a robot issue?",
        "How does SmartClean X5 handle errors?",
        "Explain the maintenance process of the robot."
    ],
    "image": [
        "Show me a chart about cleaning efficiency.",
        "Do you have a workflow diagram for troubleshooting?",
        "I need a graph on battery performance."
    ],
    "csv": [
        "What is the cleaning efficiency on carpet?",
        "How much battery is used for a full cleaning cycle?",
        "Show the firmware version details."
    ]
}

In [7]:
def store_query_examples():
    """
    Stores example queries and their embeddings in ChromaDB for query type detection.
    """
    query_collection = db.get_or_create_collection("query_types")

    for query_type, examples in query_examples.items():
        embeddings = get_jina_embeddings(examples)

        for idx, emb in enumerate(embeddings):
            query_collection.add(
                ids=[f"{query_type}_{idx}"],
                embeddings=[emb],
                metadatas=[{"query_type": query_type}]
            )

    print("Stored representative query embeddings in ChromaDB.")

store_query_examples()

Add of existing embedding ID: text_0
Insert of existing embedding ID: text_0
Add of existing embedding ID: text_1
Insert of existing embedding ID: text_1
Add of existing embedding ID: text_2
Insert of existing embedding ID: text_2
Add of existing embedding ID: image_0
Insert of existing embedding ID: image_0
Add of existing embedding ID: image_1
Insert of existing embedding ID: image_1
Add of existing embedding ID: image_2
Insert of existing embedding ID: image_2
Add of existing embedding ID: csv_0
Insert of existing embedding ID: csv_0
Add of existing embedding ID: csv_1
Insert of existing embedding ID: csv_1
Add of existing embedding ID: csv_2
Insert of existing embedding ID: csv_2


Stored representative query embeddings in ChromaDB.


In [8]:
def detect_query_type_dynamic(query):
    """
    Uses semantic similarity to determine if the query is related to text, image, or CSV data.
    """
    query_collection = db.get_collection("query_types")
    query_embedding = get_jina_embeddings([query])

    # Retrieve the closest stored query type
    results = query_collection.query(
        query_embeddings=query_embedding,
        n_results=1
    )

    if results and results["metadatas"]:
        return results["metadatas"][0][0]["query_type"]

    return "text"  # Default to text if no match is found

In [9]:
def retrieve_from_chromadb(query, top_k=5):
    """
    Uses semantic similarity to detect query type and retrieve relevant results.
    """
    query_type = detect_query_type_dynamic(query)

    if query_type == "text":
        results = text_collection.query(
            query_embeddings=get_jina_embeddings([query]),
            n_results=top_k
        )
        extracted_results = [meta["text"] for meta in results["metadatas"][0] if "text" in meta]
    elif query_type == "image":
        results = image_collection.query(
            query_embeddings=get_jina_embeddings([query]),
            n_results=top_k
        )
        extracted_results = [meta["description"] for meta in results["metadatas"][0] if "description" in meta]

    elif query_type == "csv":
        results = csv_collection.query(
            query_embeddings=get_jina_embeddings([query]),
            n_results=top_k
        )
        extracted_results = [meta["sentence"] for meta in results["metadatas"][0] if "sentence" in meta]

    else:
        return "Query type not recognized."

    return {
        "query_type": query_type,
        "results": extracted_results
    }

In [153]:
query = "What are the troubleshooting steps for a robot issue?"
result = retrieve_from_chromadb(query)

In [146]:
print(result)

{'query_type': 'text', 'results': ['Robot tidak Membersihkan Secara Efektif: Jika robot gagal membersihkan secara\nefektif, periksa apakah tempat sampah telah dikosongkan, sikat bebas dari kotoran,\ndan apakah filternya bersih. Pastikan sensor bebas dari debu.\n5.2. Unit Tetap Berada di Dekat Dok Pengisian Daya: Jika unit tetap berada di dekat\ndudukan pengisian daya, tingkatkan waktu pembersihan dalam pengaturan atau\nkurangi area pembersihan.\n5.3. Pematian Tak Terduga: Jika terjadi pematian tak terduga, sambungkan unit\nsecara langsung ke pengisi daya. Jika tidak menyala, hubungi layanan pelanggan.\n6', 'Mengatur Ulang Pengaturan Wi-Fi\nJika semuanya gagal, mengatur ulang pengaturan Wi-Fi pada perangkat Anda dapat\nmemberikan awal yang baru untuk\nKesalahan dan Perbaikan Robot yang Umum Terjadi\nBagian 6: Kesalahan Umum Robot dan Perbaikan untuk Vakum Robot AI\nSmartClean X5\nBagian ini memberikan panduan komprehensif tentang kesalahan umum yang\nmungkin terjadi saat menggunakan vak

In [141]:
query_2 = "Show me a chart about cleaning efficiency."
retrieved_texts_2 = retrieve_from_chromadb(query_2)  # assuming this function is defined elsewhere

Number of requested results 5 is greater than number of elements in index 3, updating n_results = 3


In [142]:
print(retrieved_texts_2)

{'query_type': 'image', 'results': ['The image is a flowchart titled "Pohon Keputusan Pemecahan Masalah," which translates to "Decision Tree for Problem Solving." It outlines a decision-making process regarding a robot that is not charging correctly. \n\nHere’s a brief overview of its structure:\n\n- It begins with "Mulai" (Start).\n- The first decision question is about whether the robot is charging ("Robot tidak mengisi daya?").\n  - If "Yes," it suggests checking the power doc ("Periksa daya dok").\n  - If "No," it checks if the power is good ("Daya baik?").\n    - If "Yes," the process concludes with "Selesai" (Finished).\n    - If "No," it prompts to check the battery health ("Periksa kesehatan baterai").\n      - If the battery needs changing ("Baterai diganti?"), it leads to that action.\n- There are also steps to contact support if needed.\n\nOverall, it provides a systematic approach to troubleshoot issues related to the robot\'s charging.', 'The image contains text that reads

In [143]:
query_3 = "How much battery is used for a full cleaning cycle?"
retrieved_texts_3 = retrieve_from_chromadb(query_3)  # assuming this function is defined elsewhere

In [144]:
print(retrieved_texts_3)

{'query_type': 'csv', 'results': ['Pada 1/24/25 8:17, SmartClean X5 beroperasi dalam mode Deep Clean, mencakup area seluas 5.88 m² di lantai Karpet . Baterai digunakan: 6%. Menghadapi 8 kendala. Tempat sampah penuh: BENAR. Versi firmware: 1.2.42.', 'Pada 2/1/25 11:19, SmartClean X5 beroperasi dalam mode Deep Clean, mencakup area seluas 15.56 m² di lantai Karpet . Baterai digunakan: 28%. Menghadapi 2 kendala. Tempat sampah penuh: BENAR. Versi firmware: 1.2.54.', 'Pada 1/29/25 1:20, SmartClean X5 beroperasi dalam mode Deep Clean, mencakup area seluas 47.47 m² di lantai Karpet . Baterai digunakan: 30%. Menghadapi 10 kendala. Tempat sampah penuh: BENAR. Versi firmware: 1.6.6.', 'Pada 1/8/25 9:26, SmartClean X5 beroperasi dalam mode Deep Clean, mencakup area seluas 35.53 m² di lantai Karpet . Baterai digunakan: 13%. Menghadapi 9 kendala. Tempat sampah penuh: BENAR. Versi firmware: 1.0.15.', 'Pada 1/13/25 5:54, SmartClean X5 beroperasi dalam mode Deep Clean, mencakup area seluas 32.9 m² di l

### Load Model using Groq

In [10]:
def generate_response_groq(prompt, model="llama-3.3-70b-versatile"):
    """
    Generates a response using the Groq API with the specified model.
    Adjusts parameters like temperature and max_tokens as needed.
    """
    # Initialize the client with your API key
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    
    # Create a chat completion request
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model=model,
        temperature=0.1,  # optional: adjust as needed
        max_tokens=512    # optional: adjust as needed
    )
    
    # Return the generated content
    return chat_completion.choices[0].message.content

In [12]:
query = "Mode pembersihan apa saja yang tersedia di SmartClean X5?"
retrieved_texts = retrieve_from_chromadb(query)["results"]  # assuming this function is defined elsewhere
print(retrieved_texts)
# Combine retrieved texts into context
context = "\n\n".join(retrieved_texts)
prompt = f"Gunakan informasi berikut ini untuk menjawab pertanyaan pengguna:\n\n{context}\n\nPertanyaan: {query}"

# print(prompt)
llm_response = generate_response_groq(prompt)
print("LLM Response:", llm_response)

['Mode Pembersihan: SmartClean X5 AI memiliki empat mode pembersihan', 'Lima Mode Pembersihan\nOptimalkan efisiensi pembersihan SmartClean X5 dengan memanfaatkan lima mode\npembersihannya:\n(cid:127) Bersih Otomatis: Perangkat ini menavigasi sendiri, membersihkan seluruh\nrumah Anda secara menyeluruh', "Pengantar: SmartClean X5 AI Robot Vacuum memberikan pengalaman pembersihan\notomatis dan tanpa tangan yang dirancang untuk menyederhanakan pekerjaan rumah\ntangga Anda. Bagian ini akan memandu Anda tentang cara menavigasi berbagai\nmode pembersihan dan menyesuaikan pengaturan robot penyedot debu agar sesuai\ndengan kebutuhan spesifik Anda.\nMode Pembersihan:\n1. Mode Otomatis (default): Mode ini memungkinkan SmartClean X5 untuk\nmengatur jalurnya di sekitar rumah Anda, memetakan jalurnya, dan melakukan tugas\npembersihan secara otomatis. Untuk mengaktifkannya, tekan tombol 'Auto' pada\npanel kontrol atau Aplikasi SmartClean.\n2. Mode Spot Clean: Mode ini memungkinkan pembersihan yang di