# Simple RAG Example with Weaviate and LangChain

## WSL and Shell Command Helpers

In [1]:
import platform
import subprocess
import os

# --- WSL Detection ---
# USE_WSL is True only when platform.system() reports "Windows"; the shell
# helpers below use this flag to decide whether commands are routed through WSL.
system = platform.system()
USE_WSL = system == "Windows"
print(f"Operating System: {system}. Using WSL for Docker commands: {USE_WSL}")

# --- Shell Command Helpers ---
def run_wsl_command(command):
    """Run ``command`` via ``bash -l -c`` inside WSL and summarise the outcome.

    Output is captured as UTF-8 text; undecodable bytes are replaced rather
    than raising.

    Returns:
        dict with ``returncode`` (int), ``stdout`` and ``stderr`` (both with
        surrounding whitespace stripped) and ``success`` (True when the exit
        code is 0).
    """
    wsl_argv = ["wsl", "-e", "bash", "-l", "-c", command]
    capture_options = {
        "capture_output": True,
        "text": True,
        "encoding": "utf-8",
        "errors": "replace",
    }
    completed = subprocess.run(wsl_argv, **capture_options)
    exit_code = completed.returncode
    return dict(
        returncode=exit_code,
        stdout=completed.stdout.strip(),
        stderr=completed.stderr.strip(),
        success=(exit_code == 0),
    )

def run_linux_command(command):
    """Run ``command`` through the system shell (``shell=True``) and summarise the outcome.

    Output is captured as UTF-8 text; undecodable bytes are replaced rather
    than raising.

    Returns:
        dict with ``returncode`` (int), ``stdout`` and ``stderr`` (both with
        surrounding whitespace stripped) and ``success`` (True when the exit
        code is 0).
    """
    capture_options = {
        "shell": True,
        "capture_output": True,
        "text": True,
        "encoding": "utf-8",
        "errors": "replace",
    }
    completed = subprocess.run(command, **capture_options)
    exit_code = completed.returncode
    return dict(
        returncode=exit_code,
        stdout=completed.stdout.strip(),
        stderr=completed.stderr.strip(),
        success=(exit_code == 0),
    )

def run_shell_command(command):
    """Dispatch ``command`` to the WSL runner when USE_WSL is set, otherwise to the direct shell runner.

    Returns whatever the selected runner returns.
    """
    runner = run_wsl_command if USE_WSL else run_linux_command
    return runner(command)

print("‚úÖ Shell command helpers are defined.")

Operating System: Windows. Using WSL for Docker commands: True
‚úÖ Shell command helpers are defined.


## Install Dependencies

In [3]:
import sys

# Invoke pip through sys.executable (the interpreter running this kernel) so the
# packages are installed into the same environment the notebook imports from.
!"{sys.executable}" -m pip install -q weaviate-client==4.18.1 langchain~=0.3.0 langchain-openai~=0.2.0 python-dotenv~=1.0.0 pandas~=2.2.0

print("‚úÖ Required libraries have been installed.")

‚úÖ Required libraries have been installed.


In [4]:
# NOTE(review): these packages carry no version specifier and `-U` upgrades them
# to the newest release, so the installed versions can differ between runs —
# consider pinning them like the other dependencies.
!"{sys.executable}" -m pip install -U -q sentence-transformers accelerate

print("‚úÖ Extra libraries for local models run have been installed.")

‚úÖ Extra libraries for local models run have been installed.


## Configuration

In [2]:
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
# Required setting: indexing os.environ raises KeyError if the variable is not set.
HF_API_TOKEN = os.environ["HUGGINGFACE_API_TOKEN"]

# Embedding model for local run.
# If you have access to Gemma (you logged in via huggingface-cli), use: "google/embeddinggemma-300m" (768 dimensions)
# If you don't have access or encounter errors, use the standard one: "all-MiniLM-L6-v2" (384 dimensions)
LOCAL_EMBEDDING_MODEL_NAME = "google/embeddinggemma-300m"
# LOCAL_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2" # Uncomment if Gemma doesn't work


# Text generation model for local run.
LOCAL_LLM_MODEL_NAME = "google/gemma-3-1b-it"


# --- VECTOR DATABASE CONFIGURATION ---
# Container name, image tag and the HTTP/gRPC ports used by the Docker and
# Weaviate-connection cells.
WEAVIATE_CONTAINER_NAME = "simple-rag-weaviate"
WEAVIATE_IMAGE = "semitechnologies/weaviate:1.33.7"
WEAVIATE_HTTP_PORT = 8080
WEAVIATE_GRPC_PORT = 50051

print("‚úÖ Configuration loaded.")

‚úÖ Configuration loaded.


In [3]:
from huggingface_hub import login

# Authenticate with the Hugging Face Hub using the token loaded in the
# configuration cell (HF_API_TOKEN).
login(token=HF_API_TOKEN)
print("Successfully logged in to Hugging Face!")

  from .autonotebook import tqdm as notebook_tqdm


Successfully logged in to Hugging Face!


## Data Generation

In [4]:
documents_data = [
    {
        "title": "Toddler Health and Feeding Guide (1‚Äì3 Years) WHO",
        "content": "The period from one to three years of age is marked by rapid physical growth, neurological development, and increasing independence. During this stage, a child‚Äôs immune system is still maturing, making toddlers especially sensitive to infections, changes in nutrition, and environmental factors. Understanding common health conditions and proper nutrition is essential for supporting healthy development and preventing complications. One of the most frequent health concerns in toddlers is fever. Normal body temperature ranges between 36.5 and 37.5 degrees Celsius, while a temperature above 38 degrees is considered a fever. Fever itself is not a disease, but rather a natural response of the immune system to infection. In toddlers, fever is most commonly caused by viral infections, teething, and post-vaccination immune reactions. During episodes of fever, the child‚Äôs body works harder to fight infection, increasing the need for rest and hydration. Parents should ensure that the child drinks adequate fluids, wears light clothing, and remains in a calm, cool environment. The child‚Äôs temperature should be checked regularly. Medical assistance is required if the fever exceeds 39 degrees, lasts longer than forty-eight hours, or is accompanied by lethargy, seizures, breathing difficulty, or refusal to drink. Another common condition in toddlers is diarrhea. Diarrhea is usually caused by viral or bacterial infections, unwashed hands, contaminated food, or food intolerance. The primary danger associated with diarrhea is dehydration, which can develop quickly in young children. Parents should continue feeding the child in small amounts and provide oral rehydration solutions to replace lost fluids and electrolytes. Sugary drinks and carbonated beverages should be avoided, as they can worsen dehydration. Warning signs of serious dehydration include dry mouth, lack of tears when crying, sunken eyes, prolonged absence of urination, and extreme weakness. 
Timely medical intervention is essential if these symptoms appear. Teething continues through the toddler years as the back molars emerge. Teething may cause gum swelling, drooling, mild fever, and irritability. Children may chew on objects to relieve discomfort. Safe relief methods include teething rings, gentle gum massage with clean fingers, and cold compresses. Pain-relieving medications should only be used when necessary and under medical supervision. It is important to remember that serious symptoms such as high fever, severe diarrhea, or persistent vomiting are not normal signs of teething and require medical evaluation. Nutrition during the toddler years plays a decisive role in brain development, immune strength, bone formation, and emotional stability. Toddlers require three balanced meals and two healthy snacks per day. Their diet should contain a variety of food groups including whole grains, vegetables, fruits, protein, and dairy. Vegetables such as carrots, broccoli, zucchini, and pumpkin supply essential micronutrients. Fruits such as apples, pears, bananas, and berries provide fiber and natural energy. Protein sources include chicken, turkey, lentils, beans, eggs, and fish. Dairy products such as milk, yogurt, and cheese provide calcium and vitamin D, which are crucial for bone growth. Introduction of potentially allergenic foods such as eggs, nuts, fish, and cow‚Äôs milk should be done gradually and under careful observation. Food allergies may present as skin rash, swelling of the lips or face, vomiting, diarrhea, or difficulty breathing. In severe cases, immediate emergency medical care is required. Many children outgrow food sensitivities over time, but early identification is essential for safety. Picky eating behavior is very common between one and three years and is closely connected to the toddler‚Äôs growing sense of independence. Children may refuse previously accepted foods, eat very small portions, or demand the same food repeatedly. 
This behavior is a normal developmental stage and should not be forced or punished. Parents are advised to offer a varied diet, maintain consistent meal times, avoid pressure, and model healthy eating behavior through family meals. When meals are calm and predictable, children gradually learn to trust food and regulate their own appetite. Feeding is not only a biological process but also a social and emotional experience. Shared meals support speech development, emotional bonding, and the formation of healthy lifelong habits. Proper nutrition, combined with sufficient sleep and physical activity, forms the cornerstone of toddler health."
    },
    {
        "title": "Montessori-Based Development and Play for Toddlers (1‚Äì3 Years)",
        "content": "The Montessori method of education, developed by Maria Montessori, is based on respect for the child‚Äôs natural psychological, physical, and social development. For toddlers aged one to three years, Montessori principles focus on independence, sensory exploration, movement, and purposeful activity. At this stage, children experience rapid brain growth, forming millions of neural connections through movement, interaction, and exploration of their environment. A Montessori-based environment is carefully prepared to support safe independence. Furniture is scaled to the child‚Äôs size, materials are accessible without adult assistance, and everyday objects replace overly stimulating toys. Practical life activities represent a central element of Montessori education for toddlers. These include pouring water, washing hands, carrying small objects, cleaning surfaces, and feeding themselves. Such tasks strengthen fine motor skills, improve coordination, and develop concentration. Most importantly, they help the child develop confidence in their own abilities. Independence in everyday actions reduces frustration and tantrums because the child feels capable and respected. Sensory development is another core area addressed by Montessori materials. Toddlers explore textures, shapes, sounds, weights, and sizes through hands-on interaction. Simple activities such as sorting objects by color, stacking blocks, touching different fabrics, or listening to natural sounds stimulate the sensory systems and support cognitive organization. Language development in Montessori education is supported through rich verbal interaction, naming objects, reading, singing, and storytelling. However, forced teaching of letters or numbers at this stage is discouraged. Instead, language grows naturally through meaningful communication. Adults are encouraged to speak slowly, clearly, and with respect, allowing the child to absorb vocabulary without pressure. 
Play in the Montessori approach is not separated from learning. Every meaningful activity becomes a learning opportunity. For example, when a toddler helps put away toys, they learn order and classification. When they pour water into a glass, they learn physics, coordination, and patience. These activities promote deep concentration known as normalization, a state in which the child is calm, focused, and internally motivated. Outdoor play also holds a special position in Montessori philosophy. Nature offers endless opportunities for sensory exploration and physical development. Walking on uneven surfaces, observing insects, touching plants, and playing with sand contribute to balance, observation skills, and emotional regulation. Free movement strengthens large muscle groups and supports healthy posture and coordination. Games with toddlers should always match their developmental level. At one year old, cause-and-effect games such as rolling a ball, opening boxes, and pressing buttons are especially engaging. At two years, imitation games, simple puzzles, and building towers become more suitable. By three years, toddlers begin to enjoy pretend play, sorting games, simple board games, and cooperative activities. The adult‚Äôs role in Montessori play is not to direct constantly but to observe, support when needed, and intervene minimally. Respect for the child‚Äôs rhythm is fundamental. Some children repeat the same activity many times, which is a sign of deep learning rather than boredom. Emotional development is also supported through Montessori principles. Toddlers are learning to recognize and express emotions but lack full control over impulses. Rather than punishment, Montessori emphasizes calm guidance, clear boundaries, and emotional labeling. When a child is upset, the adult helps name the feeling and model appropriate responses. Over time, the child internalizes these emotional regulation strategies. 
The Montessori environment and method can be naturally integrated into a smart toddler care assistant system. Digital reminders for practical life routines, guidance for age-appropriate play activities, and emotional support prompts can help parents align daily caregiving with Montessori principles while preserving warmth and human interaction."
    },
    {
        "title": "Smart Toddler Care Assistant and Parent-Guided Play (1‚Äì3 Years)",
        "content": "The early childhood period from one to three years is increasingly supported by digital health and development technologies. A Smart Toddler Care Assistant is a conceptual digital system designed to guide parents through the complex processes of child development, health monitoring, emotional support, and daily routines. Such a system is not meant to replace parental care but to enhance awareness, structure, and informed decision-making. The assistant can provide reminders for feeding, hydration, sleep schedules, vaccinations, and developmental milestones. Parents often experience stress, fatigue, and information overload, especially in the first years of childcare. A smart assistant can transform fragmented knowledge into structured, practical support. One of the most important functions of a smart toddler assistant is health monitoring. Parents can record body temperature, appetite, sleep duration, bowel movements, and mood. Pattern analysis allows early detection of potential health issues such as dehydration, sleep disorders, or feeding difficulties. For example, reduced appetite combined with increased sleep and irritability may indicate the onset of illness. When such patterns emerge, the system can recommend observation, home care strategies, or medical consultation. Nutrition guidance is another critical component. The assistant can suggest balanced meal plans, age-appropriate portion sizes, and alert parents to potential nutrient gaps such as iron or vitamin D deficiency. Integration with Montessori principles ensures that feeding recommendations emphasize self-feeding skills, sensory exploration of food, and positive mealtime atmosphere. The system may suggest allowing the toddler to choose between two healthy options, promoting autonomy while maintaining nutritional quality. Play and developmental guidance represent one of the most valuable aspects of the smart assistant. 
Based on the child‚Äôs age, the system can recommend cognitive, sensory, motor, and emotional games. For one-year-olds, it may suggest object permanence games, mirror play, and walking activities. For two-year-olds, it may propose stacking games, color sorting, imitation play, and simple songs with movements. For three-year-olds, pretend play, storytelling, simple logic games, and cooperative tasks become more relevant. Each recommendation can include a short explanation of what skill is being developed, such as fine motor control, social interaction, or emotional awareness. Emotional development receives increasing attention in modern digital child-support systems. Toddlers experience frustration, separation anxiety, fear, and intense emotional reactions. The smart assistant can guide parents in responding with emotional validation rather than punishment. For example, when a child throws a tantrum, the system may remind the parent to acknowledge the feeling, ensure safety, and provide calm presence instead of immediate discipline. Over time, consistent emotional support strengthens secure attachment and reduces maladaptive behavior patterns. Sleep regulation is another area where the assistant can be useful. Toddlers require approximately eleven to fourteen hours of total sleep per day, including naps. Irregular sleep schedules and excessive screen exposure can disturb circadian rhythms and lead to behavioral issues. The system can help structure consistent bedtime routines, suggest calming pre-sleep activities such as reading and bathing, and track sleep quality trends. Interactive play between parent and child remains irreplaceable despite technological support. The smart assistant should not draw attention away from real interaction but rather encourage meaningful shared activities. Joint play strengthens attachment, enhances speech development, and supports social learning. 
Reading aloud, singing, building together, and outdoor exploration remain foundational experiences that no digital system can substitute. Instead, the assistant acts as an invisible organizer and scientific advisor in the background of everyday parenting. When integrated with Montessori principles, pediatric health standards, and play-based learning, a Smart Toddler Care Assistant becomes a powerful tool for supporting holistic child development while preserving warmth, empathy, and human connection."
    }
]

# Report the real number of documents instead of a hard-coded count, so the
# message stays correct when documents_data is edited.
print(f'Generated {len(documents_data)} documents')

Generated 3 documents


## Docker Environment Setup

In [6]:
import time
import urllib.request

# Upper bound (seconds) on how long to wait for Weaviate to report readiness.
WEAVIATE_STARTUP_TIMEOUT_S = 60

# First, ensure no old container with the same name is running.
# Best-effort: errors (e.g. "no such container") are discarded via 2>/dev/null
# and the result is intentionally not checked.
print(f"--- Stopping and removing any existing container named '{WEAVIATE_CONTAINER_NAME}' ---")
stop_command = f"docker stop {WEAVIATE_CONTAINER_NAME} 2>/dev/null; docker rm {WEAVIATE_CONTAINER_NAME} 2>/dev/null"
run_shell_command(stop_command)
print("Cleanup complete.")

# Now, run the new Weaviate container (detached, anonymous access, no vectorizer
# module because vectors are supplied by the notebook itself).
print(f"\n--- Starting Weaviate container '{WEAVIATE_CONTAINER_NAME}' ---")
run_command = (
    f"docker run -d "
    f"--name {WEAVIATE_CONTAINER_NAME} "
    f"-p {WEAVIATE_HTTP_PORT}:{WEAVIATE_HTTP_PORT} "
    f"-p {WEAVIATE_GRPC_PORT}:{WEAVIATE_GRPC_PORT} "
    f"-e AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED=true "
    f"-e PERSISTENCE_DATA_PATH=/var/lib/weaviate "
    f"-e DEFAULT_VECTORIZER_MODULE=none "
    f"-e ENABLE_MODULES='' "
    f"-e CLUSTER_HOSTNAME=node1 "
    f"{WEAVIATE_IMAGE}"
)

result = run_shell_command(run_command)

if result["success"]:
    print("‚úÖ Weaviate container started successfully.")
    print("Waiting a few seconds for the service to initialize...")

    # Poll Weaviate's readiness endpoint instead of sleeping a fixed time: a fixed
    # delay is either wasted on fast machines or too short on slow ones.
    ready_url = f"http://localhost:{WEAVIATE_HTTP_PORT}/v1/.well-known/ready"
    deadline = time.monotonic() + WEAVIATE_STARTUP_TIMEOUT_S
    weaviate_ready = False
    while time.monotonic() < deadline:
        try:
            with urllib.request.urlopen(ready_url, timeout=2) as response:
                if response.status == 200:
                    weaviate_ready = True
                    break
        except OSError:
            # Covers connection refused/reset and urllib's URLError/HTTPError
            # (both OSError subclasses): the service is not ready yet.
            pass
        time.sleep(1)

    if weaviate_ready:
        print("Weaviate reported ready.")
    else:
        print(f"WARNING: Weaviate did not report ready within {WEAVIATE_STARTUP_TIMEOUT_S} seconds.")
else:
    print("‚ùå Failed to start Weaviate container.")
    print(f"Stderr: {result['stderr']}")

# Display container statistics (a single snapshot thanks to --no-stream).
print("\n--- Weaviate Container Stats ---")
stats_result = run_shell_command(f"docker stats {WEAVIATE_CONTAINER_NAME} --no-stream")
print(stats_result["stdout"])
if stats_result["stderr"]:
    print(f"Stderr: {stats_result['stderr']}")

--- Stopping and removing any existing container named 'simple-rag-weaviate' ---
Cleanup complete.

--- Starting Weaviate container 'simple-rag-weaviate' ---
‚úÖ Weaviate container started successfully.
Waiting a few seconds for the service to initialize...

--- Weaviate Container Stats ---
CONTAINER ID   NAME                  CPU %     MEM USAGE / LIMIT     MEM %     NET I/O          BLOCK I/O        PIDS
f6e0de8e09c7   simple-rag-weaviate   1.22%     51.18MiB / 15.18GiB   0.33%     8.05kB / 3.6kB   66.9MB / 201kB   23


## Embeddings and Data Ingestion

In [None]:
from langchain_core.messages import AIMessage
from langchain_core.runnables import Runnable, RunnableConfig
import weaviate
import weaviate.classes as wvc
from weaviate.util import generate_uuid5
from sentence_transformers import SentenceTransformer
from transformers import pipeline
import torch
import numpy as np

# --- Wrapper class for the local Embeddings model ---
class LocalHuggingFaceEmbeddings:
    """
    Adapter around a local SentenceTransformer model that offers the two
    methods the LangChain embeddings interface expects: embed_documents and
    embed_query.

    If the requested model cannot be loaded, the constructor reports the error
    and loads "sentence-transformers/all-MiniLM-L6-v2" instead.
    """
    def __init__(self, model_name):
        print(f"üì• Loading local embedding model: {model_name}...")
        try:
            self.model = SentenceTransformer(model_name)
            print("‚úÖ Local embedding model loaded successfully.")
        except Exception as load_error:
            # Any load failure triggers the fallback; a failure of the fallback
            # itself propagates to the caller.
            print(f"‚ùå Error loading {model_name}. Falling back to 'all-MiniLM-L6-v2'.")
            print(f"Error details: {load_error}")
            self.model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

    def embed_documents(self, texts):
        # Encode the whole batch, then convert the NumPy result to nested lists.
        return self.model.encode(texts, convert_to_numpy=True).tolist()

    def embed_query(self, text):
        # Encode a single text, then convert the NumPy result to a flat list.
        query_vector = self.model.encode(text, convert_to_numpy=True)
        return query_vector.tolist()

In [None]:
# --- 1. Setup LangChain Clients ---
print("--- 1. Setting up AI clients ---")
try:
    # Build the embedding client from the model chosen in the configuration cell.
    embeddings_model = LocalHuggingFaceEmbeddings(LOCAL_EMBEDDING_MODEL_NAME)

    # Chat model setup is currently disabled:
    # chat_model = LocalHuggingFaceChatModel(LOCAL_LLM_MODEL_NAME)
    # print("‚úÖ AI clients initialized.")

except Exception as init_error:
    # Report the problem, then re-raise so the notebook stops at this cell.
    print(f"‚ùå Failed to initialize AI clients. Please check your .env file or model names. Error: {init_error}")
    raise

--- 1. Setting up AI clients ---
üì• Loading local embedding model: google/embeddinggemma-300m...
‚úÖ Local embedding model loaded successfully.
üì• Loading local LLM: google/gemma-3-1b-it...


`torch_dtype` is deprecated! Use `dtype` instead!
Device set to use cpu


‚úÖ Local LLM loaded successfully.
‚úÖ AI clients initialized.


In [9]:
# --- Text Chunking Utility ---
def chunk_text(text, chunk_size=500, overlap=100):
    """
    Splits text into overlapping, fixed-size character chunks.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must
            satisfy 0 <= overlap < chunk_size.

    Returns:
        A list of substrings in document order. Each chunk starts
        ``chunk_size - overlap`` characters after the previous one; the final
        chunk may be shorter than ``chunk_size``. Empty text yields ``[]``.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    # overlap >= chunk_size would give a non-positive step (the window never
    # advances, i.e. an endless loop); a negative overlap would skip characters.
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got overlap={overlap}, chunk_size={chunk_size}"
        )

    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]

# --- 2. Generate Chunked Embeddings ---
print("\n--- 2. Generating embeddings for all document chunks ---")

# Flatten every document into chunk records; chunk_id restarts at 0 per document.
chunked_documents = [
    {"title": source_doc["title"], "content": piece, "chunk_id": position}
    for source_doc in documents_data
    for position, piece in enumerate(
        chunk_text(source_doc["content"], chunk_size=500, overlap=100)
    )
]

print(f"‚úÖ Created {len(chunked_documents)} text chunks.")

# Embed the text of all chunks in one call.
contents_to_embed = [record["content"] for record in chunked_documents]
vector_embeddings = embeddings_model.embed_documents(contents_to_embed)

# Store each vector on the chunk record at the same position.
for position in range(len(chunked_documents)):
    chunked_documents[position]["content_vector"] = vector_embeddings[position]

print(f"‚úÖ Generated {len(vector_embeddings)} chunk embeddings.")



--- 2. Generating embeddings for all document chunks ---
‚úÖ Created 34 text chunks.
‚úÖ Generated 34 chunk embeddings.


In [10]:
# --- 3. Connect to Weaviate ---
print("\n--- 3. Connecting to Weaviate ---")
weaviate_client = weaviate.connect_to_local(
    host="localhost", port=WEAVIATE_HTTP_PORT, grpc_port=WEAVIATE_GRPC_PORT
)

# Guard clause: if the instance is not ready, release the client and stop here.
if not weaviate_client.is_ready():
    print("‚ùå Failed to connect to Weaviate.")
    weaviate_client.close()
    raise ConnectionError("Could not connect to Weaviate instance.")

print("‚úÖ Successfully connected to Weaviate.")


--- 3. Connecting to Weaviate ---
‚úÖ Successfully connected to Weaviate.


In [11]:
# --- 4. Define and Create Weaviate Collection ---
COLLECTION_NAME = "SimpleRAG"
print(f"\n--- 4. Creating Weaviate collection: '{COLLECTION_NAME}' ---")

# Start from a clean slate: drop a collection left over from an earlier run.
if weaviate_client.collections.exists(COLLECTION_NAME):
    weaviate_client.collections.delete(COLLECTION_NAME)
    print(f"Deleted existing collection '{COLLECTION_NAME}'.")

# Schema: two text fields and an integer chunk index.
schema_properties = [
    wvc.config.Property(name=field_name, data_type=field_type)
    for field_name, field_type in (
        ("title", wvc.config.DataType.TEXT),
        ("content", wvc.config.DataType.TEXT),
        ("chunk_id", wvc.config.DataType.INT),
    )
]

# Vectors are supplied by the notebook (self-provided), indexed with HNSW using
# cosine distance.
hnsw_cosine_index = wvc.config.Configure.VectorIndex.hnsw(
    distance_metric=wvc.config.VectorDistances.COSINE
)
rag_collection = weaviate_client.collections.create(
    name=COLLECTION_NAME,
    properties=schema_properties,
    vector_config=wvc.config.Configure.Vectors.self_provided(
        vector_index_config=hnsw_cosine_index
    ),
)

print(f"‚úÖ Collection '{COLLECTION_NAME}' created successfully.")


--- 4. Creating Weaviate collection: 'SimpleRAG' ---
‚úÖ Collection 'SimpleRAG' created successfully.


In [12]:
# --- 5. Batch-Insert Chunked Data ---
print(f"\n--- 5. Ingesting {len(chunked_documents)} document chunks into Weaviate ---")

try:
    with rag_collection.batch.dynamic() as batch:
        for doc in chunked_documents:
            properties = {
                "title": doc["title"],
                "content": doc["content"],
                "chunk_id": doc["chunk_id"]
            }

            # Title + chunk index identifies a chunk; generate_uuid5 derives the
            # object UUID from that identifier.
            unique_id = f"{doc['title']}_chunk_{doc['chunk_id']}"

            batch.add_object(
                properties=properties,
                vector=doc["content_vector"],
                uuid=generate_uuid5(unique_id)
            )

    # Per-object import errors do not raise from the batch context manager; they
    # are collected on the collection's batch handle and must be checked here.
    failed_objects = rag_collection.batch.failed_objects
    if failed_objects:
        print(f"WARNING: {len(failed_objects)} of {len(chunked_documents)} chunks failed to import.")
        for failed in failed_objects[:5]:
            print(f"  - {failed.message}")

    print(f"‚úÖ Chunked data ingestion complete. Total objects in collection: {len(rag_collection)}")
finally:
    # Close the client connection, also when ingestion raised, so the
    # connection is not leaked.
    weaviate_client.close()


--- 5. Ingesting 34 document chunks into Weaviate ---
‚úÖ Chunked data ingestion complete. Total objects in collection: 34
