In [147]:
!pip install langchain_community unstructured jq docarray faiss-gpu chromadb -q


In [150]:
# from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

from langchain_community.vectorstores import Chroma
import openai
from dotenv import load_dotenv
import os
import argparse
from langchain.prompts import ChatPromptTemplate
import shutil

In [142]:
CHROMA_PATH = "chroma"
DATA_PATH = "/content/data"

In [208]:
from pathlib import Path

# Specify the directory
directory = Path('/content/data')

# Collect paths of all JSON files
json_files = list(directory.glob('*.json'))
json_files = [str(j) for j in json_files]
print(json_files)

['/content/data/example.json', '/content/data/bottle.json']


In [209]:
from langchain_community.document_loaders import JSONLoader

docs = []
for fp in json_files:
    loader = JSONLoader(
        file_path=fp,
        jq_schema='.',
        text_content=False)

    data = loader.load()
    docs.extend(data)

In [182]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=600,
    chunk_overlap=200,
    length_function=len,
    add_start_index=True,
)
chunks = text_splitter.split_documents(docs)
print(f"Split {len(docs)} documents into {len(chunks)} chunks.")

Split 2 documents into 12 chunks.


In [183]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

# Wrap the SentenceTransformer model using HuggingFaceEmbeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [None]:
from langchain_community.vectorstores import Chroma

vectordb = Chroma.from_documents(
    chunks, embedding_model, persist_directory=CHROMA_PATH
)
vectordb.persist()
print(f"Saved {len(chunks)} chunks to {CHROMA_PATH}.")

In [185]:
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# Load the LLaMA model and tokenizer
model_name = "meta-llama/Llama-3.2-1B"  # Replace with the exact model name/path
token = "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"
tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=token, truncation=True)
tokenizer.pad_token_id = tokenizer.eos_token_id  # Set pad_token_id explicitly
model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=token, device_map="auto", torch_dtype="auto")

# Create a pipeline for inference
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_new_tokens=400, temperature=0.3, top_p=0.9)

# Wrap the pipeline in HuggingFacePipeline
llm = HuggingFacePipeline(pipeline=pipe)



In [186]:

from langchain.chains.question_answering import load_qa_chain
prompt_template = """
Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer\n\n
Context:\n {context}?\n
Question: \n{question}\n
Answer:
"""
prompt = PromptTemplate(template=prompt_template,
                        input_variables=["context", "question"])
r = load_qa_chain(llm, chain_type="stuff", prompt=prompt)

In [197]:
query = 'what are the materials of making a calming bottle ?'

In [198]:
docs = vectordb.similarity_search(query)

In [199]:
response = r({"input_documents": docs, "question": query}, return_only_outputs=True)

In [200]:
print(response['output_text'])


Answer the question as detailed as possible from the provided context, make sure to provide all the details, if the answer is not in
provided context just say, "answer is not available in the context", don't provide the wrong answer


Context:
 {"Title": "How to Make a Calming Bottle", "Supplies": "\n--------\n\n*   plastic 16-ounce bottle with lid\n*   8 ounces clear liquid glue\u00a0\n*   2 cups water\n*   2 cup glass measuring cup\n*   microwave\u00a0\n*   extra fine glitter (any color)\n*   food coloring (any color)\n*   funnel\n*   small spoon\u00a0\n*   small lightweight items to place in bottle (optional)\n*   glue gun (recommended)", "Step 1": "Prepare the Bottle\n--------------------------\n\n*   Clean the plastic bottle.\n*   Peel off any labels.", "Step 2": "Heat Water\n------------------\n\nHeat 1 cup of water in the

of hot water and \u00bd cup of clear liquid glue into the bottle.\n*   Add 5 drops of food coloring.\n*   Place the lid on the bottle tightly and shake to mi