# Model parameters

In [1]:
import os

EMBEDDINGS_FILE = "MOOSE_embeddings"
DOCUMENT_DATASET = "MOOSE-website"
MODEL_NAME = "all-MiniLM-L12-v2"
CEN_MODEL = "ms-marco-MiniLM-L-12-v2"

current_folder = os.getcwd()
parent_dir = os.path.dirname(current_folder)

model_path = os.path.join("/Users/lim2/Research/LLM/pretrained_models", MODEL_NAME)

folder_files = os.path.join(current_folder+"/data/raw_data",DOCUMENT_DATASET)
cen_model_path = os.path.join("/Users/lim2/Research/LLM/pretrained_models/cross-encoders/", CEN_MODEL)



In [None]:

import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))


In [8]:
def get_post_content(post):
    content = ""
    # Title
    content += post["title"] + "\n"
    # Body text
    if "bodyText" in post:
        content += post["bodyText"] + "\n"
    # Comments
    if "comments" in post:
        for comment in post["comments"]["edges"]:
            content += get_comment_content(comment["node"])
    return content


def get_comment_content(comment):
    content = ""
    # Body text
    if "bodyText" in comment:
        content += comment["bodyText"] + "\n"
    # Replies
    if "replies" in comment:
        for reply in comment["replies"]["edges"]:
            content += get_comment_content(reply["node"])
    return content


def get_reply_content(reply):
    return reply["bodyText"] + "\n"


def encode_content(model, content):
    return model.encode(content)

# Load local documatations

In [9]:
from llama_index.core import SimpleDirectoryReader
from llama_index.core import Document
from llama_index.core.readers.base import BaseReader

from pathlib import Path
import json

# Directory for storing JSON responses fetched from GitHub
response_dir = Path("response")
# Database directory
db_dir = Path("database")


class MyFileReader(BaseReader):
    def load_data(self, file, extra_info=None):
        with open(file, "r") as f:
            page = json.loads(file.read_text())
            posts = page["discussions"]["edges"]
            for post in posts:
                title = post["node"]["title"]
                url = post["node"]["url"]
                content = get_post_content(post["node"])
        # load_data returns a list of Document objects
        return [Document(text=content, metadata={"title": title, "url": url})]


reader = SimpleDirectoryReader(
    input_dir=response_dir, file_extractor={".json": MyFileReader()}
)

documents = reader.load_data()

print(f"Loaded {len(documents)} docs")

Loaded 972 docs


# Setup llama Index node

In [4]:
from llama_index.core.node_parser import (
    SentenceSplitter,
    SemanticSplitterNodeParser,
)
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
    Settings,
)

import torch

from llama_index.llms.ollama import Ollama
from llama_index.llms.openai import OpenAI
from llama_index.core.llms import ChatMessage
from llama_index.llms.openai_like import OpenAILike

embed_model = HuggingFaceEmbedding(
    model_name=model_path
)

splitter = SemanticSplitterNodeParser(embed_model=embed_model,
    buffer_size=50, breakpoint_percentile_threshold=95
)


# ollama
Settings.llm = Ollama(model="llama3.3", request_timeout=3600.0)

Settings.embed_model = embed_model


# Initialize the OpenAI client
# OpenAI.api_key = os.getenv("OPENAI_API_KEY")


# llm = OpenAILike(model="Mistral-Nemo-Instruct-2407", api_base="https://api.hpc.inl.gov/llm/v1",  is_chat_model=True)
# Settings.llm = llm

  from .autonotebook import tqdm as notebook_tqdm


At this point you can decide whether you would like to store index locally or store it in online vector database Qdrant. Run the following block to store the index on disk. Otherwise jump to `Set up Qdrant vector database` to use Qdrant vector database

# Create llama Index node

In [76]:
nodes = splitter.get_nodes_from_documents(documents, show_progress=True)

Generating embeddings: 100%|██████████| 17/17 [00:00<00:00, 163.09it/s]
Generating embeddings: 100%|██████████| 17/17 [00:00<00:00, 348.95it/s]
Generating embeddings: 100%|██████████| 8/8 [00:00<00:00, 375.67it/s]
Generating embeddings: 100%|██████████| 11/11 [00:00<00:00, 237.93it/s]
Generating embeddings: 100%|██████████| 9/9 [00:00<00:00, 380.71it/s]
Generating embeddings: 100%|██████████| 39/39 [00:00<00:00, 91.62it/s]
Generating embeddings: 100%|██████████| 20/20 [00:00<00:00, 310.74it/s]
Generating embeddings: 100%|██████████| 72/72 [00:00<00:00, 237.66it/s]
Generating embeddings: 100%|██████████| 3/3 [00:00<00:00, 145.59it/s]
Generating embeddings: 100%|██████████| 8/8 [00:00<00:00, 385.19it/s]
Generating embeddings: 100%|██████████| 20/20 [00:00<00:00, 326.86it/s]
Generating embeddings: 100%|██████████| 7/7 [00:00<00:00, 349.63it/s]
Generating embeddings: 100%|██████████| 9/9 [00:00<00:00, 345.34it/s]
Generating embeddings: 100%|██████████| 22/22 [00:00<00:00, 293.67it/s]
Gener

# Create storage context locally

In [10]:
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.vector_stores.hnswlib import HnswlibVectorStore


hnswlib_vector_store = HnswlibVectorStore.from_params(
    space="ip",
    dimension=embed_model._model.get_sentence_embedding_dimension(),
    max_elements=10000,
)

storage_context = StorageContext.from_defaults(
    docstore=SimpleDocumentStore(),
    vector_store=hnswlib_vector_store,
    index_store=SimpleIndexStore(),
)

storage_context.docstore.add_documents(nodes)
index = VectorStoreIndex(nodes, storage_context=storage_context,
    show_progress=True,)


index.storage_context.persist(persist_dir=db_dir)


NameError: name 'nodes' is not defined

# Load llama index from disk

In [11]:

vector_store = hnswlib_vector_store.from_persist_dir(db_dir)
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, persist_dir=db_dir
)
index = load_index_from_storage(storage_context=storage_context)

# Test vector database

In [13]:

from llama_index.core.postprocessor import SentenceTransformerRerank, SentenceEmbeddingOptimizer
from llama_index.core import QueryBundle
from sentence_transformers import SentenceTransformer, CrossEncoder
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.postprocessor.llm_rerank import LLMRerank
from llama_index.core.postprocessor import SimilarityPostprocessor

k = 5
query = "Why is my simulation not converging"

query_bundle = QueryBundle(query)

hnswlib_vector_retriever = index.as_retriever(similarity_top_k=k, similarity_metric='cosine')
retrieved_nodes = hnswlib_vector_retriever.retrieve(query_bundle)
# retriever = VectorIndexRetriever(index=index,similarity_metric='cosine', similarity_top_k=k)
# retrieved_nodes = retriever.retrieve(query_bundle)

# processor = SimilarityPostprocessor(similarity_cutoff=0.5)
# #filtered_nodes = processor.postprocess_nodes(retrieved_nodes)

# cross_encoder = CrossEncoder(cen_model_path, max_length=2048, device="cpu")
# rerank = SentenceTransformerRerank(model=cen_model_path, top_n=5)
# #rerank = LLMRerank(choice_batch_size=5,top_n=3,)
# retrieved_nodes = rerank.postprocess_nodes(retrieved_nodes, query_bundle)

for node in retrieved_nodes:
    print(f"Node {node.id_} | Score: {node.score:.3f} - {node.metadata}...")

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Node 3459b242-1ec0-4596-9995-44ab3e939b35 | Score: 0.318 - {'title': "Why can't my simulation converge?", 'url': 'https://github.com/idaholab/moose/discussions/21826'}...
Node 1f76cb75-2434-4074-a0bd-cf2fb65be5dc | Score: 0.452 - {'title': 'Simulation stuck on really small time steps', 'url': 'https://github.com/idaholab/moose/discussions/26439'}...
Node 61d095b2-e4ca-4d95-b389-f42943165653 | Score: 0.483 - {'title': 'Converging but very slowly', 'url': 'https://github.com/idaholab/moose/discussions/17333'}...
Node fdadb758-53d6-4c35-9011-a21f9a378877 | Score: 0.491 - {'title': '"Solve did not converge"', 'url': 'https://github.com/idaholab/moose/discussions/19427'}...
Node cbd347b0-f497-4985-82ee-95d72c626144 | Score: 0.492 - {'title': 'Simulation stuck on really small time steps', 'url': 'https://github.com/idaholab/moose/discussions/26439'}...


In [None]:

from llama_index.core import SimpleDirectoryReader, get_response_synthesizer
from llama_index.core import DocumentSummaryIndex
from llama_index.llms.openai import OpenAI
from llama_index.core.node_parser import SentenceSplitter

response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize")
doc_summary_index = DocumentSummaryIndex.from_documents(
    documents,
    transformations=[splitter],
    response_synthesizer=response_synthesizer,
    show_progress=True,
)

In [None]:

from llama_index.core.indices.document_summary import (
    DocumentSummaryIndexEmbeddingRetriever,
)


retriever = DocumentSummaryIndexEmbeddingRetriever(
    index,
    # similarity_top_k=1,
)

retrieved_nodes = retriever.retrieve(query_bundle)

for node in retrieved_nodes:
    print(f"Node {node.id_} | Score: {node.score:.3f} - {node.text}...")

# Set up Qdrant vector database

In [None]:
import os
from qdrant_client import QdrantClient
from llama_index.vector_stores.qdrant import QdrantVectorStore

client = QdrantClient(
    url="https://api.hpc.inl.gov",
    prefix="qdrant",
    port=443,
    api_key = "eyJhbGciOiJIUzI1NiJ9.eyJhY2Nlc3MiOlt7ImNvbGxlY3Rpb24iOiJtZW5nbmFuX2xpIiwiYWNjZXNzIjoicncifV19.p5U7lfr17aR9_Yj4Pu759jfW2GkR42s2JlvkDNmfNoo"
)

print(client.get_collections())

In [None]:
import qdrant_client
client = qdrant_client.QdrantClient(
    location=":memory:",
)

In [None]:

vector_store = QdrantVectorStore(client=client, collection_name="mengnan_li", prefer_grpc=True)
storage_context = StorageContext.from_defaults(vector_store=vector_store)


index = VectorStoreIndex.from_documents(
     documents,
     storage_context=storage_context, show_progress=True)


## If you want to load llamaIndex nodes from disk to Qdrant database

In [None]:

storage_context.docstore.add_documents(nodes)
index = VectorStoreIndex(nodes, storage_context=storage_context,show_progress=True,)

# Test Qdrant vector database

In [None]:
k = 5
query = "ConcentricCircleMesh"

query_bundle = QueryBundle(query)

hnswlib_vector_retriever = index.as_retriever(similarity_top_k=k)
#nodes_with_scores = hnswlib_vector_retriever.retrieve(query_bundle)
retriever = VectorIndexRetriever(index=index,similarity_top_k=k)
retrieved_nodes = retriever.retrieve(query_bundle)


for node in retrieved_nodes:
    print(f"Node {node.id_} | Score: {node.score:.3f} - {node.text[:120]}...")

Node 4c233418-0371-4da5-be43-7282072ddab9 | Score: -3.459 - How to generate a cylinder and gridding?
I want to generate a simple cylinder using mesh generators, but haven't find th...
Node ea606dd0-fd61-45fd-9e48-f7806f95222d | Score: -5.289 - Uniform Mesh for Circular Geometry in MOOSE
Hi everyone,
I'm trying to simulate a circular geometry using the meshing to...
Node 3c7370b4-042c-42c2-8939-7ab4e0b62ea3 | Score: -7.217 - Preserve circular geometries when refining mesh
Hi all,
I am making a cylindrical mesh using meshing tools built into mo...
Node 7d7eb33f-060e-4de6-abf6-7901cd342828 | Score: -10.447 - I have one more question: If I want to create a cylinder, can I generate a circle using ParsedCurveGenerator, extrude it...
Node e6757dd3-e3ad-4d31-a94c-e10c288769b4 | Score: -11.293 - On second thought, I'd probably start with something very simple: the user specifies

position: the start position
perpi...


In [99]:
for node in retrieved_nodes:
    print(f"Node {node.id_} | Score: {node.score:.3f} - {node.metadata['title']}...")

Node 3459b242-1ec0-4596-9995-44ab3e939b35 | Score: 0.318 - Why can't my simulation converge?...
Node 1f76cb75-2434-4074-a0bd-cf2fb65be5dc | Score: 0.452 - Simulation stuck on really small time steps...
Node 61d095b2-e4ca-4d95-b389-f42943165653 | Score: 0.483 - Converging but very slowly...
Node fdadb758-53d6-4c35-9011-a21f9a378877 | Score: 0.491 - "Solve did not converge"...
Node cbd347b0-f497-4985-82ee-95d72c626144 | Score: 0.492 - Simulation stuck on really small time steps...


In [17]:
from llama_index.core import PromptTemplate
from llama_index.core.postprocessor import SimilarityPostprocessor
import ollama

template = (
  "In the beginning, please provide the title {metadata['title']} and reference URL {metadata['url']} to the user.\n"
  "Please provide a short summary of the context.\n"
  "---------------------\n"
  "{context_str}\n"
  "---------------------\n"
)
qa_template = PromptTemplate(template)


postprocessor = SimilarityPostprocessor(similarity_cutoff=0.4)

rerank_nodes = postprocessor.postprocess_nodes(retrieved_nodes)

if rerank_nodes:
  for node in rerank_nodes:
      prompt = qa_template.format(context_str=node.text, metadata=node.metadata)
      response = ollama.chat(model="llama3.3", messages=[{"role": "user", "content": prompt}])
      print(response['message']['content'])

else:
   print("No relevant results returned")


KeyboardInterrupt: 