In [None]:
from llama_index.core import VectorStoreIndex, Settings, StorageContext
from llama_index.core.schema import TextNode, NodeRelationship, RelatedNodeInfo
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.elasticsearch import ElasticsearchStore
import json
import torch_directml

In [None]:
# Select the compute device through DirectML. This path is meant for GPUs without
# CUDA support; on a CUDA machine use plain torch with a CUDA device instead.
directml_available = torch_directml.is_available()
print("Torch DirectML available:", directml_available)

# Fall back to the CPU instead of requesting a DirectML device when none is available.
device = torch_directml.device() if directml_available else "cpu"

# File holding the pre-split document sections consumed by the node-building cell.
input_sections_path = "custom_sections_512.json"

In [None]:
# Instantiate the HuggingFace embedding model on the selected device,
# with normalize=True passed to the wrapper.
embedding_model_name = "ibm-granite/granite-embedding-30m-english"
embed_model = HuggingFaceEmbedding(model_name=embedding_model_name, device=device, normalize=True)

# Explicitly place the wrapped model on the selected device as well.
embed_model._model = embed_model._model.to(device)
print(f"Model moved to {device}")

# Register the model on the global llama_index Settings object.
Settings.embed_model = embed_model

In [None]:
# Build one TextNode per section and chain the sections of each document together.
# The input is read one line at a time; every non-blank line must be a JSON array of
# documents carrying "id", "title" and "sections" (each section has "title" and "text").
line_counter = 0
all_nodes = []

# Decode explicitly as UTF-8 so the platform default encoding cannot corrupt the text.
with open(input_sections_path, "r", encoding="utf-8") as file:
    for line in file:
        line_counter += 1

        # A blank line (typically a trailing one) is not valid JSON; skip it.
        if not line.strip():
            continue

        for document in json.loads(line):
            document_id = document["id"]
            title = document["title"]

            # Create a node for each section
            document_nodes = [
                TextNode(
                    text=section["text"],
                    metadata={
                        "document_title": title,
                        "document_id": document_id,
                        "section_title": section["title"],
                    },
                )
                for section in document["sections"]
            ]

            # Link every pair of consecutive sections in both directions. With zero or
            # one section there are no pairs, so no relationships are created.
            for previous_node, next_node in zip(document_nodes, document_nodes[1:]):
                previous_node.relationships[NodeRelationship.NEXT] = RelatedNodeInfo(
                    node_id=next_node.node_id
                )
                next_node.relationships[NodeRelationship.PREVIOUS] = RelatedNodeInfo(
                    node_id=previous_node.node_id
                )

            all_nodes.extend(document_nodes)

        # Progress report: physical line number and running total of nodes.
        print(f"Line {line_counter}, nodes created: {len(all_nodes)}")

In [None]:
# Populate Elasticsearch with the nodes: wrap the "techqa-index" index of the local
# instance in a vector store, then build the index over all_nodes on top of it.
elasticsearch_store = ElasticsearchStore(index_name="techqa-index", es_url="http://localhost:9200")
storage_context = StorageContext.from_defaults(
    vector_store=elasticsearch_store,
)
index = VectorStoreIndex(
    all_nodes,
    storage_context=storage_context,
    show_progress=True,
)
        