In [20]:
# Verify weviate-client is installed and the database is live and ready
import weaviate
client = weaviate.Client("http://localhost:8080")
assert client.is_live()
assert client.is_ready()
client.get_meta()

{'hostname': 'http://[::]:8080', 'modules': {}, 'version': '1.19.2'}

In [21]:
# !!!! Delete data in Weaviate
client.schema.delete_class("Wiki_Node")

In [22]:
import html2text
text_maker = html2text.HTML2Text()
text_maker.ignore_links = True
text_maker.ignore_images = True

def wiki_to_text(ancestors, title, labels, body):
    body_text = text_maker.handle(body)
    text =  f"Title: {title}\n"
    if ancestors: text += f"Ancestors: {ancestors}\n" 
    if labels: text += f"Labels: {ancestors}\n"
    text += f"{body_text}"
    return text


In [23]:
# Walk cached Wiki pages
import os
import re
n = 0
for root, dirs, files in os.walk("../data/wiki"):
    for name in files:
        filepath = os.path.join(root, name)
        with open(filepath) as f:
            link = f.readline().rstrip()
            ancestors = f.readline().rstrip()
            title = f.readline().rstrip()
            labels = f.readline().rstrip()
            body = re.sub('[\n]+', '\n', "".join(f.readlines()))
            text = wiki_to_text(ancestors, title, labels, body)
            if n<1:
                print(name)
                print(link)
                print(text)
            n += 1


30746710
https://wikis.janelia.org/display/ScientificComputing/Scientific+Computing+Server+-+e03u07
Title: Scientific Computing Server - e03u07
Ancestors: Home / Joint SciComp Systems/Software pages / SCSW Servers
Labels: Home / Joint SciComp Systems/Software pages / SCSW Servers
Description| JACS Prod Swarm (NEW)  
  
---|---  
OS| OL 9.1  
Software| Docker  
Hardware| Dell R6515  
Cores| 64  
RAM (GB)| 128  
Disks| 2 x 2TB NVMe in raid1  
  
Network

|

Internal  
  
IP

|

10.40.2.134  
  
Canonical name

|

e03u07.int.janelia.org  
  
Aliases

|

  
  
  
POC|

Cristian Goina  
  
Warranty Ends| 2026-07-10  
  
## Purpose

The purpose of the server in greater detail than the short description above.

## Software

  * List of
  * all the
  * software installed
  * on this server.
  * If we had to recreate the server from scratch, what would we need to think about installing?

## Configuration

Install and configure fuse for s3fs:

    
    
    sudo dnf install s3fs-fuse  
    sudo 

In [24]:
# Create Documents from cached wiki files
from llama_index import Document
documents = []
for root, dirs, files in os.walk("../data/wiki"):
    for name in files:
        filepath = os.path.join(root, name)
        with open(filepath) as f:
            link = f.readline().rstrip()
            ancestors = f.readline().rstrip()
            title = f.readline().rstrip()
            labels = f.readline().rstrip()
            body = re.sub('[\n]+', '\n', "".join(f.readlines()))
            text = wiki_to_text(ancestors, title, labels, body)
            doc = Document(text, doc_id=name, extra_info={"title": title, "link": link})
            documents.append(doc)

print(f"Loaded {len(documents)} documents")

Loaded 520 documents


In [25]:
documents[40]

Document(text='Title: Fly Assistant Jetson Board Setup\nAncestors: Home / Scientific Computing Software / Technology\nLabels: Home / Scientific Computing Software / Technology\n\n\nThese are instructions for setting up a new Jetson board and installing/using\ncustom software for the board.\n\n## Flashing TX2 board with Jetpack, Leopard Imaging drivers\n\n _Prerequisites:_ Separate Ubuntu PC for flashing Jetson board, Nvidia Jetson\nTX2 board, Leopard CSI Camera Carrier Board/Kit\n(https://leopardimaging.com/product/li-jetson-kit-imx185cs-x/)\n\n  1. Register for Nvidia Developer Program.\n  2. Login into your account and download Jetpack 3.2.\n  3. Follow the install guide to install Jetpack on your Ubuntu machine. \n    1. **_NOTE:_** _make sure you have "sudo" privileges to the file share you are installing the Jetpack to. It\'s recommended to install to /opt since the Systems group is more likely to give you sudo access to that partition. The installer may ask for root password a fe

In [26]:
from llama_index import LLMPredictor, PromptHelper, ServiceContext
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

llm = ChatOpenAI(temperature=0.2, model_name="gpt-3.5-turbo-0301")
llm_predictor = LLMPredictor(llm=llm)
embed_model = LangchainEmbedding(OpenAIEmbeddings())

max_input_size = 4096
num_output = 256
max_chunk_overlap = 20
prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)

service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, embed_model=embed_model, prompt_helper=prompt_helper)

In [27]:
# Calculate embedding for all of the documents and save them into Weaviate
from llama_index import GPTVectorStoreIndex
from llama_index.vector_stores import WeaviateVectorStore
from llama_index.storage.storage_context import StorageContext

class_prefix = "Wiki"
vector_store = WeaviateVectorStore(weaviate_client=client, class_prefix=class_prefix)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# persists the vector_store into Weaviate
index = GPTVectorStoreIndex.from_documents(documents, storage_context=storage_context, service_context=service_context)

# persist the docstore and index_store
# this is currently required although in theory Weaviate should be able to handle these as well
storage_context.persist(persist_dir='../storage/index')

In [28]:
def get_unique_nodes(nodes):
    docs_ids = set()
    unique_nodes = list()
    for node in nodes:
        if node.node.ref_doc_id not in docs_ids:
            docs_ids.add(node.node.ref_doc_id)
            unique_nodes.append(node)
    return unique_nodes
        
def print_response(response):
    print(response.response)    
    for node in get_unique_nodes(response.source_nodes):
        print(f"{node.node.extra_info['title']}")
        print(f"\t{node.node.extra_info['link']}")
        
def query(question, n=5):   
    query_engine = index.as_query_engine(similarity_top_k=n)
    res = query_engine.query(question)
    print_response(res)
        

In [29]:
from llama_index.retrievers import VectorIndexRetriever
from llama_index.vector_stores.types import VectorStoreQueryMode
from llama_index import ResponseSynthesizer
from llama_index.query_engine import RetrieverQueryEngine

# configure retriever
retriever = VectorIndexRetriever(
    index,
    similarity_top_k=5,
    vector_store_query_mode=VectorStoreQueryMode.HYBRID,
    alpha=0.7,
)

# configure response synthesizer
synth = ResponseSynthesizer.from_args()

# construct query engine
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=synth,
)

query("On what physical server is Nextflow Tower installed, and where will it be moving to?")

The new context provided is not related to the original question about the physical server where Nextflow Tower is installed and its potential move. Therefore, the original answer still stands: there are two servers mentioned where Nextflow Tower is installed: c13u05 and nextflow.int.janelia.org, and there is no mention of Nextflow Tower moving to a different physical server.
Using Nextflow Tower
	https://wikis.janelia.org/display/SCSW/Using+Nextflow+Tower
Scientific Computing Server - e06u05
	https://wikis.janelia.org/display/ScientificComputing/Scientific+Computing+Server+-+e06u05
Scientific Computing Server - c13u05
	https://wikis.janelia.org/display/ScientificComputing/Scientific+Computing+Server+-+c13u05
Software Infrastructure
	https://wikis.janelia.org/display/SCSW/Software+Infrastructure


In [18]:
query("What interest groups does Scientific Computing sponsor?")

The provided context is not useful for refining the original answer as it does not provide any information about the interest groups sponsored by Scientific Computing. Therefore, the original answer remains the same.
Interest Groups
	https://wikis.janelia.org/display/SCSW/Interest+Groups
Scientific Computing Systems
	https://wikis.janelia.org/display/SCS/Scientific+Computing+Systems
High Performance Computing
	https://wikis.janelia.org/display/SCSW/High+Performance+Computing
Open Science Software
	https://wikis.janelia.org/display/SCSW/Open+Science+Software
