In [2]:
# Verify that weaviate-client is installed and that the database is live and ready.
import weaviate

# Address of the local Weaviate instance used by the `client` created below.
WEAVIATE_URL = "http://localhost:8080"

client = weaviate.Client(WEAVIATE_URL)

# Fail fast, with a readable message, instead of a bare AssertionError.
assert client.is_live(), f"Weaviate at {WEAVIATE_URL} is not live"
assert client.is_ready(), f"Weaviate at {WEAVIATE_URL} is not ready"

# Last expression of the cell: display the metadata reported by the server.
client.get_meta()

{'hostname': 'http://[::]:8080', 'modules': {}, 'version': '1.20.5'}

In [26]:
# Peek at up to five Web_Node objects, returning only doc_id and _node_content.
(
    client.query
    .get(class_name='Web_Node', properties='doc_id,_node_content')
    .with_limit(5)
    .do()
)

{'data': {'Get': {'Web_Node': [{'_node_content': '{"id_": "00026e5c-7495-436b-bdec-f781c8666be2", "embedding": null, "metadata": {"source": "web", "title": "Wide-field feedback neurons dynamically tune early visual processing. | Janelia Research Campus", "link": "https://www.janelia.org/publication/wide-field-feedback-neurons-dynamically-tune-early-visual-processing"}, "excluded_embed_metadata_keys": [], "excluded_llm_metadata_keys": [], "relationships": {"1": {"node_id": "https://www.janelia.org/publication/wide-field-feedback-neurons-dynamically-tune-early-visual-processing", "node_type": null, "metadata": {"source": "web", "title": "Wide-field feedback neurons dynamically tune early visual processing. | Janelia Research Campus", "link": "https://www.janelia.org/publication/wide-field-feedback-neurons-dynamically-tune-early-visual-processing"}, "hash": "dba37301544ad31d020db586b8cccc5c39cf656ed09ff97dfd1ac70ef0842e18"}}, "hash": "e058aa7c5c3aa215752fa0f989cbd34f60b29d9d12758a14087cff

In [27]:

# Fetch the Web_Node object(s) whose doc_id equals one specific page URL.
# `valueText` is used instead of the deprecated `valueString` filter key, which
# also matches the filter style of the batch-delete cell in this notebook.
where_filter = {
    "path": ["doc_id"],
    "operator": "Equal",
    "valueText": "https://www.janelia.org/people/harikrushnan-balasubramanian",
}

query_result = (
    client.query
    .get(class_name='Web_Node', properties="doc_id,_node_content,text")
    # NOTE: no vector search is part of this query, and the recorded response
    # shows `distance` as None; `id` and `vector` are populated.
    .with_additional(["id", "vector", "distance"])
    .with_where(where_filter)
    .do()
)

# Last expression of the cell: display the raw GraphQL response.
query_result


{'data': {'Get': {'Web_Node': [{'_additional': {'distance': None,
      'id': '60600d14-90c2-4003-b527-bc4a887c1ae5',
      'vector': [-0.011299508,
       -0.005030938,
       0.02350801,
       -0.014061454,
       -0.007502705,
       0.014823611,
       -0.020123752,
       0.005754638,
       -0.022724876,
       -0.012411279,
       0.005709188,
       -0.0016536716,
       0.0029332573,
       0.004562456,
       0.01928468,
       -0.017228952,
       0.044358958,
       -0.00979617,
       -0.018711314,
       -0.018921081,
       -0.044386927,
       0.025955303,
       0.017187,
       0.0023494025,
       -0.025829442,
       0.029731128,
       0.011726037,
       -0.024640758,
       -0.017746381,
       -0.026151087,
       0.0039296555,
       -0.000621875,
       -0.016501756,
       0.0041324315,
       -0.026906252,
       -0.0050693955,
       0.009020028,
       0.004198858,
       0.011642129,
       0.0022095572,
       0.014012507,
       -0.0018984011,
       -

In [19]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

# Semantic search: embed the question, then ask Weaviate for the Test_Node
# objects closest to that embedding.
query = "Who is the director of Scientific Computing?"
class_name = 'Test_Node'

embed_model = LangchainEmbedding(OpenAIEmbeddings())
vector = embed_model.get_query_embedding(query)

# Build the request step by step; each with_* call configures the same builder.
search = client.query.get(class_name=class_name, properties="doc_id,extra_info,text")
search = search.with_additional("distance").with_near_vector({"vector": vector})
query_result = search.with_limit(50).do()

docs = query_result['data']['Get'][class_name]
print(f"Found {len(docs)} results")

# One separator line, then the distance, then the text, for every hit.
for doc in docs:
    distance = doc['_additional']['distance']
    print("---------", distance, doc['text'], sep="\n")



Found 50 results
---------
0.21014386
Mark Kittisopikul said: https://bssw.io/blog_posts/julia-s-value-proposition-for-better-scientific-software
Julia's Value Proposition for Better Scientific Software
Julia provides a mathematical front end to LLVM to provide easy and performant CPU and GPU access and lightweight interoperability with existing C, Fortran, R, and Python codes, coupled with a rich unified ecosystem for packaging, data science, and interactive computing. Hence, Julia fills a gap at the intersection of high performance and high productivity for scientific software.

---------
0.21402836
William Katz said: Good news: Habib has just confirmed he'll be presenting on May 23, 2 pm. He'll go through how he approached the IceCube Kaggle competition (holding 1st for a while and finishing 2nd) and the various tools he used. I'll see if we can bring in external speakers occasionally to show how they approached and implemented solutions to various problems. If you know someone who 

In [66]:

# Request a 2-dimensional feature projection of the vectors of the Wiki_Node
# objects nearest to `vector`.
# NOTE(review): `vector` is not defined in this cell; it must already exist in
# the kernel from one of the embedding cells.
# NOTE(review): the recorded response is the GraphQL error
#   Cannot query field "featureProjection" on type "Wiki_NodeAdditional".
# The server metadata shows 'modules': {} — presumably featureProjection needs
# a module that is not enabled on this instance; confirm against the Weaviate docs.
additional_clause = {"featureProjection": ["vector"]}
additional_setting = {"dimensions": 2}

query_result = (
    client.query
    .get(class_name='Wiki_Node', properties="doc_id,extra_info,text")
    .with_near_vector({"vector": vector})
    .with_limit(5)
    # The (clause, settings) tuple attaches the settings to the additional property.
    .with_additional((additional_clause, additional_setting))
    .do()
)
print(query_result)

{'errors': [{'locations': [{'column': 34499, 'line': 1}], 'message': 'Cannot query field "featureProjection" on type "Wiki_NodeAdditional".', 'path': None}]}


In [67]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

# Semantic search over Slack_Node: embed the question and return the text of
# the five nearest objects.
embed_model = LangchainEmbedding(OpenAIEmbeddings())
vector = embed_model.get_query_embedding("How do I run MATLAB on the cluster?")

slack_search = client.query.get(class_name='Slack_Node', properties="text")
slack_search = slack_search.with_near_vector({"vector": vector}).with_limit(5)
query_result = slack_search.do()

# Last expression of the cell: display the raw GraphQL response.
query_result

{'data': {'Get': {'Slack_Node': [{'text': 'Mark Kittisopikul said: Run and write MATLAB from VSCode:\nhttps://blogs.mathworks.com/matlab/2023/04/26/do-you-use-visual-studio-code-matlab-is-now-there-too/\nDo you use Visual Studio Code? MATLAB is now there too.\nAlong with many other developers, it was love at first sight for me when I first experienced Visual Studio Code. Highly customisable, easy to use and available for all 3 major operating systems; it took hardly any time at all for me to switch from using a plethora of editors across all my machines to using just two – The\nMagdalena Schneider said: The current version does not seem to let you run the MATLAB code directly within VSCode, though: "it doesn’t include things such as the ability to execute MATLAB code or debugging support".\n'},
    {'text': "could not be found\n<Figure size 432x432 with 4 Axes>\n\nMark Kittisopikul said: Hmm, let me try rebooting this Jupyter server\nMark Kittisopikul said: ok that helped\nMark Kittiso

In [46]:

# Keyword (BM25) search 1: print the raw response for the single top hit,
# followed by one blank line.
query = "Singapore"
singapore_search = client.query.get(class_name='Web_Node', properties="text")
query_result = singapore_search.with_bm25(query=query).with_limit(1).do()
print(query_result, end="\n\n")

# Keyword (BM25) search 2: up to ten hits, printed one result dict per line.
query = "ipalm"
ipalm_search = client.query.get(class_name='Web_Node', properties="text,link,title")
query_result = ipalm_search.with_bm25(query=query).with_limit(10).do()

results = query_result['data']['Get']['Web_Node']
print(f"{len(results)} results found")
for result in results:
    print(result)


{'data': {'Get': {'Web_Node': [{'text': "People / Harikrushnan Balasubramanian / Integrative Imaging\n\nLinkedInORCIDGoogle ScholarTwitterAdvanced Imaging Centre (AIC)\n\nAdvanced Microscopy Fellow\n\n##  Biography\n\nHari joined the AIC in June 2022 as an Advanced Microscopy Fellow. Prior to\njoining the AIC, Hari was a postdoctoral research fellow at the National\nUniversity of Singapore where he had also obtained his PhD. His PhD research\nin Prof. Thorsten Wohland's lab involved investigating the plasma membrane\nbehavior of the epidermal growth factor receptor using fluorescence\nspectroscopy and super-resolution techniques. At the AIC, Hari is in-charge of\nthe iPALM super-resolution microscope.\n\n##  Education\n\nPhD, Biological Sciences, National University of Singapore | SINGAPORE\n\nBachelors in Technology (B. Tech.), Biotechnology, SRM University | Chennai\nINDIA\n\n##  Featured Publications\n\nSimultaneous spatiotemporal super-resolution and multi-parametric fluorescence\n

In [49]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

# Hybrid search: the same question is sent both as keyword text and as an
# embedding vector; the keyword part is restricted to the "text" property.
query = "Who is from Singapore?"

embed_model = LangchainEmbedding(OpenAIEmbeddings())
vector = embed_model.get_query_embedding(query)

query_result = (
    client.query
    .get(class_name='Web_Node', properties="doc_id,text,title")
    .with_hybrid(
        query=query,
        vector=vector,
        # NOTE(review): alpha presumably balances keyword vs. vector scoring,
        # with 0.5 weighting them equally — confirm in the Weaviate hybrid docs.
        alpha=0.50,
        properties=["text"],
    )
    .with_limit(3)
    .do()
)

# Last expression of the cell: display the raw GraphQL response.
query_result

{'data': {'Get': {'Web_Node': [{'doc_id': 'https://www.janelia.org/people/sue-ann-koay',
     'text': 'People / Sue Ann Koay / Koay Lab\n\nGroup Leader\n\n##  Biography\n\nSue Ann is a novelty junkie who fell in love with the pursuit of scientific\nmysteries, and has meandered around in academia ever since. She spent her\nPh.D. years in experimental high energy physics, then thought it might be fun\nto switch from the pristine fundamental constituents of the universe to the\nemergent mess of biological systems that amazingly manages to swim, walk, fly,\nand generally proliferate everywhere. She is currently preoccupied with\ndeciphering the nature of activity in the brain and how it permits animals to\nbehave in ways never imagined by evolution.\n\n##  Education\n\nBS, Summa Cum Laude, Physics, San Jose State University\n\nBS, Summa Cum Laude, Computer science, San Jose State University\n\nPh.D., Physics, University of California - Santa Barbara',
     'title': 'Sue Ann Koay | Janelia 

In [45]:
# Keyword (BM25) search over Slack_Node for a single term, at most five hits.
# The recorded response contains an empty Slack_Node list.
query = "flatpak"

flatpak_search = client.query.get(class_name='Slack_Node', properties="text")
query_result = flatpak_search.with_bm25(query=query).with_limit(5).do()

# Last expression of the cell: display the raw GraphQL response.
query_result

{'data': {'Get': {'Slack_Node': []}}}

In [11]:
# Show the schema definition (index config and properties) of the Web_Node class.
client.schema.get(class_name="Web_Node")

{'class': 'Web_Node',
 'description': 'Class for Web_Node',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'properties': [{'dataType': ['text'],
   'description': 'Text property',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'text',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'The ref_doc_id of the Node',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'ref_doc_id',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'node_info (in JSON)',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'node_info',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'The relationships of the node (in JSON)',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'relationships',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],


In [35]:
from datetime import datetime

# List up to five SurveyResponses objects together with their creation time.
survey_request = client.query.get(class_name='SurveyResponses', properties=["query", "survey"])
survey_request = survey_request.with_limit(5).with_additional(['creationTimeUnix'])
query_result = survey_request.do()

arr = query_result['data']['Get']['SurveyResponses']

for obj in arr:
    # creationTimeUnix is parsed as an integer count of milliseconds and
    # converted to seconds for datetime.fromtimestamp.
    timestamp = int(obj['_additional']['creationTimeUnix']) / 1000.0
    date = datetime.fromtimestamp(timestamp)
    # First line: formatted creation time; second line: "<survey> - <query>".
    print(f"{date:%Y-%m-%d %I:%M %p}", f"{obj['survey']} - {obj['query']}", sep="\n")


2023-05-28 08:17 PM
Yes - What's the difference between pip and conda?
2023-05-28 08:44 PM
Yes - How do I use singularity?
2023-05-29 09:44 AM
NO - Where can I get more information about the "MDAS storage system" at Janelia?


In [6]:
# Select objects based on some criteria: Janelia_Node objects whose `source`
# property equals "Slack".
# `valueText` is used instead of the deprecated `valueString` filter key, so this
# filter has the same form as the batch-delete filter on the same class and path.
where_filter = {
    "path": ["source"],
    "operator": "Equal",
    "valueText": "Slack",
}

query_result = (
    client.query
    .get(class_name='Janelia_Node', properties="doc_id,_node_content,text")
    .with_additional(["id", "vector", "distance"])
    .with_where(where_filter)
    .do()
)

# Last expression of the cell: display the raw GraphQL response.
query_result


{'data': {'Get': {'Janelia_Node': []}}}

In [4]:
# Delete objects based on some criteria: Janelia_Node objects whose `source`
# property equals "Wiki".
# NOTE(review): this call passes dry_run=True, but the recorded output shows
# 'dryRun': False with per-object 'SUCCESS' statuses, so that output was
# presumably captured from a run with dry_run disabled — confirm before re-running.
wiki_filter = {
    'path': ['source'],
    'operator': 'Equal',
    'valueText': 'Wiki'
}

result = client.batch.delete_objects(
    class_name='Janelia_Node',
    where=wiki_filter,
    dry_run=True,
    output='verbose',
)

# Last expression of the cell: display the server's report.
result

{'dryRun': False,
 'match': {'class': 'Janelia_Node',
  'where': {'operands': None,
   'operator': 'Equal',
   'path': ['source'],
   'valueText': 'Wiki'}},
 'output': 'verbose',
 'results': {'failed': 0,
  'limit': 10000,
  'matches': 1002,
  'objects': [{'id': 'f98e8773-a765-46ba-a937-69b8d78e641a',
    'status': 'SUCCESS'},
   {'id': '339ae381-ea48-4526-8de1-9ad7a2a4cb44', 'status': 'SUCCESS'},
   {'id': 'e77d9b07-4292-4082-8967-8566f848dfb3', 'status': 'SUCCESS'},
   {'id': '98aa1fae-e65e-443b-8fea-2003f45d21e2', 'status': 'SUCCESS'},
   {'id': 'ee2caa95-a92f-4f7c-8f6d-b0022ae4df13', 'status': 'SUCCESS'},
   {'id': 'e29becbd-819e-4d98-8c6d-5a43c7d667a0', 'status': 'SUCCESS'},
   {'id': '2094fa5d-a063-451e-b113-4824a99b6811', 'status': 'SUCCESS'},
   {'id': 'b33bef8b-fa28-44d1-8ab0-37214112768c', 'status': 'SUCCESS'},
   {'id': '600817aa-7efb-4e2c-9590-5dacb8d0b465', 'status': 'SUCCESS'},
   {'id': 'f5c0f7ad-d93b-47cb-a071-0abe829f2bc4', 'status': 'SUCCESS'},
   {'id': 'ea92abb6-6b7

In [20]:

# Fetch doc_id/channel/ts for Slack_Node objects along with their id,
# creation/update timestamps and vector.
# NOTE(review): no with_limit() is set and full vectors are requested, so the
# displayed output is very long; consider limiting when only inspecting.
slack_additional = ["id","creationTimeUnix","lastUpdateTimeUnix","vector"]

slack_request = client.query.get(class_name='Slack_Node', properties="doc_id,channel,ts")
query_result = slack_request.with_additional(slack_additional).do()

# Last expression of the cell: display the raw GraphQL response.
query_result


{'data': {'Get': {'Slack_Node': [{'_additional': {'creationTimeUnix': '1691676418490',
      'id': '0001a257-6d5a-4da2-a0e2-36c0f96b0576',
      'lastUpdateTimeUnix': '1691676418490',
      'vector': [-0.019482959,
       0.015993472,
       0.0050125,
       -0.027712326,
       0.007807723,
       0.007553281,
       -0.011355366,
       -0.024121065,
       -0.027566932,
       -0.031928785,
       0.011915137,
       0.018072624,
       -0.008934535,
       0.019977301,
       -0.003998368,
       0.014212381,
       0.017738214,
       0.01244583,
       0.016705908,
       0.028482921,
       0.0019592002,
       0.004503616,
       -0.026563704,
       -0.0014166946,
       -0.016182486,
       0.01334001,
       0.025109753,
       -0.024775343,
       0.0011640704,
       -0.002895182,
       0.015978932,
       -0.00084283785,
       -0.043182377,
       -0.020006381,
       -0.0075314716,
       -0.020079078,
       -0.022318164,
       -0.004641742,
       0.050830163,
    