In [2]:
# Verify weaviate-client is installed and the database is live and ready
import weaviate

client = weaviate.Client("http://localhost:8080")
# Fail with a readable message instead of a bare AssertionError when the
# local Weaviate instance is down or still starting up.
assert client.is_live(), "Weaviate at http://localhost:8080 is not live"
assert client.is_ready(), "Weaviate at http://localhost:8080 is not ready"
# Last expression: display the server metadata (hostname, modules, version).
client.get_meta()

{'hostname': 'http://[::]:8080', 'modules': {}, 'version': '1.19.2'}

In [63]:
# Peek at five Janelia_Node objects, fetching only doc_id and extra_info.
(
    client.query
    .get(class_name='Janelia_Node', properties='doc_id,extra_info')
    .with_limit(5)
    .do()
)

{'data': {'Get': {'Janelia_Node': [{'doc_id': 'ce5281da-16ba-4869-8dc3-7196c5273112',
     'extra_info': '{"source": "wiki", "title": "SCA 3D Graphics Solution: Direct Volume Rendering", "link": "https://wikis.janelia.org/display/SCSW/SCA+3D+Graphics+Solution%3A+Direct+Volume+Rendering"}'},
    {'doc_id': 'ffc8be3b-5c71-4da5-affb-7c665ea7d52e',
     'extra_info': '{"source": "slack", "channel": "C042F5YFS03", "ts": "1661973693.260849"}'},
    {'doc_id': 'e0e3417b-5196-4b7b-8dca-c4ac7d04c7d2',
     'extra_info': '{"source": "slack", "channel": "C0146BJ38PQ", "ts": "1600290603.043100"}'},
    {'doc_id': '5df1f8ca-9ac4-4aa0-a7dc-41a1a8e059d6',
     'extra_info': '{"source": "slack", "channel": "C01H5PYR4TW", "ts": "1670970129.549559"}'},
    {'doc_id': 'b21df203-2ecd-4bc4-8e92-98f907c83ae0',
     'extra_info': '{"source": "slack", "channel": "C02K252DJ86", "ts": "1643659878.893279"}'}]}}}

In [64]:

# Fetch a single Wiki_Node by its object id, including the stored vector.
where_filter = dict(
    path=["id"],
    operator="Equal",
    valueString="00183bb7-7a99-4a07-af80-6946e4512231",
)

# NOTE(review): `distance` came back as None in the recorded output --
# presumably because this query has no near-vector/near-text clause to
# measure a distance against; confirm before relying on it.
query_result = (
    client.query
    .get(class_name='Wiki_Node', properties="doc_id,extra_info,text")
    .with_additional(["id", "vector", "distance"])
    .with_where(where_filter)
    .do()
)

query_result


{'data': {'Get': {'Wiki_Node': [{'_additional': {'distance': None,
      'id': '00183bb7-7a99-4a07-af80-6946e4512231',
      'vector': [-0.0074045537,
       0.0011069621,
       -0.014764456,
       0.0041301777,
       -0.006284568,
       0.009637082,
       -0.031314936,
       -0.004721798,
       -0.029022872,
       -0.035125114,
       0.017339306,
       -0.0013702144,
       0.0034306524,
       0.01866394,
       0.009257551,
       0.022831324,
       0.020241592,
       -0.027221967,
       -0.02597175,
       0.0016688152,
       0.007731991,
       -0.0018911378,
       -0.03682184,
       -0.02031601,
       -0.024394095,
       0.027073132,
       0.02976705,
       -0.009346853,
       -0.038965065,
       -0.0009851032,
       0.04655566,
       -0.016669547,
       -0.010902181,
       0.0076873405,
       -0.005253884,
       -0.012137514,
       0.016223041,
       -0.036047895,
       0.052419774,
       0.024275027,
       0.0271922,
       0.005592484,
       -

In [65]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

# Embed the search phrase with the LangChain OpenAI embedding model,
# wrapped so it exposes the llama_index embedding interface.
embed_model = LangchainEmbedding(OpenAIEmbeddings())
vector = embed_model.get_query_embedding("Hierarchical Alignment")

# Vector search: the five Wiki_Node objects closest to the query embedding.
query_result = (
    client.query
    .get('Wiki_Node', "doc_id,extra_info,text")
    .with_near_vector(dict(vector=vector))
    .with_limit(5)
    .do()
)

query_result



{'data': {'Get': {'Wiki_Node': [{'doc_id': '6546eabe-42ff-448e-afca-fa300a56b9a3',
     'extra_info': '{"source": "wiki", "title": "Male Brain FIB-SEM Experiments Alignment", "link": "https://wikis.janelia.org/display/ScientificComputing/Male+Brain+FIB-SEM+Experiments+Alignment"}',
     'text': '* *\n\n18-Volumes overview:\n\nRanking: 1 15 2 14 3 16 9 5 10 7 11 8 12 4 6 13 17 18\n\nSerial| label| Acquire collection| Translation collection| Final alignment\n(affine) collection| Scale 1.0 (full scale) image stack location on shared\nfile system| Ranking (low is best)  \n---|---|---|---|---|---|---  \n1| D07_08| Z0416_04male_Sec10_D07_08_acquire|\ntemp_Z0416_04male_Sec10_D07_08_acquire| CATMAID VIEW|\n/groups/flyem/data/khairy_alignments/flyem_04male_082017/Fine_Z0416_04male_Sec10_D07_08_acquire/scape_20171002_173154/000|\n1  \n2| D08_09| Z0416_04male_Sec10_D08_09_acquire|\ntemp_Z0416_04male_Sec10_D08_09_acquire| CATMAID VIEW|\n/groups/flyem/data/khairy_alignments/flyem_04male_082017/Fine

In [66]:

# Ask Weaviate for a 2-dimensional featureProjection of each hit's vector.
# Relies on `vector` having been computed by a previously executed cell.
#
# In the recorded run this query did not succeed; the server answered with
#   Cannot query field "featureProjection" on type "Wiki_NodeAdditional".
# NOTE(review): get_meta() on this instance reported 'modules': {} --
# featureProjection may only be available when a vectorizer module is
# enabled on the server; confirm against the Weaviate docs.
additional_clause = {"featureProjection": ["vector"]}
additional_setting = {"dimensions": 2}

query_result = (
    client.query
    .get(class_name='Wiki_Node', properties="doc_id,extra_info,text")
    .with_near_vector(dict(vector=vector))
    .with_limit(5)
    .with_additional((additional_clause, additional_setting))
    .do()
)
print(query_result)

{'errors': [{'locations': [{'column': 34499, 'line': 1}], 'message': 'Cannot query field "featureProjection" on type "Wiki_NodeAdditional".', 'path': None}]}


In [67]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

# Embed a natural-language question to use as the search vector.
embed_model = LangchainEmbedding(OpenAIEmbeddings())
vector = embed_model.get_query_embedding("How do I run MATLAB on the cluster?")

# Vector search over Slack messages; only the message text is returned.
query_result = (
    client.query
    .get('Slack_Node', "text")
    .with_near_vector(dict(vector=vector))
    .with_limit(5)
    .do()
)

query_result

{'data': {'Get': {'Slack_Node': [{'text': 'Mark Kittisopikul said: Run and write MATLAB from VSCode:\nhttps://blogs.mathworks.com/matlab/2023/04/26/do-you-use-visual-studio-code-matlab-is-now-there-too/\nDo you use Visual Studio Code? MATLAB is now there too.\nAlong with many other developers, it was love at first sight for me when I first experienced Visual Studio Code. Highly customisable, easy to use and available for all 3 major operating systems; it took hardly any time at all for me to switch from using a plethora of editors across all my machines to using just two – The\nMagdalena Schneider said: The current version does not seem to let you run the MATLAB code directly within VSCode, though: "it doesn’t include things such as the ability to execute MATLAB code or debugging support".\n'},
    {'text': "could not be found\n<Figure size 432x432 with 4 Axes>\n\nMark Kittisopikul said: Hmm, let me try rebooting this Jupyter server\nMark Kittisopikul said: ok that helped\nMark Kittiso

In [70]:
# Run the same single-hit BM25 keyword search over Slack messages for each
# term and print the raw response. In the recorded output both terms
# return the same message.
for query in ("guix", "flatpak"):
    query_result = (
        client.query
        .get(class_name='Slack_Node', properties="text")
        .with_bm25(query=query)
        .with_limit(1)
        .do()
    )
    print(query_result)

{'data': {'Get': {'Slack_Node': [{'text': "Mark Kittisopikul said: The snap, flatpak, guix stuff is more on the deployment side, I think. That's next meeting.\n"}]}}}
{'data': {'Get': {'Slack_Node': [{'text': "Mark Kittisopikul said: The snap, flatpak, guix stuff is more on the deployment side, I think. That's next meeting.\n"}]}}}


In [44]:
from langchain.embeddings import OpenAIEmbeddings
from llama_index import LangchainEmbedding

# Hybrid (keyword + vector) search over Slack messages for one term.
query = "flatpak"
embed_model = LangchainEmbedding(OpenAIEmbeddings())
vector = embed_model.get_query_embedding(query)

# NOTE(review): alpha=0 presumably weights the ranking entirely toward the
# keyword side, so the embedding would have no influence here -- confirm
# against the Weaviate hybrid-search docs. The recorded run returned no hits.
query_result = (
    client.query
    .get('Slack_Node', "text")
    .with_hybrid(query=query, vector=vector, alpha=0)
    .with_limit(5)
    .do()
)

query_result

{'data': {'Get': {'Slack_Node': []}}}

In [45]:
# BM25 keyword search over Slack messages, up to five hits.
# The recorded run of this cell returned an empty result list.
query = "flatpak"
query_result = (
    client.query
    .get('Slack_Node', "text")
    .with_bm25(query=query)
    .with_limit(5)
    .do()
)

query_result

{'data': {'Get': {'Slack_Node': []}}}

In [53]:
# Display the stored class definition for Slack_Node (inverted-index
# settings plus per-property data type and tokenization).
client.schema.get(class_name="Slack_Node")

{'class': 'Slack_Node',
 'description': 'Class for Slack_Node',
 'invertedIndexConfig': {'bm25': {'b': 0.75, 'k1': 1.2},
  'cleanupIntervalSeconds': 60,
  'stopwords': {'additions': None, 'preset': 'en', 'removals': None}},
 'properties': [{'dataType': ['text'],
   'description': 'Text property',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'text',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'Document id',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'doc_id',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'extra_info (in JSON)',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'extra_info',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'The ref_doc_id of the Node',
   'indexFilterable': True,
   'indexSearchable': True,
   'name': 'ref_doc_id',
   'tokenization': 'whitespace'},
  {'dataType': ['text'],
   'description': 'node_info 

In [35]:
from datetime import datetime

# Fetch up to five survey responses together with their creation time.
query_result = (
    client.query
    .get(class_name='SurveyResponses', properties=["query", "survey"])
    .with_limit(5)
    .with_additional(['creationTimeUnix'])
    .do()
)

arr = query_result['data']['Get']['SurveyResponses']

# Print "<timestamp>" then "<survey answer> - <query>" for every response.
for obj in arr:
    # creationTimeUnix is treated as epoch milliseconds: parse to int and
    # scale to seconds. fromtimestamp() without tz yields local time.
    timestamp = int(obj['_additional']['creationTimeUnix']) / 1000.0
    date = datetime.fromtimestamp(timestamp)
    print(f"{date:%Y-%m-%d %I:%M %p}")
    print(f"{obj['survey']} - {obj['query']}")


2023-05-28 08:17 PM
Yes - What's the difference between pip and conda?
2023-05-28 08:44 PM
Yes - How do I use singularity?
2023-05-29 09:44 AM
NO - Where can I get more information about the "MDAS storage system" at Janelia?
