In [69]:
import weaviate
from typing import List

from tqdm.auto import tqdm
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from typing import List, Optional
import math
import os


class RawQuery:
    """Pairs a raw GraphQL query string with the client that will execute it.

    Exposes the same ``do()`` entry point as the client's query builders so
    that it can be handed to ``QueryUnpacker``.
    """

    def __init__(self, client: weaviate.Client, query: str) -> None:
        self.client = client
        self._query = query

    def do(self):
        """Send the stored query via ``client.query.raw`` and return the response."""
        return self.client.query.raw(self._query)


class QueryUnpacker:
    """Executes a wrapped query and strips the GraphQL envelope from its response.

    The wrapped object only needs a ``do()`` method that returns the response
    dict; in this file both client query builders and ``RawQuery`` instances
    are wrapped.
    """

    # Root operations below which the per-class payload is nested.
    _ROOT_OPERATIONS = ('Get', 'Aggregate', 'Explore')

    def __init__(self, query: "weaviate.gql.Query") -> None:
        self._query = query

    @property
    def query(self):
        """The wrapped query object.

        Raises:
            ValueError: if the wrapped object is a ``RawQuery``.
        """
        if isinstance(self._query, RawQuery):
            raise ValueError(
                'query attribute is a RawQuery object, implementation not done yet')
        return self._query

    def do(self):
        """Run the query and return the payload stored under the queried class.

        The response is descended through the first key of every nesting
        level until one of ``_ROOT_OPERATIONS`` is reached; the value of the
        first key below that operation is returned. A payload below the root
        operation that is not a dict (for example a list) is returned as-is.

        Raises:
            ValueError: if the response contains an ``errors`` entry, or if no
                root operation can be found (empty or unexpectedly shaped
                response).
        """
        result = self._query.do()

        if isinstance(result, dict) and 'errors' in result:
            raise ValueError(
                f'QueryUnpacker at execution of query: \n {result["errors"]}')

        while True:
            # Guard each level so a malformed response yields a clear error
            # instead of an AttributeError/IndexError from the lookup below.
            if not isinstance(result, dict) or not result:
                raise ValueError(
                    'QueryUnpacker could not find any of '
                    f'{self._ROOT_OPERATIONS} in the query response')
            key = next(iter(result))
            result = result[key]
            if key in self._ROOT_OPERATIONS:
                break

        if isinstance(result, dict):
            if not result:
                raise ValueError(
                    'QueryUnpacker found no class entry below the root operation')
            # Only one class is queried at a time, so take the first entry.
            result = result[next(iter(result))]

        return result


# idea: get locals() at beginning of func, QueryUnpacker(locals())
# return QueryUnpacker

class Query:
    """Builds common Weaviate queries for a single client.

    The ``q_*`` / ``qr_*`` methods only *build* a query and return it wrapped
    in a ``QueryUnpacker``; nothing is executed until ``.do()`` is called on
    the returned object. The remaining methods call the client directly.

    List arguments passed by the caller are never modified.
    """

    def __init__(self, client: "weaviate.Client"):
        self._client = client

    @staticmethod
    def _fields_with(_additional, *required: str) -> List[str]:
        """Return a new list holding ``_additional`` plus any missing ``required`` names.

        Working on a copy keeps the caller's list (and any shared default)
        untouched.
        """
        fields = list(_additional) if _additional is not None else []
        for name in required:
            if name not in fields:
                fields.append(name)
        return fields

    def q_class_near_object(self, class_name: str, uuid: str, node_type=None,
                            properties=None, _additional=None,
                            limit: int = 0) -> "QueryUnpacker":
        """Find objects of ``class_name`` near the object identified by ``uuid``.

        Args:
            class_name: class to query.
            uuid: id of the reference object.
            node_type: if given, only objects whose ``type`` property equals
                this value are returned.
            properties: properties to fetch for every hit.
            _additional: extra ``_additional`` fields; ``distance`` and ``id``
                are always requested.
            limit: maximum number of hits; ``0`` applies no explicit limit.
        """
        # De-duplicate while keeping a deterministic order.
        fields = self._fields_with(
            dict.fromkeys(_additional if _additional is not None else []),
            'distance', 'id')

        # or {"beacon": "weaviate://localhost/e5dc4a4c-ef0f-3aed-89a3-a73435c6bbcf"}
        near_object = {"id": uuid}

        q = (
            self._client.query
            .get(class_name, properties if properties is not None else [])
            # "certainty" only supported if distance==cosine
            .with_additional(fields)
            .with_near_object(near_object)
        )
        if node_type is not None:
            q = q.with_where({
                "path": ['type'],
                "operator": 'Equal',
                'valueString': node_type
            })
        if limit != 0:
            q = q.with_limit(limit)
        return QueryUnpacker(q)

    def q_class_near_vec(self, class_name, vec, properties=None, _additional=None,
                         certainty: float = 0, distance: float = 0,
                         limit: int = 0) -> "QueryUnpacker":
        """Finds objects of specified class close to vector

        Args:
            class_name: class to query.
            vec: query vector.
            properties: properties to fetch for every hit.
            _additional: extra ``_additional`` fields; ``id`` is always requested.
            certainty: certainty threshold; ``0`` leaves it unset.
            distance: distance threshold; ``0`` leaves it unset.
            limit: maximum number of hits; ``0`` applies no explicit limit.

        Raises:
            ValueError: if both ``certainty`` and ``distance`` are set.
        """
        if certainty != 0 and distance != 0:
            raise ValueError(
                "You can't set distance and certainty of near at the same time")

        near_vector = {"vector": vec}
        if certainty != 0:
            near_vector['certainty'] = certainty
        elif distance != 0:
            near_vector['distance'] = distance

        q = (
            self._client.query
            .get(class_name, properties if properties is not None else [])
            .with_additional(self._fields_with(_additional, 'id'))
            .with_near_vector(near_vector)
        )
        if limit != 0:
            q = q.with_limit(limit)

        return QueryUnpacker(q)

    def q_class_with_attrval(self, class_name: str, attr: str, val: str, attrType: str,
                             operator='Equal', node_type=None,
                             attributes: Optional[List[str]] = None,
                             _additional: Optional[List[str]] = None) -> "QueryUnpacker":
        """Finds objects with value in attribute
            operator:
                Like
                Equal
                more: https://weaviate.io/developers/weaviate/api/graphql/filters#filter-structure
            attrType:
                valueInt: The integer value where the Path's last property name should be compared to.
                valueBoolean: The boolean value that the Path's last property name should be compared to.
                valueString: The string value that the Path's last property name should be compared to.
                valueText: The text value that the Path's last property name should be compared to.
                valueNumber: The number (float) value that the Path's last property name should be compared to.
                valueDate: The date (ISO 8601 timestamp, formatted as RFC3339) value that the Path's last property name should be compared to.
            node_type:
                if given, the match is additionally restricted to objects whose
                ``type`` property equals this value.
        """
        where_filter = {
            "path": [attr],
            "operator": operator,
            attrType: val
        }
        if node_type is not None:
            # Combine the attribute condition with the type restriction.
            where_filter = {
                "operator": "And",
                "operands": [
                    where_filter,
                    {
                        "path": ["type"],
                        "operator": "Equal",
                        "valueString": node_type,
                    },
                ]
            }

        q = (
            self._client.query
            .get(class_name, attributes if attributes is not None else [])
            .with_additional(self._fields_with(_additional, 'id'))
            .with_where(where_filter)
        )
        return QueryUnpacker(q)

    def qr_obj(self, class_name: str, id: str,
               attributes: Optional[List[str]] = None,
               _additional: Optional[List[str]] = None) -> "QueryUnpacker":
        """Query one object by id"""
        where_filter = {
            "path": ['id'],
            "operator": "Equal",
            'valueString': id
        }

        q = (
            self._client.query
            .get(class_name, attributes if attributes is not None else [])
            .with_where(where_filter)
            .with_additional(self._fields_with(_additional))
        )
        return QueryUnpacker(q)

    def q_class_all(self, class_name: str,
                    attributes: Optional[List[str]] = None,
                    _additional: Optional[List[str]] = None,
                    limit: int = 10000) -> "QueryUnpacker":
        """Get objects of a class, at most ``limit`` (default 10000).

        ``id`` is always requested in ``_additional``.
        """
        q = (
            self._client.query
            .get(class_name, attributes if attributes is not None else [])
            .with_additional(self._fields_with(_additional, 'id'))
            .with_limit(limit)
        )
        return QueryUnpacker(q)

    def qr_class_all_after_cursor(self, class_name: str, after_uuid: str, limit: int):
        """Get all objects of a class after a specific one (only ids are fetched).

        Built as a raw GraphQL string and wrapped in ``RawQuery``.
        """
        # NOTE(review): the arguments are interpolated into the query text
        # unescaped; only pass trusted values.
        qr = """
                {
                    Get {
                        %s (
                              limit: %s,
                              after: "%s"
                            ) {
                            _additional{id}
                        }
                    }
                }
             """ % (class_name, limit, after_uuid)
        return QueryUnpacker(query=RawQuery(self._client, qr))

    def q_aggregate_class(self, class_name: str) -> "QueryUnpacker":
        """Aggregate query returning the meta count of ``class_name``."""
        q = (
            self._client.query.aggregate(class_name)
            .with_meta_count()
        )

        return QueryUnpacker(q)

    # Disabled draft, kept for reference:
    # def qraw_get_class_hasvector(self, class_name, has_vector:bool):
    #     """Query class """
    #     qr = """

    #             {
    #                 Get {
    #                     %s (
    #                         where: {
    #                                 path: ["hasVector"],
    #                                 operator: Equal,
    #                                 valueBoolean: %s
    #                             }
    #                         ) {
    #                         _additional{id}
    #                     }
    #                 }
    #             }
    #          """ % (class_name,'true' if has_vector else 'false')
    #     print(RawQuery(self._client, qr).do())
    #     return QueryUnpacker(query=RawQuery(self._client, qr))

    def delete_object(self, class_name: str, uuid: str):
        """Delete the object ``uuid`` of ``class_name``."""
        self._client.data_object.delete(
            uuid=uuid,
            class_name=class_name,
            # all replica nodes must acknowledge delete
            consistency_level=weaviate.ConsistencyLevel('ALL'),
        )

    def get_schema(self):
        """Return the schema as reported by the client."""
        return self._client.schema.get()

    def describe_count(self) -> None:
        """Lists count of all objects in db"""
        schema = self.get_schema()

        for entry in schema['classes']:
            counts = self.q_aggregate_class(entry['class']).do()
            print(f"""{entry['class']} {counts[0]['meta']} """)

    def get_class_objects(self, class_name: str, **kwargs):
        """Return max 100, https://weaviate-python-client.readthedocs.io/en/stable/weaviate.data.html#weaviate.data.DataObject.get """
        return self._client.data_object.get(
            class_name=class_name,
            **kwargs
        )

    def get_per_id(self, uuid: str):
        """Fetch a single object by its uuid via ``data_object.get_by_id``."""
        return self._client.data_object.get_by_id(uuid=uuid)

In [70]:
import weaviate

# Connect to the local Weaviate instance and print the object count of every class.
query = Query(client=weaviate.Client(url='http://localhost:8081'))
query.describe_count()

Node {'count': 66269} 


In [49]:
# Nodes whose name matches 'Data' using the Like operator, regardless of node type.
result = (
    query
    .q_class_with_attrval(
        class_name='Node',
        attr='name',
        val='Data',
        attrType='valueString',
        operator='Like',
        attributes=['name'],
    )
    .do()
)
print(result)

[{'_additional': {'id': '6b0c0041-a0f6-50b5-b386-a1f61eb93f71'}, 'name': 'Clinical Research Coordinators/Clinical Data Coordinator'}, {'_additional': {'id': 'fa0fb8ea-f7cb-5876-9dfd-79ba1fc288f0'}, 'name': 'Management Analysts/Health Information Management Data Analyst (HIM Data Analyst)'}, {'_additional': {'id': '90c87555-60c8-561f-aa97-435e86ca6072'}, 'name': 'Computer Systems Analysts/EDI Analyst (Electronic Data Exchange Analyst)'}, {'_additional': {'id': '9456520d-3e8c-592c-ba45-81aaba979537'}, 'name': 'Geographic Information Systems Technologists and Technicians/Geographic Information Systems Data Specialist (GIS Data Specialist)'}, {'_additional': {'id': '0b387b5e-4573-503e-8deb-69707c0a4993'}, 'name': 'Information Security Analysts/Supervisory Control and Data Acquisition Security Analyst (SCADA Security Analyst)'}, {'_additional': {'id': '7837ebb5-1ef4-5bc1-8bca-8821bb5eac1e'}, 'name': 'Computer Network Support Specialists/Network Data Specialist'}, {'_additional': {'id': '56d

In [71]:

# Same name lookup, restricted to nodes whose type is 'Skill'.
result = (
    query
    .q_class_with_attrval(
        class_name='Node',
        attr='name',
        val='data',
        attrType='valueString',
        operator='Like',
        attributes=['name'],
        node_type='Skill',
    )
    .do()
)
print(result)

[{'_additional': {'id': '80b29516-45dd-5e55-bca1-1e4e889bb5aa'}, 'name': 'data at rest'}, {'_additional': {'id': 'ea776eb2-095b-5b7d-bd7d-04581f990ef5'}, 'name': 'data quality initiatives'}, {'_additional': {'id': '197f73e0-2016-5af8-9c28-923bfef3e664'}, 'name': 'data mapping/design'}, {'_additional': {'id': 'e7d50e33-fcf5-528f-9a5d-5d888968acfb'}, 'name': 'big data technologies'}, {'_additional': {'id': 'a52ff698-b42d-5ead-9eea-321a074e3601'}, 'name': 'data pipeline management'}, {'_additional': {'id': '52967ff6-7f71-5b95-89a8-e540e2a1e7b2'}, 'name': 'data network management'}, {'_additional': {'id': 'b43f2b57-95e9-586b-beeb-1c8ff9b1103a'}, 'name': 'data analysis and utilization'}, {'_additional': {'id': '1160edb0-7e9b-5a35-8b05-2e93852db180'}, 'name': 'data sciences'}, {'_additional': {'id': 'eca8b845-73c2-5c52-b199-541f7731aa47'}, 'name': 'employment data management'}, {'_additional': {'id': 'a52b5595-dfc5-5f0e-9149-ac3c2e2583e4'}, 'name': 'data analytics and reporting'}, {'_additio

In [72]:
def get_similar(uuid, which=None):
    """Print name and distance of the 'Node' objects nearest to ``uuid``.

    ``which`` is forwarded as ``node_type`` to ``q_class_near_object``; with
    ``None`` no type restriction is requested. Uses the notebook-level
    ``query`` object.
    """
    neighbours = query.q_class_near_object(
        'Node', uuid, properties=['name'], node_type=which)
    for hit in neighbours.do():
        print(hit['name'], hit['_additional']['distance'])

In [77]:
# Nearest 'Job' nodes to the given reference object.
get_similar(uuid='32acd03f-fdd9-5018-9621-7033578192f5', which='Job')

Information Security Analysts/Security Specialist 0.85661536
Information Security Analysts/Information Security Officer 0.85763896
Compliance Managers/Governance Compliance and Risk Manager (GCR Manager) 0.86081785
Information Security Analysts/Information Technology Security Architect (IT Security Architect) 0.8617028
Geographic Information Systems Technologists and Technicians/Geospatial Specialist 0.8620246
Information Security Analysts/Information Security Specialist 0.86208117
Computer Programmers/Database Programmer 0.86232823
Database Administrators/Database Developer 0.8623396
Software Developers/Database Developer 0.8626517
Information Security Analysts/Information Systems Security Specialist 0.86451817
Firefighters/Forest Fire Suppression Specialist 0.8657596
Information Security Analysts/Systems Security Specialist 0.8660168
Information Technology Project Managers/Database Development Project Manager 0.8661678
Environmental Engineering Technologists and Technicians/Haz Tech 

In [None]:
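# Disabled example: nearest-neighbour search from a stored vector.
# Needs `import numpy as np` before it can be re-enabled.
# query = Query(weaviate.Client('http://localhost:8081'))
# vector = np.load('coe-da-pa-ssc.npz')['vectors'][100]

# q = query.q_class_near_vec('Image', vector, _additional=['distance'])
# result = q.do()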

In [None]:
 # q_class_with_attrval
