In [2]:
import weaviate
from typing import List

from tqdm.auto import tqdm
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from typing import List, Optional
import math
import os


class RawQuery:
    """Pairs a raw GraphQL query string with the client that will run it.

    Exposes the same ``do()`` entry point as the client's query builders so
    it can be handed to code that only needs "something executable".
    """

    def __init__(self, client: weaviate.Client, query: str) -> None:
        self.client = client
        self._query = query

    def do(self):
        """Send the stored query string through ``client.query.raw`` and return its result."""
        return self.client.query.raw(self._query)


class QueryUnpacker:
    """Executes a wrapped query and strips the GraphQL envelope from the response.

    The wrapped object only needs a ``do()`` method returning the response
    dictionary; both the client's query builders and ``RawQuery`` qualify.
    """

    # Root GraphQL functions; the payload of interest sits directly below them.
    _ROOT_FUNCTIONS = ('Get', 'Aggregate', 'Explore')

    # The annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, query: "weaviate.gql.Query") -> None:
        self._query = query

    @property
    def query(self):
        """The wrapped query builder.

        Raises:
            ValueError: if the wrapped object is a ``RawQuery``, which has no
                builder to hand back.
        """
        if isinstance(self._query, RawQuery):
            raise ValueError(
                'query attribute is a RawQuery object, implementation not done yet')
        return self._query

    @staticmethod
    def _first_item(mapping):
        """Return ``(key, value)`` of the first entry, or raise on a bad shape."""
        if not isinstance(mapping, dict) or not mapping:
            raise ValueError(
                f'QueryUnpacker: unexpected response structure, cannot unpack {mapping!r}')
        key = next(iter(mapping))
        return key, mapping[key]

    def do(self):
        """Run the query and return the payload below the root function.

        The response is descended through the first key of each level until
        one of ``Get`` / ``Aggregate`` / ``Explore`` is reached. If the value
        below it is a dictionary, the value of its first key is returned;
        any other value is returned unchanged.

        Returns:
            The unwrapped payload.

        Raises:
            ValueError: if the response carries an ``errors`` entry, or if its
                structure does not lead to one of the root functions.
        """
        result = self._query.do()

        if not isinstance(result, dict):
            raise ValueError(
                f'QueryUnpacker: unexpected response structure, cannot unpack {result!r}')
        if 'errors' in result:
            raise ValueError(
                f'QueryUnpacker at execution of query: \n {result["errors"]}')

        key = None
        while key not in self._ROOT_FUNCTIONS:
            key, result = self._first_item(result)

        # Below the root function the payload is normally keyed by class name.
        # NOTE(review): Explore appears to return a plain list instead - hand
        # that back as-is rather than failing on a missing ``.keys()``.
        if isinstance(result, dict):
            _, result = self._first_item(result)
        return result


# idea: get locals() at beginning of func, QueryUnpacker(locals())
# return QueryUnpacker

class Query:
    """Thin convenience layer over a ``weaviate.Client``.

    Methods prefixed ``q_`` / ``qr_`` build a query and return it wrapped in a
    ``QueryUnpacker``; call ``.do()`` on the result to execute it. The
    remaining methods talk to the client directly.

    None of the methods modify the lists passed in by the caller.
    """

    def __init__(self, client: weaviate.Client):
        self._client = client

    @staticmethod
    def _as_list(value) -> list:
        """Return a fresh list for ``value`` (``None`` -> ``[]``, ``str`` -> ``[str]``)."""
        if value is None:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    @classmethod
    def _with_required(cls, additional, *required) -> list:
        """Copy ``additional`` and append every ``required`` entry it lacks."""
        merged = cls._as_list(additional)
        for field in required:
            if field not in merged:
                merged.append(field)
        return merged

    def q_class_near_object(self, class_name: str, uuid: str, properties: Optional[List[str]] = None,
                            _additional: Optional[List[str]] = None, limit: int = 0) -> QueryUnpacker:
        """Find objects of ``class_name`` near the object identified by ``uuid``.

        ``distance`` and ``id`` are always requested in ``_additional``.

        Args:
            class_name: class to search in.
            uuid: id of the reference object.
            properties: properties to return for each hit.
            _additional: extra ``_additional`` fields to request.
            limit: maximum number of hits; ``0`` leaves the limit unset.
        """
        additional = self._with_required(_additional, "distance", "id")

        # or {"beacon": "weaviate://localhost/e5dc4a4c-ef0f-3aed-89a3-a73435c6bbcf"}
        near_object = {"id": uuid}

        q = (
            self._client.query
            .get(class_name, self._as_list(properties))
            # "certainty" only supported if distance==cosine
            .with_additional(additional)
            .with_near_object(near_object)
        )
        if limit != 0:
            q = q.with_limit(limit)
        return QueryUnpacker(q)

    def q_class_near_vec(self, class_name, vec, properties: Optional[List[str]] = None,
                         _additional: Optional[List[str]] = None, certainty: float = 0,
                         distance: float = 0, limit: int = 0) -> QueryUnpacker:
        """Find objects of ``class_name`` close to the vector ``vec``.

        ``id`` is always requested in ``_additional``.

        Args:
            class_name: class to search in.
            vec: query vector, passed through as the ``vector`` of ``nearVector``.
            properties: properties to return for each hit.
            _additional: extra ``_additional`` fields to request.
            certainty: ``nearVector`` certainty; ``0`` leaves it unset.
            distance: ``nearVector`` distance; ``0`` leaves it unset.
            limit: maximum number of hits; ``0`` leaves the limit unset.

        Raises:
            ValueError: if both ``certainty`` and ``distance`` are given.
        """
        if certainty != 0 and distance != 0:
            raise ValueError(
                'You can\'t set distance and certainty of near at the same time')

        near_vector = {"vector": vec}
        if certainty != 0:
            near_vector['certainty'] = certainty
        elif distance != 0:
            near_vector['distance'] = distance

        q = (
            self._client.query
            .get(class_name, self._as_list(properties))
            .with_additional(self._with_required(_additional, 'id'))
            .with_near_vector(near_vector)
        )
        if limit != 0:
            q = q.with_limit(limit)

        return QueryUnpacker(q)

    def q_class_with_attrval(self, class_name: str, attr: str, val: str, attrType: str, operator='Equal',
                             attributes: Optional[List[str]] = None,
                             _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Find objects whose attribute ``attr`` matches ``val``.

        ``id`` is always requested in ``_additional``.

            operator:
                Like
                Equal
                more: https://weaviate.io/developers/weaviate/api/graphql/filters#filter-structure
            attrType:
                valueInt: The integer value where the Path's last property name should be compared to.
                valueBoolean: The boolean value that the Path's last property name should be compared to.
                valueString: The string value that the Path's last property name should be compared to.
                valueText: The text value that the Path's last property name should be compared to.
                valueNumber: The number (float) value that the Path's last property name should be compared to.
                valueDate: The date (ISO 8601 timestamp, formatted as RFC3339) value that the Path's last property name should be compared to.
        """
        where_filter = {
            "path": [attr],
            # Honour the caller's operator; it used to be fixed to "Equal".
            "operator": operator,
            attrType: val
        }

        q = (
            self._client.query
            .get(class_name, self._as_list(attributes))
            .with_additional(self._with_required(_additional, 'id'))
            .with_where(where_filter)
        )
        return QueryUnpacker(q)

    def qr_obj(self, class_name: str, id: str, attributes: Optional[List[str]] = None,
               _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Query one object of ``class_name`` by its ``id``."""
        where_filter = {
            "path": ['id'],
            "operator": "Equal",
            'valueString': id
        }

        q = (
            self._client.query
            .get(class_name, self._as_list(attributes))
            .with_where(where_filter)
            .with_additional(self._as_list(_additional))
        )
        return QueryUnpacker(q)

    def q_class_all(self, class_name: str, attributes: Optional[List[str]] = None,
                    _additional: Optional[List[str]] = None, limit: int = 10000) -> QueryUnpacker:
        """Get objects of a class, at most ``limit`` of them (default 10000).

        ``id`` is always requested in ``_additional``.
        """
        q = (
            self._client.query
            .get(class_name, self._as_list(attributes))
            .with_additional(self._with_required(_additional, 'id'))
            .with_limit(limit)
        )
        return QueryUnpacker(q)

    def qr_class_all_after_cursor(self, class_name: str, after_uuid: str, limit: int) -> QueryUnpacker:
        """Get the ids of up to ``limit`` objects of a class after ``after_uuid``.

        Built as a raw GraphQL string using the ``after`` cursor argument;
        only ``_additional { id }`` is requested.
        """
        qr = """
                {
                    Get {
                        %s (
                              limit: %s,
                              after: "%s"
                            ) {
                            _additional{id}
                        }
                    }
                }
             """ % (class_name, limit, after_uuid)
        return QueryUnpacker(query=RawQuery(self._client, qr))

    def q_aggregate_class(self, class_name: str) -> QueryUnpacker:
        """Build an Aggregate query returning the meta count of ``class_name``."""
        q = (
            self._client.query.aggregate(class_name)
            .with_meta_count()
        )

        return QueryUnpacker(q)

    # Disabled draft, kept for reference:
    # def qraw_get_class_hasvector(self, class_name, has_vector:bool):
    #     """Query class """
    #     qr = """

    #             {
    #                 Get {
    #                     %s (
    #                         where: {
    #                                 path: ["hasVector"],
    #                                 operator: Equal,
    #                                 valueBoolean: %s
    #                             }
    #                         ) {
    #                         _additional{id}
    #                     }
    #                 }
    #             }
    #          """ % (class_name,'true' if has_vector else 'false')
    #     print(RawQuery(self._client, qr).do())
    #     return QueryUnpacker(query=RawQuery(self._client, qr))

    def delete_object(self, class_name: str, uuid: str):
        """Delete the object ``uuid`` of ``class_name`` with consistency level ALL."""
        self._client.data_object.delete(
            uuid=uuid,
            class_name=class_name,
            # all replica nodes must acknowledge delete
            consistency_level=weaviate.ConsistencyLevel('ALL'),
        )

    def get_schema(self):
        """Return the schema as reported by the client."""
        return self._client.schema.get()

    def describe_count(self) -> None:
        """Lists count of all objects in db"""
        schema = self.get_schema()

        for _class in schema['classes']:
            r = self.q_aggregate_class(_class['class']).do()
            print(f"""{_class['class']} {r[0]['meta']} """)

    def get_class_objects(self, class_name: str, **kwargs):
        """Return max 100, https://weaviate-python-client.readthedocs.io/en/stable/weaviate.data.html#weaviate.data.DataObject.get """
        return self._client.data_object.get(
            class_name=class_name,
            **kwargs
        )

    def get_per_id(self, uuid: str):
        """Fetch a single object by ``uuid`` through ``data_object.get_by_id``."""
        return self._client.data_object.get_by_id(uuid=uuid)

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
import weaviate

# Connect to the Weaviate instance on localhost:8081 and print the object
# count of every class in its schema.
weaviate_client = weaviate.Client('http://localhost:8081')
query = Query(weaviate_client)
query.describe_count()

Job {'count': 16269} 
Skill {'count': 0} 


In [14]:
# Query Job objects on their `name` attribute with the value 'Data'.
job_lookup = query.q_class_with_attrval(
    class_name='Job',
    attr='name',
    val='Data',
    attrType='valueString',
    operator='Like',
    attributes=['name'],
)
result = job_lookup.do()
print(result)

[{'_additional': {'id': '6b0c0041-a0f6-50b5-b386-a1f61eb93f71'}, 'name': 'Clinical Research Coordinators/Clinical Data Coordinator'}, {'_additional': {'id': 'fa0fb8ea-f7cb-5876-9dfd-79ba1fc288f0'}, 'name': 'Management Analysts/Health Information Management Data Analyst (HIM Data Analyst)'}, {'_additional': {'id': '90c87555-60c8-561f-aa97-435e86ca6072'}, 'name': 'Computer Systems Analysts/EDI Analyst (Electronic Data Exchange Analyst)'}, {'_additional': {'id': '9456520d-3e8c-592c-ba45-81aaba979537'}, 'name': 'Geographic Information Systems Technologists and Technicians/Geographic Information Systems Data Specialist (GIS Data Specialist)'}, {'_additional': {'id': '448cbcc8-c7d7-5570-89d5-ed2cea6b6df1'}, 'name': 'Database Architects/Enterprise Data Architect'}, {'_additional': {'id': '07ccfc52-0fb5-5f8d-b77f-577e72ae711d'}, 'name': 'Data Warehousing Specialists/Data Warehouse ETL Developer (Data Warehouse Extract, Transform, and Load Developer)'}, {'_additional': {'id': '69dd87b9-854b-527

In [44]:
# Query Skill objects on their `name` attribute with the value 'powerpoint'.
skill_lookup = query.q_class_with_attrval(
    class_name='Skill',
    attr='name',
    val='powerpoint',
    attrType='valueString',
    operator='Like',
    attributes=['name'],
)
result = skill_lookup.do()
print(result)

[{'_additional': {'id': '45336231-8021-5265-93fe-67a18ff1a93e'}, 'name': 'specifically excel and powerpoint'}, {'_additional': {'id': '25337051-1402-543a-bdc5-fb3bb60d823f'}, 'name': 'excel and powerpoint'}, {'_additional': {'id': 'e804a797-3aca-52fd-bc2a-749b140cd91c'}, 'name': 'powerpoint and word skills'}, {'_additional': {'id': '35faf0aa-8e8c-5381-8499-12c8a228d042'}, 'name': 'proficient in powerpoint'}, {'_additional': {'id': 'f222f8e1-5fa9-55e1-959f-3e1533c10c2d'}, 'name': 'powerpoint graphics'}, {'_additional': {'id': '0502e8c6-84b7-50dd-a12d-2d81daabda53'}, 'name': 'strong powerpoint skills'}, {'_additional': {'id': 'e1baf3ee-35f0-573e-9cb2-ff49e5f048a0'}, 'name': 'powerpoint and excel'}, {'_additional': {'id': '7d1021c9-c89f-5cc8-b22d-6ab460b71065'}, 'name': 'ms powerpoint software'}, {'_additional': {'id': '64f6cebf-0c36-5666-9848-a6863d5ed44e'}, 'name': 'particularly powerpoint and excel'}, {'_additional': {'id': '344067ba-e468-5f17-a489-17b1ddd6053f'}, 'name': 'especially p

In [34]:
def get_similar(uuid, class_name='Skill'):
    """Print name and distance of the objects nearest to the object ``uuid``.

    Uses the notebook-level ``query`` helper, so that cell must have run first.

    Args:
        uuid: id of the reference object.
        class_name: class to search in; defaults to ``'Skill'``. Pass
            ``'Job'`` to search jobs instead.
    """
    result = query.q_class_near_object(class_name, uuid, properties=['name']).do()
    for hit in result:
        print(hit['name'], hit['_additional']['distance'])

In [45]:
# Id of the 'excel and powerpoint' Skill object returned by the search above.
skill_uuid = '25337051-1402-543a-bdc5-fb3bb60d823f'
get_similar(skill_uuid)

excel and powerpoint 5.9604645e-08
locating products 0.0013566613
providing advice and guidance 0.001627922
execution plan development 0.0016385317
cloud services delivery 0.0017610788
transit planning 0.0022781491
filtration technology 0.0027347207
emea region 0.012846589
compliance with employment law 0.01354444
special recruitment projects 0.013834894
enterprise sales cycle 0.013941765
enhanced due diligence 0.014139414
system security checks 0.016376376
uhnw clients 0.018491387
casp+ 0.022257984
imc 0.022283256
grievance handling 0.022857904
direct care provision 0.023317099
green hydrogen projects 0.023468673
risk assessment and management 0.023539186
cerner emr experience 0.023951352
operating a forklift 0.024138808
tax filings 0.02426219
system requirements development 0.024909556
regulation and legislation 0.02518326
global trade technology 0.025356293
proper documentation 0.025761187
package products 0.025917888
cdl license preferred 0.026010871
employee share purchase plan 0.

In [None]:
#  query = Query(weaviate.Client('http://localhost:8081'))
#     vector = np.load('coe-da-pa-ssc.npz')['vectors'][100]

#     q = query.q_class_near_vec('Image', vector, _additional=['distance'])
#     result = q.do()

In [None]:
 # q_class_with_attrval
