In [7]:
import weaviate
from typing import List

from tqdm.auto import tqdm
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from typing import List, Optional
import math
import os


class RawQuery:
    """Pair a raw GraphQL query string with the client that will execute it."""

    def __init__(self, client: weaviate.Client, query: str) -> None:
        self.client = client
        self._query = query

    def do(self):
        """Send the stored query through ``client.query.raw`` and return the response as-is."""
        return self.client.query.raw(self._query)


class QueryUnpacker:
    """Execute a query and strip the GraphQL envelope from its response.

    ``query`` can be any object exposing a zero-argument ``do()`` that
    returns the response dict; in this file that is either a query built
    from ``client.query`` or a :class:`RawQuery`.
    """

    # Root fields under which the actual payload is nested.
    _ROOT_FIELDS = ('Get', 'Aggregate', 'Explore')

    # The annotation is quoted so it is never evaluated at import time.
    def __init__(self, query: "weaviate.gql.Query | RawQuery") -> None:
        self._query = query

    @property
    def query(self):
        """Return the wrapped query object.

        Raises:
            ValueError: if the wrapped object is a :class:`RawQuery`
                (exposing it is not implemented yet).
        """
        if isinstance(self._query, RawQuery):
            raise ValueError(
                'query attribute is a RawQuery object, implementation not done yet')
        return self._query

    def do(self):
        """Run the query and return the payload below the root field.

        The response is descended along the first key of each level until
        one of ``Get`` / ``Aggregate`` / ``Explore`` is reached; the value
        stored under the first key below that root field (the class name
        for the queries built in this file) is returned.

        Returns:
            The unwrapped payload, e.g. the list of objects of a ``Get``.

        Raises:
            ValueError: if the response carries an ``errors`` entry, or if
                no root field with a usable payload can be found.
        """
        response = self._query.do()

        if 'errors' in response:
            raise ValueError(
                f'QueryUnpacker at execution of query: \n {response["errors"]}')

        node = response
        # Stop as soon as there is nothing left to descend into, instead of
        # looping until an AttributeError/IndexError surfaces.
        while isinstance(node, dict) and node:
            key = next(iter(node))
            node = node[key]
            if key not in self._ROOT_FIELDS:
                continue
            if isinstance(node, dict) and node:
                return next(iter(node.values()))
            if isinstance(node, list):
                # NOTE(review): presumably the shape of an Explore response
                # (a list directly under the root field) - confirm.
                return node
            raise ValueError(
                f'QueryUnpacker: empty or unexpected payload under "{key}": {node!r}')

        raise ValueError(
            'QueryUnpacker: response contains none of '
            f'{self._ROOT_FIELDS}: {response!r}')


# Idea: get locals() at the beginning of the function, pass them to QueryUnpacker(locals()),
# and return that QueryUnpacker.

class Query:
    """Factory for the Weaviate queries used in this notebook.

    The ``q_*`` / ``qr_*`` methods only *build* a query and return it wrapped
    in a :class:`QueryUnpacker`; call ``.do()`` on the returned object to
    execute it. The remaining methods call the client directly.
    """

    def __init__(self, client: weaviate.Client):
        self._client = client

    @staticmethod
    def _merge_additional(additional, *required):
        """Return a new list with ``additional`` plus every missing ``required`` name.

        A fresh list is built so that neither the caller's list nor a
        parameter default is ever mutated. Order is preserved and
        duplicates are dropped.
        """
        merged = []
        for name in (*(additional or ()), *required):
            if name not in merged:
                merged.append(name)
        return merged

    @staticmethod
    def _type_filter(node_type):
        """Build the where-filter that keeps only objects whose ``type`` equals ``node_type``."""
        return {
            "path": ['type'],
            "operator": 'Equal',
            'valueString': node_type
        }

    def q_class_near_object(self, class_name: str, uuid: str, node_type=None, properties=None, _additional=None, limit: int = 0) -> QueryUnpacker:
        """Find objects of ``class_name`` close to the object with id ``uuid``.

        ``distance`` and ``id`` are always requested in ``_additional``.

        Args:
            class_name: class to search in.
            uuid: id of the reference object.
            node_type: if given, only objects whose ``type`` property equals
                this value are returned.
            properties: properties to fetch for every hit.
            _additional: extra ``_additional`` fields to request.
            limit: maximum number of hits; ``0`` sets no explicit limit.
        """
        # or {"beacon": "weaviate://localhost/e5dc4a4c-ef0f-3aed-89a3-a73435c6bbcf"}
        near_object = {"id": uuid}

        q = (
            self._client.query
            .get(class_name, [] if properties is None else properties)
            # "certainty" only supported if distance==cosine
            .with_additional(self._merge_additional(_additional, "distance", "id"))
            .with_near_object(near_object)
        )
        if node_type is not None:
            q = q.with_where(self._type_filter(node_type))
        if limit != 0:
            q = q.with_limit(limit)
        return QueryUnpacker(q)

    def q_class_near_vec(self, class_name, vec, properties=None, _additional=None, certainty: float = 0, distance: float = 0, limit: int = 0) -> QueryUnpacker:
        """Find objects of ``class_name`` close to the vector ``vec``.

        ``id`` is always requested in ``_additional``.

        Args:
            class_name: class to search in.
            vec: query vector.
            properties: properties to fetch for every hit.
            _additional: extra ``_additional`` fields to request.
            certainty: certainty threshold; ``0`` means "not set".
            distance: distance threshold; ``0`` means "not set".
            limit: maximum number of hits; ``0`` sets no explicit limit.

        Raises:
            ValueError: if both ``certainty`` and ``distance`` are set.
        """
        if certainty != 0 and distance != 0:
            raise ValueError(
                'You can\'t set distance and certainty of near at the same time')

        near_vector = {
            "vector": vec
        }
        if certainty != 0:
            near_vector['certainty'] = certainty
        elif distance != 0:
            near_vector['distance'] = distance

        q = (
            self._client.query
            .get(class_name, [] if properties is None else properties)
            .with_additional(self._merge_additional(_additional, 'id'))
            .with_near_vector(near_vector)
        )
        if limit != 0:
            q = q.with_limit(limit)

        return QueryUnpacker(q)

    def q_class_with_attrval(self, class_name: str, attr: str, val: str, attrType: str, operator='Equal', node_type=None, attributes: Optional[List[str]] = None, _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Finds objects with value in attribute
            operator:
                Like
                Equal
                more: https://weaviate.io/developers/weaviate/api/graphql/filters#filter-structure
            attrType:
                valueInt: The integer value where the Path's last property name should be compared to.
                valueBoolean: The boolean value that the Path's last property name should be compared to.
                valueString: The string value that the Path's last property name should be compared to.
                valueText: The text value that the Path's last property name should be compared to.
                valueNumber: The number (float) value that the Path's last property name should be compared to.
                valueDate: The date (ISO 8601 timestamp, formatted as RFC3339) value that the Path's last property name should be compared to.
            node_type:
                if given, the attribute filter is AND-combined with ``type == node_type``.

            ``id`` is always requested in ``_additional``.
        """
        where_filter = {
            "path": [attr],
            "operator": operator,
            attrType: val
        }
        if node_type is not None:
            # Both conditions must hold: the attribute match and the type match.
            where_filter = {
                "operator": "And",
                "operands": [where_filter, self._type_filter(node_type)]
            }

        q = (
            self._client.query
            .get(class_name, [] if attributes is None else attributes)
            .with_additional(self._merge_additional(_additional, 'id'))
            .with_where(where_filter)
        )
        return QueryUnpacker(q)

    def qr_obj(self, class_name: str, id: str, attributes: Optional[List[str]] = None, _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Query one object by id"""
        where_filter = {
            "path": ['id'],
            "operator": "Equal",
            'valueString': id
        }

        q = (
            self._client.query
            .get(class_name, [] if attributes is None else attributes)
            .with_where(where_filter)
            .with_additional([] if _additional is None else _additional)
        )
        return QueryUnpacker(q)

    def q_class_all(self, class_name: str, attributes: Optional[List[str]] = None, _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Get Objects of a class, limit 10000 (``id`` is always requested)"""
        q = (
            self._client.query
            .get(class_name, [] if attributes is None else attributes)
            .with_additional(self._merge_additional(_additional, 'id'))
            .with_limit(10000)
        )
        return QueryUnpacker(q)

    def qr_class_all_after_cursor(self, class_name: str, after_uuid: str, limit: int) -> QueryUnpacker:
        """Get the ids of up to ``limit`` objects of a class after a specific one.

        Built as a raw GraphQL query using the ``after`` argument; only
        ``_additional{id}`` is requested.
        """
        qr = """
                {
                    Get {
                        %s (
                              limit: %s,
                              after: "%s"
                            ) {
                            _additional{id}
                        }
                    }
                }
             """ % (class_name, limit, after_uuid)
        return QueryUnpacker(query=RawQuery(self._client, qr))

    def q_aggregate_class(self, class_name: str) -> QueryUnpacker:
        """Aggregate query returning the meta count of ``class_name``."""
        q = (
            self._client.query.aggregate(class_name)
            .with_meta_count()
        )

        return QueryUnpacker(q)

    # NOTE: unfinished draft, kept for reference.
    # def qraw_get_class_hasvector(self, class_name, has_vector:bool):
    #     """Query class """
    #     qr = """

    #             {
    #                 Get {
    #                     %s (
    #                         where: {
    #                                 path: ["hasVector"],
    #                                 operator: Equal,
    #                                 valueBoolean: %s
    #                             }
    #                         ) {
    #                         _additional{id}
    #                     }
    #                 }
    #             }
    #          """ % (class_name,'true' if has_vector else 'false')
    #     print(RawQuery(self._client, qr).do())
    #     return QueryUnpacker(query=RawQuery(self._client, qr))

    def delete_object(self, class_name: str, uuid: str):
        """Delete the object ``uuid`` of ``class_name``."""
        self._client.data_object.delete(
            uuid=uuid,
            class_name=class_name,
            # all replica nodes must acknowledge delete
            consistency_level=weaviate.ConsistencyLevel('ALL'),
        )

    def get_schema(self):
        """Return the schema as reported by ``client.schema.get()``."""
        return self._client.schema.get()

    def describe_count(self) -> None:
        """Lists count of all objects in db"""

        schema = self.get_schema()

        for _class in schema['classes']:
            r = self.q_aggregate_class(_class['class']).do()
            print(f"""{_class['class']} {r[0]['meta']} """)

    def get_class_objects(self, class_name: str, **kwargs):
        """Return max 100, https://weaviate-python-client.readthedocs.io/en/stable/weaviate.data.html#weaviate.data.DataObject.get """
        return self._client.data_object.get(
            class_name=class_name,
            **kwargs
        )

    def get_per_id(self, uuid: str):
        """Fetch a single object by id via ``client.data_object.get_by_id``."""
        return self._client.data_object.get_by_id(uuid=uuid)

In [8]:
import weaviate

# Connect to the Weaviate instance at localhost:8081 and print the object count of every class.
query = Query(weaviate.Client('http://localhost:8081'))
query.describe_count()

Node {'count': 40269} 


In [9]:
# Node objects whose name matches 'Food' (Like operator), fetching only the name.
result = query.q_class_with_attrval(
    class_name='Node',
    attr='name',
    val='Food',
    attrType='valueString',
    operator='Like',
    attributes=['name'],
).do()
print(result)

[{'_additional': {'id': 'f4b6154d-0d44-5675-bf46-a403d1aaa87a'}, 'name': 'Food Service Managers/Catering Director'}, {'_additional': {'id': 'f650935c-8e3a-54dd-8a61-9600d66c9d80'}, 'name': 'Food Service Managers/Catering Coordinator'}, {'_additional': {'id': 'c47e3b36-9bd8-5802-bee7-07122a97d1b7'}, 'name': 'Food Service Managers/Concessionaire'}, {'_additional': {'id': '5e9b4e18-5f23-5cd5-8de6-fb698f2e0744'}, 'name': 'Food Service Managers/Food Production Manager'}, {'_additional': {'id': 'a9003cb9-9414-5acc-ae91-d648a1b59ecd'}, 'name': 'Food Service Managers/Dining Services Director'}, {'_additional': {'id': '10c0d5a6-9a89-5985-81cf-251cf999e10f'}, 'name': 'Food Service Managers/Chef Manager'}, {'_additional': {'id': '26f16829-4482-52a9-bfa7-54a3606d6d89'}, 'name': 'Food Service Managers/Food Service General Manager'}, {'_additional': {'id': '6b962fbf-65cc-5196-8e81-2cce401f49be'}, 'name': 'Food Service Managers/Deli Manager'}, {'_additional': {'id': 'fcc4e1f2-f32c-55c9-9378-cff0b71d3

In [10]:

# Same lookup for 'data', additionally restricted to nodes of type 'Skill'.
result = query.q_class_with_attrval(
    class_name='Node',
    attr='name',
    val='data',
    attrType='valueString',
    operator='Like',
    attributes=['name'],
    node_type='Skill',
).do()
print(result)

[{'_additional': {'id': '197f73e0-2016-5af8-9c28-923bfef3e664'}, 'name': 'data mapping/design'}, {'_additional': {'id': 'ea776eb2-095b-5b7d-bd7d-04581f990ef5'}, 'name': 'data quality initiatives'}, {'_additional': {'id': '80b29516-45dd-5e55-bca1-1e4e889bb5aa'}, 'name': 'data at rest'}, {'_additional': {'id': 'e7d50e33-fcf5-528f-9a5d-5d888968acfb'}, 'name': 'big data technologies'}, {'_additional': {'id': 'a52ff698-b42d-5ead-9eea-321a074e3601'}, 'name': 'data pipeline management'}, {'_additional': {'id': '52967ff6-7f71-5b95-89a8-e540e2a1e7b2'}, 'name': 'data network management'}, {'_additional': {'id': 'b43f2b57-95e9-586b-beeb-1c8ff9b1103a'}, 'name': 'data analysis and utilization'}, {'_additional': {'id': 'a138363c-94c3-5b73-9ab2-35cb68976b8a'}, 'name': 'data input'}, {'_additional': {'id': 'b778f863-7bdc-54a5-aee0-ef107c309498'}, 'name': 'data center optimization'}, {'_additional': {'id': '37b26107-2133-52a6-b062-89caa67f1fca'}, 'name': 'data warehouses'}, {'_additional': {'id': 'e3a0

In [60]:
def get_similar(uuid, which=None, limit=11):
    """Print the nodes closest to the node with id ``uuid``.

    Runs a near-object search on the ``Node`` class through the module-level
    ``query`` helper and prints one ``"<distance> <name>"`` line per hit,
    in the order returned by the search.

    Args:
        uuid: id of the reference node.
        which: optional node type, forwarded as ``node_type``; ``None``
            searches nodes of every type.
        limit: maximum number of hits to print. Defaults to 11, the value
            that used to be hard-coded.
    """
    # The limit is also sent with the query so no more hits are fetched than printed.
    result = query.q_class_near_object(
        'Node', uuid, properties=['name'], node_type=which, limit=limit).do()

    print('Similar concepts:')
    for hit in result[:limit]:
        print(f"{hit['_additional']['distance']:.5f} {hit['name']}")

In [13]:
#  query = Query(weaviate.Client('http://localhost:8081'))
#     vector = np.load('coe-da-pa-ssc.npz')['vectors'][100]

#     q = query.q_class_near_vec('Image', vector, _additional=['distance'])
#     result = q.do()

In [14]:
 # q_class_with_attrval


In [90]:
def demo(type, part_of_name, return_type=None):
    """Look up nodes by name fragment and show what is similar to the first match.

    Prints up to five nodes whose name matches ``*part_of_name*``, picks the
    first one and calls ``get_similar`` for it.

    Args:
        type: node type to search in: ``'Skill'``, ``'Job'`` or ``None`` for
            any type. (The name shadows the builtin but is kept so existing
            keyword callers keep working.)
        part_of_name: fragment the node name has to contain.
        return_type: node type of the similar nodes to list: ``'Skill'``,
            ``'Job'`` or ``None`` for any type.

    Raises:
        ValueError: if ``type`` or ``return_type`` is not an allowed value.
    """
    allowed = ('Skill', 'Job', None)
    # Explicit checks instead of ``assert``: asserts are stripped under ``python -O``.
    if return_type not in allowed:
        raise ValueError(f'return_type must be one of {allowed}, got {return_type!r}')
    if type not in allowed:
        raise ValueError(f'type must be one of {allowed}, got {type!r}')

    # Avoid printing "Found Nones:" when no type filter is given.
    label = 'nodes' if type is None else f'{type}s'
    print(f'Found {label}:')

    pattern = f'*{part_of_name}*'
    result = query.q_class_with_attrval(
        class_name='Node', attr='name', attrType='valueString', val=pattern,
        operator='Like', attributes=['name'], node_type=type).do()
    if not result:
        print('No nodes containing string found')
        return

    for node in result[:5]:
        print(node['name'])

    chosen = result[0]
    print('')
    print('>>> choosing', chosen['name'], '<<<')

    get_similar(chosen['_additional']['id'], which=return_type)

    

In [76]:

# Job lookup, similar nodes restricted to type 'Job'.
demo(
    type='Job',
    part_of_name='Computer and Information Systems Managers/Knowledge Manager',
    return_type='Job',
)

Found Jobs:
Computer and Information Systems Managers/Knowledge Manager

>>> choosing Computer and Information Systems Managers/Knowledge Manager <<<
Similar concepts:
0.00000 Computer and Information Systems Managers/Knowledge Manager
0.01130 Training and Development Managers/Knowledge Manager
0.01269 Software Developers/Knowledge Management Application Developer
0.01292 Medical Secretaries and Administrative Assistants/Administrative Support Specialist
0.01298 Office Clerks, General/Administrative Support Specialist
0.01331 Executive Secretaries and Executive Administrative Assistants/Administrative Support Specialist
0.01361 Environmental Scientists and Specialists, Including Health/Natural Resources Specialist
0.01427 Production, Planning, and Expediting Clerks/Production Coordinator
0.01433 First-Line Supervisors of Office and Administrative Support Workers/Support Manager
0.01456 Training and Development Specialists/Skill Training Program Coordinator
0.01465 Lawyers/Legal Advisor

In [63]:
# Same job lookup, similar nodes restricted to type 'Skill'.
demo(
    type='Job',
    part_of_name='Computer and Information Systems Managers/Knowledge Manager',
    return_type='Skill',
)

Found Jobs:
Computer and Information Systems Managers/Knowledge Manager

>>> choosing Computer and Information Systems Managers/Knowledge Manager <<<
Similar concepts:
0.59545 process and procedures
0.59967 print production techniques
0.64259 fact-based decision-making
0.64383 analyzing patents
0.64429 surface characterization
0.64431 qa testing tools
0.64431 healthcare marketplace
0.64431 unmanned systems
0.64431 google ads platform
0.64431 procurement transactions
0.64431 programmatic leadership


In [94]:
# Job lookup by name fragment, similar nodes restricted to type 'Job'.
demo(type='Job', part_of_name='Front End', return_type='Job')

Found Jobs:
Web and Digital Interface Designers/Front End Developer
Web and Digital Interface Designers/Front End Web Developer
Web Developers/Front End Web Developer
Web Developers/Front End Software Engineer
Web Developers/Front End Developer

>>> choosing Web and Digital Interface Designers/Front End Developer <<<
Similar concepts:
0.00000 Web and Digital Interface Designers/Front End Developer
0.02272 Wind Energy Development Managers/Business Development Director
0.02389 Office Clerks, General/Front Office Assistant
0.02471 Education Administrators, Kindergarten through Secondary/School Business Manager
0.02551 Air Traffic Controllers/Flight Control Specialist
0.02552 Human Resources Specialists/Personnel Consultant
0.02559 Home Health Aides/Direct Care Professional
0.02567 Clinical Nurse Specialists/Adult Health Clinical Nurse Specialist
0.02568 Commercial and Industrial Designers/Product Developer
0.02623 Court Reporters and Simultaneous Captioners/Shorthand Reporter
0.02664 Chil

In [71]:
# Job lookup by name fragment, similar nodes restricted to type 'Skill'.
demo(type='Job', part_of_name='Dentist', return_type='Skill')

Found Jobs:
Dentists, General/Pediatric Dentist
Dentists, General/Public Health Dentist
Dentists, General/General Dentist
Dentists, All Other Specialists/Pediatric Dentist
Veterinarians/Veterinary Dentist (Vet Dentist)

>>> choosing Dentists, General/Pediatric Dentist <<<
Similar concepts:
0.60548 process and procedures
0.60593 print production techniques
0.64221 fact-based decision-making
0.64238 qa testing tools
0.64238 cable splicing
0.64238 procurement transactions
0.64238 google ads platform
0.64238 healthcare marketplace
0.64238 programmatic leadership
0.64238 unmanned systems
0.64238 performance and reliability testing


In [84]:
# Skill lookup by name fragment, similar nodes restricted to type 'Job'.
demo(type='Skill', part_of_name='cook', return_type='Job')

Found Skills:
line cook experience
grill cook experience
grill cook
lead line cook
cook experience

>>> choosing line cook experience <<<
Similar concepts:
0.92024 Project Management Specialists/Project Delivery Manager
0.92601 Aircraft Mechanics and Service Technicians/Flight Test Mechanic
0.92640 Inspectors, Testers, Sorters, Samplers, and Weighers/Shipping Inspector
0.92973 Software Quality Assurance Analysts and Testers/Software Requirements Engineer
0.93141 Epidemiologists/Environmental Epidemiologist
0.93185 Inspectors, Testers, Sorters, Samplers, and Weighers/Major Assembly Inspector
0.93241 Gambling Managers/Table Games Shift Manager
0.93244 Health and Safety Engineers, Except Mining Safety Engineers and Inspectors/Product Safety Test Engineer
0.93356 Computer Hardware Engineers/Digital Design Engineer
0.93405 First-Line Supervisors of Gambling Services Workers/Table Games Shift Manager
0.93426 Materials Engineers/Test Engineer


In [82]:
# Node objects whose name matches 'Food' (Like operator); the result is kept in ``result``.
result = query.q_class_with_attrval(
    class_name='Node',
    attr='name',
    val='Food',
    attrType='valueString',
    operator='Like',
    attributes=['name'],
).do()

In [17]:
# Echo ``result`` so the notebook renders its value.
result

[{'_additional': {'id': 'f4b6154d-0d44-5675-bf46-a403d1aaa87a'},
  'name': 'Food Service Managers/Catering Director'},
 {'_additional': {'id': 'f650935c-8e3a-54dd-8a61-9600d66c9d80'},
  'name': 'Food Service Managers/Catering Coordinator'},
 {'_additional': {'id': 'c47e3b36-9bd8-5802-bee7-07122a97d1b7'},
  'name': 'Food Service Managers/Concessionaire'},
 {'_additional': {'id': '5e9b4e18-5f23-5cd5-8de6-fb698f2e0744'},
  'name': 'Food Service Managers/Food Production Manager'},
 {'_additional': {'id': 'a9003cb9-9414-5acc-ae91-d648a1b59ecd'},
  'name': 'Food Service Managers/Dining Services Director'},
 {'_additional': {'id': '10c0d5a6-9a89-5985-81cf-251cf999e10f'},
  'name': 'Food Service Managers/Chef Manager'},
 {'_additional': {'id': '26f16829-4482-52a9-bfa7-54a3606d6d89'},
  'name': 'Food Service Managers/Food Service General Manager'},
 {'_additional': {'id': '6b962fbf-65cc-5196-8e81-2cce401f49be'},
  'name': 'Food Service Managers/Deli Manager'},
 {'_additional': {'id': 'fcc4e1f2-