In [121]:
import weaviate
from typing import List

from tqdm.auto import tqdm
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from typing import List, Optional
import math
import os


class RawQuery:
    """Holds a raw GraphQL query string together with the client that will run it."""

    def __init__(self, client: weaviate.Client, query: str) -> None:
        # Nothing is sent here; the query is only stored until do() is called.
        self.client = client
        self._query = query

    def do(self):
        """Pass the stored query string to ``client.query.raw`` and return its result."""
        return self.client.query.raw(self._query)


class QueryUnpacker:
    """Run a query object and strip the GraphQL envelope from its response.

    ``query`` is any object exposing a ``do()`` method that returns the
    response as a dict (the builders created in ``Query`` or a ``RawQuery``).
    """

    # Root fields below which the actual payload of a response is nested.
    _ROOT_FIELDS = ('Get', 'Aggregate', 'Explore')

    # The annotation is quoted so it is not evaluated when the class is defined.
    def __init__(self, query: "weaviate.gql.Query") -> None:
        self._query = query

    @property
    def query(self):
        """Return the wrapped query object.

        Raises:
            ValueError: if the wrapped object is a ``RawQuery``.
        """
        if isinstance(self._query, RawQuery):
            raise ValueError(
                'query attribute is a RawQuery object, implementation not done yet')
        return self._query

    def do(self):
        """Execute the query and return the innermost payload.

        The response is descended one level at a time, always following the
        first key, until one of ``_ROOT_FIELDS`` is reached.  The value stored
        under the first key below that root field is returned; for a response
        shaped like ``{'data': {'Get': {'Node': [...]}}}`` that is the list
        stored under ``'Node'``.

        Raises:
            ValueError: if the response is not a dict, contains an ``errors``
                entry, or holds no non-empty Get/Aggregate/Explore payload.
        """
        result = self._query.do()

        if not isinstance(result, dict):
            raise ValueError(
                f'QueryUnpacker expected a dict response, got {type(result).__name__}')
        if 'errors' in result:
            raise ValueError(
                f'QueryUnpacker at execution of query: \n {result["errors"]}')

        # Bounded descent: stops as soon as there is nothing left to unwrap
        # instead of failing with an unrelated AttributeError/IndexError.
        node = result
        while isinstance(node, dict) and node:
            key, node = next(iter(node.items()))
            if key in self._ROOT_FIELDS:
                if isinstance(node, dict) and node:
                    return next(iter(node.values()))
                raise ValueError(
                    f'QueryUnpacker found an empty or malformed {key} payload: {node!r}')

        raise ValueError(
            'QueryUnpacker could not find a Get/Aggregate/Explore payload in the response')


# idea: get locals() at the beginning of the func, QueryUnpacker(locals())
# return QueryUnpacker

class Query:
    """Builds the Weaviate queries used throughout this notebook.

    The ``q_*`` / ``qr_*`` methods only assemble a query and return it wrapped
    in a ``QueryUnpacker``; the query object's own ``do()`` is only invoked
    when ``.do()`` is called on the returned wrapper.  The remaining methods
    call the client directly.
    """

    def __init__(self, client: weaviate.Client):
        self._client = client

    @staticmethod
    def _merge_fields(fields, *required: str) -> List[str]:
        """Return a fresh list holding ``fields`` followed by any missing ``required`` names.

        Duplicates are dropped and the input is never modified, so neither a
        caller's list nor a default argument can grow between calls.
        """
        merged: List[str] = []
        for name in (*(fields or ()), *required):
            if name not in merged:
                merged.append(name)
        return merged

    @staticmethod
    def _type_filter(node_type: str) -> dict:
        """Return the where-filter matching objects whose ``type`` property equals ``node_type``."""
        return {
            "path": ['type'],
            "operator": 'Equal',
            'valueString': node_type,
        }

    def q_class_near_object(self, class_name: str, uuid: str, node_type=None, properties: Optional[List[str]] = None, _additional: Optional[List[str]] = None, limit: int = 0) -> QueryUnpacker:
        """Find objects of ``class_name`` near the object with id ``uuid``.

        ``distance`` and ``id`` are always requested as additional fields.
        When ``node_type`` is given, a where-filter on the ``type`` property is
        added.  ``limit`` is only applied when it is non-zero.
        """
        # or {"beacon": "weaviate://localhost/e5dc4a4c-ef0f-3aed-89a3-a73435c6bbcf"}
        near_object = {"id": uuid}

        q = (
            self._client.query
            .get(class_name, list(properties or []))
            # "certainty" only supported if distance==cosine
            .with_additional(self._merge_fields(_additional, 'distance', 'id'))
            .with_near_object(near_object)
        )
        if node_type is not None:
            q = q.with_where(self._type_filter(node_type))
        if limit != 0:
            q = q.with_limit(limit)
        return QueryUnpacker(q)

    def q_class_near_vec(self, class_name, vec, properties: Optional[List[str]] = None, _additional: Optional[List[str]] = None, certainty: float = 0, distance: float = 0, limit: int = 0) -> QueryUnpacker:
        """Find objects of ``class_name`` close to the vector ``vec``.

        At most one of ``certainty`` / ``distance`` may be non-zero; a value of
        0 means "not set".  ``id`` is always requested as an additional field
        and ``limit`` is only applied when it is non-zero.

        Raises:
            ValueError: if both ``certainty`` and ``distance`` are non-zero.
        """
        if certainty != 0 and distance != 0:
            raise ValueError(
                'You can\'t set distance and certainty of near at the same time')

        near_vector = {"vector": vec}
        if certainty != 0:
            near_vector['certainty'] = certainty
        elif distance != 0:
            near_vector['distance'] = distance

        q = (
            self._client.query
            .get(class_name, list(properties or []))
            .with_additional(self._merge_fields(_additional, 'id'))
            .with_near_vector(near_vector)
        )
        if limit != 0:
            q = q.with_limit(limit)

        return QueryUnpacker(q)

    def q_class_with_attrval(self, class_name: str, attr: str, val: str, attrType: str, operator='Equal', node_type=None, attributes: Optional[List[str]] = None, _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Finds objects with value in attribute
            operator:
                Like
                Equal
                more: https://weaviate.io/developers/weaviate/api/graphql/filters#filter-structure
            attrType:
                valueInt: The integer value where the Path's last property name should be compared to.
                valueBoolean: The boolean value that the Path's last property name should be compared to.
                valueString: The string value that the Path's last property name should be compared to.
                valueText: The text value that the Path's last property name should be compared to.
                valueNumber: The number (float) value that the Path's last property name should be compared to.
                valueDate: The date (ISO 8601 timestamp, formatted as RFC3339) value that the Path's last property name should be compared to.
            node_type:
                when given, the attribute condition is And-combined with an
                Equal filter on the ``type`` property.
        """
        attr_condition = {
            "path": [attr],
            "operator": operator,
            attrType: val
        }
        if node_type is None:
            where_filter = attr_condition
        else:
            where_filter = {
                "operator": "And",
                "operands": [attr_condition, self._type_filter(node_type)],
            }

        q = (
            self._client.query
            .get(class_name, list(attributes or []))
            .with_additional(self._merge_fields(_additional, 'id'))
            .with_where(where_filter)
        )
        return QueryUnpacker(q)

    def qr_obj(self, class_name: str, id: str, attributes: Optional[List[str]] = None, _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Query one object by id"""
        where_filter = {
            "path": ['id'],
            "operator": "Equal",
            'valueString': id
        }

        q = (
            self._client.query
            .get(class_name, list(attributes or []))
            .with_where(where_filter)
            # Unlike the other builders, 'id' is not added automatically here.
            .with_additional(self._merge_fields(_additional))
        )
        return QueryUnpacker(q)

    def q_class_all(self, class_name: str, attributes: Optional[List[str]] = None, _additional: Optional[List[str]] = None) -> QueryUnpacker:
        """Get Objects of a class, limit 10000"""
        q = (
            self._client.query
            .get(class_name, list(attributes or []))
            .with_additional(self._merge_fields(_additional, 'id'))
            .with_limit(10000)
        )
        return QueryUnpacker(q)

    def qr_class_all_after_cursor(self, class_name: str, after_uuid: str, limit: int):
        """Get All Objects of a class after specific one

        Builds a raw GraphQL ``Get`` query with ``limit`` and ``after`` set and
        requesting only ``_additional{id}``.  The arguments are interpolated
        into the query text unescaped, so they must come from a trusted source.
        """
        qr = """
                {
                    Get {
                        %s (
                              limit: %s,
                              after: "%s"
                            ) {
                            _additional{id}
                        }
                    }
                }
             """ % (class_name, limit, after_uuid)
        return QueryUnpacker(query=RawQuery(self._client, qr))

    def q_aggregate_class(self, class_name: str) -> QueryUnpacker:
        """Build an aggregate query with a meta count for ``class_name``."""
        q = (
            self._client.query.aggregate(class_name)
            .with_meta_count()
        )

        return QueryUnpacker(q)

    # def qraw_get_class_hasvector(self, class_name, has_vector:bool):
    #     """Query class """
    #     qr = """

    #             {
    #                 Get {
    #                     %s (
    #                         where: {
    #                                 path: ["hasVector"],
    #                                 operator: Equal,
    #                                 valueBoolean: %s
    #                             }
    #                         ) {
    #                         _additional{id}
    #                     }
    #                 }
    #             }
    #          """ % (class_name,'true' if has_vector else 'false')
    #     print(RawQuery(self._client, qr).do())
    #     return QueryUnpacker(query=RawQuery(self._client, qr))

    def delete_object(self, class_name: str, uuid: str):
        """Delete the object ``uuid`` of ``class_name`` with consistency level ``ALL``."""
        self._client.data_object.delete(
            uuid=uuid,
            class_name=class_name,
            # all replica nodes must acknowledge delete
            consistency_level=weaviate.ConsistencyLevel('ALL'),
        )

    def get_schema(self):
        """Return the schema as reported by ``client.schema.get()``."""
        return self._client.schema.get()

    def describe_count(self) -> None:
        """Lists count of all objects in db"""

        schema = self.get_schema()

        for _class in schema['classes']:
            r = self.q_aggregate_class(_class['class']).do()
            print(f"""{_class['class']} {r[0]['meta']} """)

    def get_class_objects(self, class_name: str, **kwargs):
        """Return max 100, https://weaviate-python-client.readthedocs.io/en/stable/weaviate.data.html#weaviate.data.DataObject.get """
        return self._client.data_object.get(
            class_name=class_name,
            **kwargs
        )

    def get_per_id(self, uuid: str):
        """Fetch a single object via ``client.data_object.get_by_id``."""
        return self._client.data_object.get_by_id(uuid=uuid)

In [122]:
import weaviate

# Wrap a client for the Weaviate instance at localhost:8081 and print the per-class counts.
query = Query(client=weaviate.Client(url='http://localhost:8081'))
query.describe_count()

Node {'count': 136684} 


In [123]:
# Node objects whose `name` satisfies the Like filter with value 'Food'; only `name` (plus id) is fetched.
result = (
    query
    .q_class_with_attrval(
        class_name='Node',
        attr='name',
        val='Food',
        attrType='valueString',
        operator='Like',
        attributes=['name'],
    )
    .do()
)
print(result)

[{'_additional': {'id': 'f4b6154d-0d44-5675-bf46-a403d1aaa87a'}, 'name': 'Food Service Managers/Catering Director'}, {'_additional': {'id': 'f650935c-8e3a-54dd-8a61-9600d66c9d80'}, 'name': 'Food Service Managers/Catering Coordinator'}, {'_additional': {'id': 'c47e3b36-9bd8-5802-bee7-07122a97d1b7'}, 'name': 'Food Service Managers/Concessionaire'}, {'_additional': {'id': '5e9b4e18-5f23-5cd5-8de6-fb698f2e0744'}, 'name': 'Food Service Managers/Food Production Manager'}, {'_additional': {'id': 'a9003cb9-9414-5acc-ae91-d648a1b59ecd'}, 'name': 'Food Service Managers/Dining Services Director'}, {'_additional': {'id': '10c0d5a6-9a89-5985-81cf-251cf999e10f'}, 'name': 'Food Service Managers/Chef Manager'}, {'_additional': {'id': '26f16829-4482-52a9-bfa7-54a3606d6d89'}, 'name': 'Food Service Managers/Food Service General Manager'}, {'_additional': {'id': '6b962fbf-65cc-5196-8e81-2cce401f49be'}, 'name': 'Food Service Managers/Deli Manager'}, {'_additional': {'id': 'fcc4e1f2-f32c-55c9-9378-cff0b71d3

In [124]:

# Same Like search for 'data', additionally restricted to objects whose `type` equals 'Skill'.
result = (
    query
    .q_class_with_attrval(
        class_name='Node',
        attr='name',
        val='data',
        attrType='valueString',
        operator='Like',
        node_type='Skill',
        attributes=['name'],
    )
    .do()
)
print(result)

[{'_additional': {'id': '197f73e0-2016-5af8-9c28-923bfef3e664'}, 'name': 'data mapping/design'}, {'_additional': {'id': 'ea776eb2-095b-5b7d-bd7d-04581f990ef5'}, 'name': 'data quality initiatives'}, {'_additional': {'id': '80b29516-45dd-5e55-bca1-1e4e889bb5aa'}, 'name': 'data at rest'}, {'_additional': {'id': 'e7d50e33-fcf5-528f-9a5d-5d888968acfb'}, 'name': 'big data technologies'}, {'_additional': {'id': 'a52ff698-b42d-5ead-9eea-321a074e3601'}, 'name': 'data pipeline management'}, {'_additional': {'id': '52967ff6-7f71-5b95-89a8-e540e2a1e7b2'}, 'name': 'data network management'}, {'_additional': {'id': 'b43f2b57-95e9-586b-beeb-1c8ff9b1103a'}, 'name': 'data analysis and utilization'}, {'_additional': {'id': 'a138363c-94c3-5b73-9ab2-35cb68976b8a'}, 'name': 'data input'}, {'_additional': {'id': 'b778f863-7bdc-54a5-aee0-ef107c309498'}, 'name': 'data center optimization'}, {'_additional': {'id': '37b26107-2133-52a6-b062-89caa67f1fca'}, 'name': 'data warehouses'}, {'_additional': {'id': 'e3a0

In [125]:
def get_similar(uuid, which=None, max_results=31):
    """Print the ``Node`` objects nearest to the object with id ``uuid``.

    Args:
        uuid: id of the reference object, passed to the near-object query.
        which: optional node type; forwarded as ``node_type`` so only objects
            of that type are returned.
        max_results: maximum number of rows printed (default 31, the value
            that used to be hard-coded).

    Each printed row is the distance with five decimals followed by the name.
    Relies on the module-level ``query`` object.
    """
    result = query.q_class_near_object('Node', uuid, properties=['name'], node_type=which).do()
    print('Similar concepts:')
    for position, hit in enumerate(result):
        if position >= max_results:
            break
        print(f"{hit['_additional']['distance']:.5f} {hit['name']}")

In [126]:
#  query = Query(weaviate.Client('http://localhost:8081'))
#     vector = np.load('coe-da-pa-ssc.npz')['vectors'][100]

#     q = query.q_class_near_vec('Image', vector, _additional=['distance'])
#     result = q.do()

In [127]:
 # q_class_with_attrval


In [128]:
def demo(type, part_of_name, return_type=None):
    """Search nodes by name and print the neighbours of the first match.

    Args:
        type: node type to search in: 'Skill', 'Job' or None (no type filter).
        part_of_name: value for the ``Like`` filter on the ``name`` property.
        return_type: node type of the neighbours to list: 'Skill', 'Job' or
            None (no type filter).

    Prints up to five matching names, then calls ``get_similar`` for the first
    match.  Relies on the module-level ``query`` object.

    Raises:
        ValueError: if ``type`` or ``return_type`` is not an allowed value.
            (Raised explicitly rather than via ``assert`` so the check
            survives ``python -O``.)
    """
    allowed = ('Skill', 'Job', None)
    if return_type not in allowed:
        raise ValueError(
            f"return_type must be 'Skill', 'Job' or None, got {return_type!r}")
    if type not in allowed:
        raise ValueError(
            f"type must be 'Skill', 'Job' or None, got {type!r}")

    # Avoid the header "Found Nones:" when no type filter is used.
    label = 'Node' if type is None else type
    print(f'Found {label}s:')

    result = query.q_class_with_attrval(
        class_name='Node', attr='name', attrType='valueString', val=part_of_name,
        operator='Like', attributes=['name'], node_type=type).do()
    if not result:
        print('No nodes containing string found')
        return
    for hit in result[:5]:
        print(hit['name'])

    print('')
    print('>>> choosing', result[0]['name'], '<<<')
    first_id = result[0]['_additional']['id']

    get_similar(first_id, which=return_type)

    

In [129]:

# Search Job nodes with the pattern 'Doctor', then list Job nodes near the first hit.
demo(type='Job', part_of_name='Doctor', return_type='Job')

Found Jobs:
Epidemiologists/Epidemiology Research Doctor
Dentists, General/Dental Medicine Doctor (DMD)
Optometrists/Optometry Doctor (OD)
Dentists, General/Dental Surgery Doctor (DDS)
Veterinarians/Veterinary Medicine Doctor (DVM)

>>> choosing Epidemiologists/Epidemiology Research Doctor <<<
Similar concepts:
-0.00000 Epidemiologists/Epidemiology Research Doctor
0.00757 Epidemiologists/Epidemiologist Researcher
0.00768 Epidemiologists/Research Epidemiologist
0.00875 Epidemiologists/Medical Epidemiologist
0.01258 Epidemiologists/Clinical Epidemiologist
0.01282 Intelligence Analysts/Investigative Research Specialist
0.01402 Microbiologists/Microbiology Specialist
0.01480 Epidemiologists/Public Health Epidemiologist
0.01504 Histotechnologists/Histology Specialist
0.01510 Epidemiologists/Pharmacoepidemiologist
0.01515 Skincare Specialists/Skin Care Specialist
0.01577 Epidemiologists/Chronic Disease Epidemiologist
0.01582 Dermatologists/Dermatologist Physician
0.01587 Epidemiologists/Epid

In [130]:
# Search Job nodes with the pattern 'Chef', then list Job nodes near the first hit.
demo(type='Job', part_of_name='Chef', return_type='Job')

Found Jobs:
Chefs and Head Cooks/Head Pastry Chef
Chefs and Head Cooks/Head Chef
Chefs and Head Cooks/Kitchen Chef
Chefs and Head Cooks/Pastry Chef
Chefs and Head Cooks/Sous Chef

>>> choosing Chefs and Head Cooks/Head Pastry Chef <<<
Similar concepts:
0.00000 Chefs and Head Cooks/Head Pastry Chef
0.00819 Bakers/Pastry Chef
0.01376 Industrial Engineering Technologists and Technicians/Production Control Expert
0.01417 Emergency Medicine Physicians/Emergency Room Physician (ER Physician)
0.01443 Cooks, Restaurant/Foreign Food Specialty Cook
0.01444 Printing Press Operators/Offset Pressman
0.01448 First-Line Supervisors of Production and Operating Workers/Watch Manufacturing Supervisor
0.01461 Dishwashers/Dishwashing Machine Operator
0.01511 Media Technical Directors/Managers/Production Supervisor
0.01522 Industrial Engineering Technologists and Technicians/Manufacturing Planner
0.01527 Coin, Vending, and Amusement Machine Servicers and Repairers/Vending Service Technician
0.01535 Archite

In [131]:
# Search Job nodes with the pattern 'Doctor', then list Skill nodes near the first hit.
demo(type='Job', part_of_name='Doctor', return_type='Skill')

Found Jobs:
Epidemiologists/Epidemiology Research Doctor
Dentists, General/Dental Medicine Doctor (DMD)
Optometrists/Optometry Doctor (OD)
Dentists, General/Dental Surgery Doctor (DDS)
Veterinarians/Veterinary Medicine Doctor (DVM)

>>> choosing Epidemiologists/Epidemiology Research Doctor <<<
Similar concepts:
0.56871 print production techniques
0.57158 process and procedures
0.60022 fact-based decision-making
0.60101 analyzing patents
0.60137 surface characterization
0.60182 google ads platform
0.60182 procurement transactions
0.60182 qa testing tools
0.60182 healthcare marketplace
0.60182 programmatic leadership
0.60182 unmanned systems
0.60182 performance and reliability testing
0.60182 cable splicing
0.60246 user provisioning
0.60737 data entry and retrieval
0.60774 background verification
0.61459 operating welding equipment
0.61636 problem formulation
0.62152 good judgement
0.62682 environmental plan
0.62820 control panel layout
0.65266 wireless device support
0.65982 understandi

In [132]:
# Pattern with a leading '*'; list Skill nodes near the first matching Job.
demo(type='Job', part_of_name='*Machine Learning Data Scientist', return_type='Skill')

Found Jobs:
Data Scientists/Machine Learning Scientist
Data Scientists/Machine Learning Data Scientist

>>> choosing Data Scientists/Machine Learning Scientist <<<
Similar concepts:
0.61630 process and procedures
0.62093 print production techniques
0.66177 surface characterization
0.66227 analyzing patents
0.66280 fact-based decision-making
0.66448 google ads platform
0.66448 cable splicing
0.66448 healthcare marketplace
0.66448 unmanned systems
0.66448 performance and reliability testing
0.66448 programmatic leadership
0.66448 procurement transactions
0.66448 qa testing tools
0.66502 user provisioning
0.66916 data entry and retrieval
0.66947 background verification
0.67524 operating welding equipment
0.68020 good judgement
0.68448 problem formulation
0.68555 environmental plan
0.68671 control panel layout
0.69077 wireless device support
0.71335 understanding of cloud services
0.72679 global services organization
0.74698 value added service
0.75254 operations oversight
0.75437 data gov

In [133]:
# Search Skill nodes with the pattern 'excel', then list Skill nodes near the first hit.
demo(type='Skill', part_of_name='excel', return_type='Skill')

Found Skills:
ms-office excel
specifically excel and powerpoint
excel spreadsheet skills
experience using excel
excel and powerpoint

>>> choosing ms-office excel <<<
Similar concepts:
0.00000 ms-office excel
0.02905 automation and controls systems
0.02954 construction quality control
0.02961 communicable disease management
0.02964 chemical program
0.02988 maintenance and industrial services
0.03444 local delivery experience
0.03553 confidentiality of information
0.04436 contract crushing
0.04578 work queue
0.05151 adobe after effects
0.05442 safety training and inspections
0.05554 networking applications
0.05634 reliability planning
0.05641 thin film optical coatings
0.06096 scheduling system
0.06109 efficient processing
0.06123 apache web servers
0.06221 software and application design
0.06416 administration of oxygen
0.06670 training and more
0.06938 accurate order processing
0.06969 post graduate qualification
0.07019 incident lifecycle
0.07042 jib procedures and standards
0.07049 

In [134]:
# Search Skill nodes with the pattern 'eating', then list Skill nodes near the first hit.
demo(type='Skill', part_of_name='eating', return_type='Skill')


Found Skills:
healthful eating
eating disorders
eating disorder treatment
eating areas

>>> choosing healthful eating <<<
Similar concepts:
0.00000 healthful eating
0.01789 customer partnerships
0.02458 functional analyses
0.03621 macroeconomic forecasting
0.03925 continuous growth
0.04525 mechanical repair and maintenance
0.04530 knowledge of electronic components
0.04557 professional training
0.05110 interconnection service agreements
0.05516 patient admission/transfer/discharge
0.05757 incident lifecycle
0.06373 public relations/customer service
0.06430 punctuation skills
0.06545 it landscape
0.06935 intellectual property issues
0.07288 energy transition
0.07363 global support
0.07450 developing relationships with clients
0.07458 incident follow-up
0.07807 adobe after effects
0.07915 administration of oxygen
0.08108 hospital personnel
0.08198 guest requests
0.08317 contract crushing
0.08433 practical setting
0.08445 software and application design
0.08499 hr consultative support
0.0

In [135]:
# Search Job nodes with the pattern 'Front End' (the recorded run found no match).
demo(type='Job', part_of_name='Front End', return_type='Job')

Found Jobs:
No nodes containing string found


In [136]:
# Search Job nodes with the pattern 'Dentist', then list Skill nodes near the first hit.
demo(type='Job', part_of_name='Dentist', return_type='Skill')

Found Jobs:
Dentists, General/Pediatric Dentist
Dentists, General/Public Health Dentist
Dentists, General/General Dentist
Dentists, All Other Specialists/Pediatric Dentist
Veterinarians/Veterinary Dentist (Vet Dentist)

>>> choosing Dentists, General/Pediatric Dentist <<<
Similar concepts:
0.60548 process and procedures
0.60593 print production techniques
0.64221 fact-based decision-making
0.64238 google ads platform
0.64238 qa testing tools
0.64238 performance and reliability testing
0.64238 procurement transactions
0.64238 programmatic leadership
0.64238 healthcare marketplace
0.64238 unmanned systems
0.64238 cable splicing
0.64296 user provisioning
0.64326 analyzing patents
0.64444 surface characterization
0.64737 data entry and retrieval
0.64770 background verification
0.65385 operating welding equipment
0.66025 good judgement
0.66425 problem formulation
0.66484 environmental plan
0.66608 control panel layout
0.69267 wireless device support
0.69447 understanding of cloud services
0

In [137]:
# NOTE(review): 'radioology' looks like a misspelling of 'radiology'; the recorded run found no match.
demo(type='Skill', part_of_name='radioology', return_type='Skill')

Found Skills:
No nodes containing string found


In [138]:
import torch
from torch_geometric.data import HeteroData


# Restore the saved graph only if the file exists in the working directory;
# otherwise `data` is left undefined by this cell.
filename = 'Job_Skill_HeteroData_withdupes_fulldataset_v1.pt'
if os.path.exists(f'./{filename}'):
    data = HeteroData.from_dict(torch.load(f'./{filename}'))
    print('loading saved heterodata object')

KeyboardInterrupt: 

In [None]:
from torch_geometric import seed_everything
import torch_geometric.transforms as T


# Random link split over the three listed edge types and their reverse edge types.
transform = T.RandomLinkSplit(
    num_val=0.005,
    num_test=0.01,
    is_undirected=True,
    edge_types=[
        ('Job', 'REQUIRES', 'Skill'),
        ('Skill', 'IS_SIMILAR_SKILL', 'Skill'),
        ('Job', 'IS_SIMILAR_JOB', 'Job'),
    ],
    rev_edge_types=[
        ('Skill', 'rev_REQUIRES', 'Job'),
        ('Skill', 'rev_IS_SIMILAR_SKILL', 'Skill'),
        ('Job', 'rev_IS_SIMILAR_JOB', 'Job'),
    ],
    neg_sampling_ratio=1.0,
    # Author's note: negatives are only added here for val and test; the
    # training negatives are added by LinkNeighborLoader, so they differ per
    # training batch while the val and test negatives stay the same.
    add_negative_train_samples=False,
    # Author's note: training edges are shared for message passing and supervision.
    disjoint_train_ratio=0,
)

# Fix the seed before splitting.
seed_everything(14)
train_data, val_data, test_data = transform(data)

from typing import Tuple, List
from torch_geometric.loader import LinkNeighborLoader
from torch_geometric.sampler import NegativeSampling
from torch_geometric.data import HeteroData
import gc
import multiprocessing as mp


def create_loader(data:HeteroData, edge_type:Tuple[str,str,str], num_neighbors:List[int], negative_sampling_amount:int, batch_size:int, is_training:bool)->LinkNeighborLoader:
    """Build a ``LinkNeighborLoader`` whose supervision edges are ``data[edge_type].edge_label_index``.

    Args:
        data: graph to sample from.
        edge_type: the single edge type used for supervision.
        num_neighbors: neighbour counts, applied to each of the six edge types
            listed below.  (Previously this argument was ignored and ``[5,4]``
            was hard-coded for every edge type.)
        negative_sampling_amount: ``amount`` passed to the binary
            ``NegativeSampling`` configuration.
        batch_size: batch size of the loader.
        is_training: forwarded as ``shuffle``.

    Returns:
        The configured loader.  ``num_workers`` is fixed to 0 and reported on stdout.
    """
    negative_sampling = NegativeSampling(
        mode='binary',
        amount=negative_sampling_amount  # ratio, like Graphsage
        #weight=  # "Probabilities" of nodes to be sampled: Node degree follows power law distribution
        )
    num_workers = 0

    # Author's note: for ('Skill', 'IS_SIMILAR_SKILL', 'Skill') index 0 will
    # never be used, since a neighboring edge to a job node can't be a
    # skill-skill edge.
    message_passing_edge_types = [
        ('Job', 'REQUIRES', 'Skill'),
        ('Skill', 'rev_REQUIRES', 'Job'),
        ('Skill', 'IS_SIMILAR_SKILL', 'Skill'),
        ('Skill', 'rev_IS_SIMILAR_SKILL', 'Skill'),
        ('Job', 'IS_SIMILAR_JOB', 'Job'),
        ('Job', 'rev_IS_SIMILAR_JOB', 'Job'),
    ]
    # A separate list per edge type so the entries do not share one object.
    neighbors_per_edge_type = {
        message_edge_type: list(num_neighbors)
        for message_edge_type in message_passing_edge_types
    }

    loader = LinkNeighborLoader(
        data,
        num_neighbors=neighbors_per_edge_type,
        edge_label_index=(edge_type, data[edge_type].edge_label_index), # if (edge, None), None means all edges are considered

        neg_sampling=negative_sampling, # adds negative samples
        batch_size=batch_size,
        shuffle=is_training,
        #drop_last=True,
        num_workers=num_workers,
        directed=True,  # contains only edges which are followed, False: contains full node induced subgraph
        #disjoint=True # sampled seed node creates its own, disjoint from the rest, subgraph, will add "batch vector" to loader output
        pin_memory=True, # faster data transfer to gpu
        #num_workers=2,
        #prefetch_factor=2
    )
    print(f'Using {num_workers} workers in the dataloader for edgetype {edge_type}')
    return loader


# Module-level settings read by create_iterator().
num_neighbors = [5, 4]
batch_size = 64

def create_iterator(data, is_training:bool):
    """Create a combined batch iterator over one loader per supervision edge type.

    One loader is built per supervision edge type because
    LinkNeighborLoader only allows one target edge type.  ``rev_`` edges are
    not used as targets; they are only needed for the GNN traversal.

    Reads the module-level ``num_neighbors`` and ``batch_size``.  Training
    loaders use a negative sampling amount of 20, all others 1.

    Returns:
        ``(iterator, length)`` where ``iterator`` is a generator function and
        ``length`` is the number of batches of the longest loader.  Each item
        yielded is a tuple of ``(edge_type, batch)`` pairs: the longest
        loader's pair first, then one pair per remaining loader.  The shorter
        loaders are restarted whenever they run out.

    Every call to ``iterator()`` starts the shorter loaders from the
    beginning.  Previously their iterators were created once and shared, so a
    second pass resumed wherever the first had stopped.
    """
    loaders = []
    supervision_edge_types = []
    for edge_type in [
        ('Job', 'REQUIRES', 'Skill'),
        ('Job', 'IS_SIMILAR_JOB', 'Job'),
        # ('Skill', 'IS_SIMILAR_SKILL', 'Skill')
        ]:
        loader = create_loader(
            data=data,
            edge_type=edge_type,
            num_neighbors=num_neighbors,
            batch_size=batch_size,
            is_training=is_training,
            negative_sampling_amount=(20 if is_training else 1)
        )
        loaders.append(loader)
        supervision_edge_types.append(edge_type)

    # The longest loader drives the pass; max() keeps the first one on ties.
    index = max(range(len(loaders)), key=lambda position: len(loaders[position]))

    # Popping from both lists keeps the remaining loaders and edge types aligned.
    longest_loader = loaders.pop(index)
    longest_loader_edge_type = supervision_edge_types.pop(index)

    def iterator():
        # Fresh iterators per pass, so passes do not share state.
        iterators = [iter(loader) for loader in loaders]
        for batch in longest_loader:
            batches = [(longest_loader_edge_type, batch)]
            for i, loader in enumerate(loaders):
                try:
                    extra_batch = next(iterators[i])
                except StopIteration:
                    iterators[i] = iter(loader)  # will "reinit" iterator
                    extra_batch = next(iterators[i])
                batches.append((supervision_edge_types[i], extra_batch))

            yield tuple(batches)

    return iterator, len(longest_loader)
    
    

# watch -n 1 df -h /dev/shm
gc.collect()
# One (generator function, batches per pass) pair per split; only the training split passes is_training=True.
train_iterator, train_batch_len = create_iterator(data=train_data, is_training=True)
val_iterator, val_batch_len = create_iterator(data=val_data, is_training=False)
test_iterator, test_batch_len = create_iterator(data=test_data, is_training=False)

Using 0 workers in the dataloader for edgetype ('Job', 'REQUIRES', 'Skill')
Using 0 workers in the dataloader for edgetype ('Job', 'IS_SIMILAR_JOB', 'Job')
Using 0 workers in the dataloader for edgetype ('Job', 'REQUIRES', 'Skill')
Using 0 workers in the dataloader for edgetype ('Job', 'IS_SIMILAR_JOB', 'Job')
Using 0 workers in the dataloader for edgetype ('Job', 'REQUIRES', 'Skill')
Using 0 workers in the dataloader for edgetype ('Job', 'IS_SIMILAR_JOB', 'Job')


In [139]:
# Take the first item of a validation pass: a tuple of (edge_type, batch) pairs.
batch = next(val_iterator())

NameError: name 'val_iterator' is not defined

In [None]:
# First 50 entries of `n_id` for the 'Job' nodes in the second (edge_type, batch) pair.
batch[1][1]['Job'].n_id[:50]

tensor([ 1292,  1552,  1955,  2860,  3459,  3463,  3717,  3838,  4929,  5186,
         5200,  5221,  5345,  5501,  6506,  6997,  6998,  7283,  7287,  7942,
         8122,  9227,  9480,  9685, 10146, 10324, 10931, 11097, 11261, 12220,
        12458, 12715, 12777, 14243, 14848, 15178, 15329, 16073, 17326, 17999,
        18100, 18368, 18424, 18624, 18945, 18995, 19712, 19884, 19933, 20122])

In [None]:
# Load the name-mapping object from disk. NOTE: torch.load unpickles, so only load trusted files.
mappings = torch.load('Job_Skill_HeteroData_name_mappings_withdupes_fulldataset_v1.pt')

In [146]:
# Pattern wrapped in '*' on both sides; list Job nodes near the first matching Job.
demo(type='Job', part_of_name='*Chicken Cutter*', return_type='Job')

Found Jobs:
Meat, Poultry, and Fish Cutters and Trimmers/Chicken Cutter

>>> choosing Meat, Poultry, and Fish Cutters and Trimmers/Chicken Cutter <<<
Similar concepts:
-0.00000 Meat, Poultry, and Fish Cutters and Trimmers/Chicken Cutter
0.01161 Embalmers/Embalmer
0.01180 Captains, Mates, and Pilots of Water Vessels/Tugboat Mate
0.01181 Food Preparation Workers/Pie Cutter
0.01290 Cutting and Slicing Machine Setters, Operators, and Tenders/Shredder
0.01472 Butchers and Meat Cutters/Journeyman Meat Cutter
0.01503 Production Workers, All Other/Cheesemaking Laborer
0.01573 Production Workers, All Other/Job Hand
0.01575 Packers and Packagers, Hand/Meat Wrapper
0.01593 Production Workers, All Other/Nylon Machine Operator
0.01625 Farmworkers and Laborers, Crop, Nursery, and Greenhouse/Farm Hand
0.01729 Title Examiners, Abstractors, and Searchers/Escrow Officer
0.01760 Gambling Cage Workers/Gaming Cage Cashier
0.01778 Packers and Packagers, Hand/Meat Packer
0.01779 First-Line Supervisors of Gam

In [None]:
# Look up 18995 in `inverted_jobmapping`, then use the result as the key into the title table.
mappings['jobmapping_index_to_title_alttile'][mappings['inverted_jobmapping'][18995]]

'First-Line Supervisors of Food Preparation and Serving Workers/Cafeteria Manager'

In [None]:
# Pattern with a leading '* '; list Job nodes near the first matching Skill.
demo(type='Skill', part_of_name='* cloud native systems', return_type='Job')

Found Skills:
cloud native systems

>>> choosing cloud native systems <<<
Similar concepts:
0.75876 Light Truck Drivers/Bulk Delivery Driver
0.75893 Logistics Analysts/Inventory Control Analyst
0.75959 Motor Vehicle Operators, All Other/New Autos Delivery Driver
0.75975 Light Truck Drivers/Pharmacy Delivery Driver
0.76187 Stockers and Order Fillers/Stock Checker
0.76297 First-Line Supervisors of Office and Administrative Support Workers/Stock Control Supervisor
0.76349 First-Line Supervisors of Retail Sales Workers/Stock Manager
0.76469 Transportation, Storage, and Distribution Managers/Shipping Receiving Manager
0.76481 First-Line Supervisors of Office and Administrative Support Workers/Inventory Control Manager
0.76538 News Analysts, Reporters, and Journalists/Breaking News Reporter
0.76555 Securities, Commodities, and Financial Services Sales Agents/Stock Associate
0.76674 Stockers and Order Fillers/Store Stocker
0.76743 Stockers and Order Fillers/Stock Deliverer
0.76745 Stockers an

In [None]:
# Search Skill nodes with the pattern 'planning', then list Job nodes near the first hit.
demo(type='Skill', part_of_name='planning', return_type='Job')

Found Skills:
project planning and management
external communications planning
procurement planning and execution
care planning and modification
resource capacity planning

>>> choosing project planning and management <<<
Similar concepts:
1.00000 Computer and Information Systems Managers/Knowledge Manager
1.00000 Computer and Information Systems Managers/Information Technology Administrator (IT Administrator)
1.00000 Administrative Services Managers/Administrative Coordinator
1.00000 Computer and Information Systems Managers/Information Support Project Manager
1.00000 Computer and Information Systems Managers/Development Manager
1.00000 Computer and Information Systems Managers/Information Services Manager
1.00000 Administrative Services Managers/Administrative Manager
1.00000 Administrative Services Managers/Administrative Officer
1.00000 Administrative Services Managers/Administrative Services Manager
1.00000 Computer and Information Systems Managers/Information Systems Administrato

In [None]:
# Search Skill nodes with the pattern 'manual dexterity', then list Job nodes near the first hit.
demo(type='Skill', part_of_name='manual dexterity', return_type='Job')

Found Skills:
manual dexterity and accuracy
manual finger dexterity
finger and manual dexterity
good manual dexterity
use of manual dexterity

>>> choosing manual dexterity and accuracy <<<
Similar concepts:
0.86272 Electronics Engineers, Except Computer/Communications Engineer
0.86414 Network and Computer Systems Administrators/Internet Systems Administrator
0.86421 Captains, Mates, and Pilots of Water Vessels/River Pilot
0.86423 Computer Network Support Specialists/Network Specialist
0.86576 Public Safety Telecommunicators/Telecommunicator
0.86581 Computer Network Support Specialists/Network Management Specialist
0.86634 Information Security Analysts/Internet Security Specialist
0.86650 Aerospace Engineers/Flight Engineer
0.86696 Computer Network Architects/Network Specialist
0.86799 Sales Representatives, Wholesale and Manufacturing, Technical and Scientific Products/Technical Service Representative
0.86814 Electronics Engineers, Except Computer/Telecommunications Engineer
0.86816 C

In [None]:
# Job -> Job: resolve a job title for entry 18995 through the two `mappings`
# tables (presumably index -> key -> "title/alt title"; confirm where
# `mappings` is built) and query Job concepts for it.
# NOTE(review): no output was recorded for this cell.
demo(
    'Job',
    mappings['jobmapping_index_to_title_alttile'][
        mappings['inverted_jobmapping'][18995]
    ],
    'Job',
)

In [None]:
# Job -> Skill: resolve a job title for entry 2860 through the two `mappings`
# tables (presumably index -> key -> "title/alt title"; confirm where
# `mappings` is built), then list the Skill concepts closest to that job.
demo(
    'Job',
    mappings['jobmapping_index_to_title_alttile'][
        mappings['inverted_jobmapping'][2860]
    ],
    'Skill',
)
https://login.microsoftonline.com/684b973b-fb6c-4e5d-984e-effedb1ee0b6/oauth2/authorize?client_id=684b973b-fb6c-4e5d-984e-effedb1ee0b6&response_type=id_token+code&redirect_uri=https%3A%2F%2Fwestus2.azuredatabricks.net%2Faad%2Fredirect&state=AHyPsQgAAAGLJSVCNp8gcxaWHjliP2Zyb21fcGVyX3dvcmtzcGFjZV91cmw9dHJ1ZSZvPTUyMzI4MzY0NjYzNjcyN_TDEoTVNvh6JUIIaN-SukI4ft9DJcfyXufkFaqe-vjW&response_mode=form_post&scope=openid3&nonce=-6980452881579230878

Found Jobs:
Logistics Analysts/Inventory Control Analyst

>>> choosing Logistics Analysts/Inventory Control Analyst <<<
Similar concepts:
0.53348 print production techniques
0.53865 process and procedures
0.56089 fact-based decision-making
0.56286 surface characterization
0.56308 analyzing patents
0.56357 data entry and retrieval
0.56512 qa testing tools
0.56512 unmanned systems
0.56512 google ads platform
0.56512 procurement transactions
0.56512 performance and reliability testing


In [None]:
# Job -> Skill: resolve a job title for entry 5501 through the two `mappings`
# tables (presumably index -> key -> "title/alt title"; confirm where
# `mappings` is built), then list the Skill concepts closest to that job.
demo(
    'Job',
    mappings['jobmapping_index_to_title_alttile'][
        mappings['inverted_jobmapping'][5501]
    ],
    'Skill',
)

Found Jobs:
Architects, Except Landscape and Naval/Specifications Writer

>>> choosing Architects, Except Landscape and Naval/Specifications Writer <<<
Similar concepts:
0.55691 process and procedures
0.55862 print production techniques
0.59912 fact-based decision-making
0.60059 analyzing patents
0.60070 surface characterization
0.60190 procurement transactions
0.60190 programmatic leadership
0.60190 cable splicing
0.60190 performance and reliability testing
0.60190 qa testing tools
0.60190 google ads platform


In [None]:
# Job -> Skill: resolve a job title for entry 1552 through the two `mappings`
# tables (presumably index -> key -> "title/alt title"; confirm where
# `mappings` is built), then list the Skill concepts closest to that job.
demo(
    'Job',
    mappings['jobmapping_index_to_title_alttile'][
        mappings['inverted_jobmapping'][1552]
    ],
    'Skill',
)

Found Jobs:
Clinical Research Coordinators/Clinical Research Director

>>> choosing Clinical Research Coordinators/Clinical Research Director <<<
Similar concepts:
0.57790 process and procedures
0.58642 print production techniques
0.64053 analyzing patents
0.64061 fact-based decision-making
0.64094 procurement transactions
0.64094 unmanned systems
0.64094 performance and reliability testing
0.64094 programmatic leadership
0.64094 cable splicing
0.64094 qa testing tools
0.64094 healthcare marketplace
0.64094 google ads platform
0.64103 surface characterization
0.64152 user provisioning
0.64595 data entry and retrieval
0.64628 background verification
0.65246 operating welding equipment
0.65888 good judgement
0.66054 problem formulation
0.66349 environmental plan
0.66473 control panel layout
0.67371 wireless device support
0.69324 understanding of cloud services
0.73713 data governance strategies
0.73838 quality service improvement
0.74208 global services organization
0.74273 usability ef

In [None]:
# Skill -> Skill: look up 'automation using python' and list the Skill
# concepts closest to it. In the recorded output the chosen skill itself
# comes first with a score of 0.00000, so the number looks like a distance.
demo('Skill', 'automation using python', 'Skill')

Found Skills:
automation using python

>>> choosing automation using python <<<
Similar concepts:
0.00000 automation using python
0.05167 computerized patient care systems
0.12407 data collecting
0.14114 oral and written reports
0.14987 peer-reviewed publications
0.15035 personnel support
0.15037 primary location
0.15324 scheduling policies
0.15445 digital camera
0.15459 climate change
0.15491 managed care industry knowledge


In [None]:
# Skill -> Skill: same query as the preceding cell ('automation using
# python'); the recorded output is identical, so this works as a
# repeatability check.
demo('Skill', 'automation using python', 'Skill')

Found Skills:
automation using python

>>> choosing automation using python <<<
Similar concepts:
0.00000 automation using python
0.05167 computerized patient care systems
0.12407 data collecting
0.14114 oral and written reports
0.14987 peer-reviewed publications
0.15035 personnel support
0.15037 primary location
0.15324 scheduling policies
0.15445 digital camera
0.15459 climate change
0.15491 managed care industry knowledge


In [None]:
# Job -> Skill: resolve a job title for entry 3459 through the two `mappings`
# tables (presumably index -> key -> "title/alt title"; confirm where
# `mappings` is built), then list the Skill concepts closest to that job.
demo(
    'Job',
    mappings['jobmapping_index_to_title_alttile'][
        mappings['inverted_jobmapping'][3459]
    ],
    'Skill',
)

Found Jobs:
Accountants and Auditors/General Ledger Accountant
Accountants and Auditors/General Accountant

>>> choosing Accountants and Auditors/General Ledger Accountant <<<
Similar concepts:
0.58790 process and procedures
0.59110 print production techniques
0.63379 analyzing patents
0.63417 surface characterization
0.63428 fact-based decision-making
0.63446 cable splicing
0.63446 healthcare marketplace
0.63446 performance and reliability testing
0.63446 procurement transactions
0.63446 qa testing tools
0.63446 google ads platform


In [None]:
# Job -> Job: search Job entries with the pattern '*Construction Worker*' and
# list the Job concepts closest to the first hit.
# NOTE(review): the asterisks are presumably wildcards understood by the
# search inside `demo` -- confirm against its definition.
demo('Job', '*Construction Worker*', 'Job')

Found Jobs:
Insulation Workers, Floor, Ceiling, and Wall/Construction Insulation Installer
First-Line Supervisors of Construction Trades and Extraction Workers/Multifamily Superintendent
First-Line Supervisors of Construction Trades and Extraction Workers/Commercial Construction Superintendent
First-Line Supervisors of Construction Trades and Extraction Workers/Building Insulation Supervisor
First-Line Supervisors of Construction Trades and Extraction Workers/Painting Supervisor

>>> choosing Insulation Workers, Floor, Ceiling, and Wall/Construction Insulation Installer <<<
Similar concepts:
0.00000 Insulation Workers, Floor, Ceiling, and Wall/Construction Insulation Installer
0.00858 Insulation Workers, Floor, Ceiling, and Wall/Insulation Installer
0.01141 Insulation Workers, Floor, Ceiling, and Wall/Fiberglass Insulation Installer
0.01398 Carpenters/Panel Installer
0.01467 Heating, Air Conditioning, and Refrigeration Mechanics and Installers/Heating Systems Installer
0.01524 Sheet Me