In [1]:
from owlready2 import *
import re
import fasttext 
import numpy as np
from numpy import dot
from numpy.linalg import norm
import networkx as nx

In [2]:
def cosineSimilarity(a, b):
    """
    Cosine similarity between two vectors.

    Parameters:
    a, b: array-likes of equal length.

    Returns:
    float: inner(a, b) / (|a| * |b|). When either vector has zero norm the
    similarity is undefined; 0.0 is returned instead of NaN so that callers
    can still sort the scores.
    """
    denominator = norm(a) * norm(b)
    if denominator == 0:
        # Avoid 0/0 -> NaN (and the accompanying RuntimeWarning) for all-zero vectors.
        return 0.0
    return np.inner(a, b) / denominator

In [3]:
# Usage: [delimSplit(name.lower()) for name in class_names_list]
def delimSplit(string, delimiters=('-', '_'), maxsplit=0):
    """
    Split `string` on any of the given literal delimiters and drop empty pieces.

    Parameters:
    string (str): text to split.
    delimiters (iterable of str): literal separators (regex-escaped internally).
    maxsplit (int): maximum number of splits; 0 means no limit.

    Returns:
    list: the non-empty fragments, in order.
    """
    if not delimiters:
        # An empty alternation would match the empty string and split between
        # every character; with nothing to split on, keep the text whole.
        return [string] if string else []
    regexPattern = '|'.join(map(re.escape, delimiters))
    # maxsplit is passed by keyword: the positional form is deprecated in recent Python.
    return [word for word in re.split(regexPattern, string, maxsplit=maxsplit) if word]

In [4]:
def preprocessString(string):
    """
    Normalise text for matching: lower-case it, then replace every character
    that is not an ASCII letter with a single space.

    The inserted spaces act as token delimiters for callers that later split
    the result on whitespace.
    """
    lowered = string.lower()
    return re.sub("[^a-zA-Z]", " ", lowered)

In [5]:
# Load the ontology from an OWL file addressed by a relative path, so the file
# is expected in the notebook's working directory.
file_prefix = 'file://'
onto = get_ontology(file_prefix + "AI_10-12-2019.owl").load()



In [6]:
# Every class of the loaded ontology, materialised once so later cells share one ordering.
class_list = list(onto.classes())
# Use the public `.name` property (as the rest of the notebook does) rather than
# calling the accessor function with the class passed in explicitly.
class_names_list = [c.name for c in class_list]

# Maps the stringified token list of a lower-cased class name to its class object.
# NOTE: names that tokenise identically (e.g. differing only in case or in '-' vs '_')
# share a key, so only the last such class in `class_list` is kept.
phraseToClassDict = {str(delimSplit(c.name.lower())): c for c in class_list}

In [7]:
def gloveSearch(query, model):
    """ 
    Returns sorted list of class objects closest to the query using glove embeddings of their 'name' 

    Each class in `class_list` is scored directly, so classes whose names
    tokenise to the same phrase are each returned as themselves instead of
    being collapsed through a phrase-keyed lookup.

    Parameters: 
    query (str): query 
    model: gensim glove model (KeyedVectors)

    Returns: 
    list: [class object, similarity score] pairs, highest score first.
    Classes whose name has no in-vocabulary token are omitted; the list is
    empty when no query token is in the vocabulary.

    """
    # gensim >= 4.0 exposes the vocabulary as `key_to_index`; older releases use `vocab`.
    vocab = getattr(model, 'key_to_index', None)
    if vocab is None:
        vocab = model.vocab

    # The query tokens do not depend on the class being scored: compute them once.
    query_tokens = [t for t in preprocessString(query).split() if t in vocab]
    if not query_tokens:
        return []

    results = []
    for c in class_list:
        name_tokens = [w for w in delimSplit(c.name.lower()) if w in vocab]
        if name_tokens:
            # cosine similarity between the mean vectors of the two token sets
            results.append([c, model.n_similarity(name_tokens, query_tokens)])
    results.sort(key=lambda r: r[1], reverse=True)
    return results

In [8]:
def fastTextSearch(query, model):
    """ 
    Returns sorted list of class objects closest to the query using fasttext embeddings of their 'name' 

    Each class in `class_list` is scored directly and returned as itself, so
    the result no longer depends on re-splitting the joined phrase to find the
    class, and classes with identically tokenised names are not confused.

    Parameters: 
    query (str): query 
    model: fasttext model with subword information

    Returns: 
    list: [class object, similarity score] pairs, highest score first.

    """
    # The query embedding is the same for every class: compute it once.
    query_vector = model.get_sentence_vector(preprocessString(query))

    results = []
    for c in class_list:
        phrase = ' '.join(delimSplit(c.name.lower()))
        score = cosineSimilarity(model.get_sentence_vector(phrase), query_vector)
        results.append([c, score])
    results.sort(key=lambda r: r[1], reverse=True)
    return results

In [50]:
# Free-text query passed to the GloVe and fastText search cells below.
query = 'Andrew NG'

In [None]:
# Fetch the pretrained "glove-wiki-gigaword-100" vectors through gensim's downloader API.
import gensim.downloader as api
gloveModel = api.load("glove-wiki-gigaword-100")

In [None]:
# Rank the ontology classes against `query` with GloVe and print each class
# name with its score (gloveSearch returns the pairs best match first).
results_glove = gloveSearch(query, gloveModel)
for result in results_glove:
    print('Class name: ', result[0].name, '. Score: ', result[1])

In [48]:
# Load the fastText binary model from a relative path (expected in the working directory).
fasttextModel = fasttext.load_model('crawl-300d-2M-subword.bin')




In [51]:
# Rank the ontology classes against `query` with fastText sentence vectors and
# print each class name with its score (fastTextSearch returns the pairs best match first).
results_fastText = fastTextSearch(query, fasttextModel)
for result in results_fastText:
    print('Class name: ', result[0].name, '. Score: ', result[1])

Class name:  f-GAN . Score:  0.63803226
Class name:  Julia . Score:  0.5634246
Class name:  Lempel-Ziv-Welch_algorithm . Score:  0.51531863
Class name:  k-NN . Score:  0.5115085
Class name:  Theil-Sen_estimator . Score:  0.508926
Class name:  Adam . Score:  0.49491572
Class name:  Dempster-Shafer_Theory . Score:  0.48805767
Class name:  Q-Learning . Score:  0.48356014
Class name:  K-Means_Clustering . Score:  0.4741715
Class name:  Wasserstein_GAN . Score:  0.47088957
Class name:  n-gram_analysis . Score:  0.4693178
Class name:  t-SNE . Score:  0.46721533
Class name:  GAN . Score:  0.46471044
Class name:  MAGAN . Score:  0.46277606
Class name:  R_Language . Score:  0.4563136
Class name:  Neural_MT . Score:  0.4557787
Class name:  SQ-RBF_Function . Score:  0.44125172
Class name:  Conditional_GAN_(CGAN) . Score:  0.43948233
Class name:  Interlingual_MT . Score:  0.4383302
Class name:  Hybrid_MT . Score:  0.43297178
Class name:  Laplacian_Pyramid_GAN_(LAPGAN) . Score:  0.43083298
Class na

Class name:  Image_Denoising . Score:  0.2093506
Class name:  Image_Compression . Score:  0.20934568
Class name:  Image_Registration . Score:  0.20926993
Class name:  Transfer_Reinforcement_Learning . Score:  0.20924543
Class name:  Perl . Score:  0.20922303
Class name:  Noisy_Speech_Recognition . Score:  0.20876482
Class name:  Recurrent_Neural_Networks . Score:  0.20876373
Class name:  Domain-specific_synthesis . Score:  0.20869516
Class name:  Known_Environment . Score:  0.20858371
Class name:  Known_Environment . Score:  0.20858371
Class name:  Fluorescence_imaging . Score:  0.20855638
Class name:  Density_Estimation . Score:  0.20829616
Class name:  Image_Comprehension . Score:  0.20797206
Class name:  Bacterial_Foraging_Optimization . Score:  0.20785809
Class name:  Scene_Text_Recognition . Score:  0.20774224
Class name:  Image_Matting . Score:  0.20753083
Class name:  Video_Saliency_Detection . Score:  0.20731929
Class name:  Unsupervised_Learning . Score:  0.2072138
Class name:

In [7]:
# Adjacency list keyed by class name: each class maps to the names of the
# classes yielded by its subclasses() call.
adjList = {
    c.name: [sub.name for sub in c.subclasses()]
    for c in class_list
}

In [9]:
# Build a NetworkX graph from the class-name adjacency list.
G = nx.from_dict_of_lists(adjList)

In [11]:
# Report the size of the class graph.
print('Number of nodes: ', len(G.nodes), '. Number of edges: ', len(G.edges))

Number of nodes:  1084 . Number of edges:  1100


In [41]:
# Set up node2vec over the class graph; only G is passed, so every other
# setting is the library default.
# NOTE(review): no seed is supplied, so results may differ between runs — confirm whether that matters.
from node2vec import Node2Vec
node2vec = Node2Vec(G)

Computing transition probabilities: 100%|██████████| 1084/1084 [00:00<00:00, 4911.62it/s]
Generating walks (CPU: 1): 100%|██████████| 10/10 [00:42<00:00,  4.54s/it]


In [42]:
model = node2vec.fit()              # fitted embedding model; its vectors are read through `model.wv` below

In [46]:
# Nearest neighbours of the 'Boltzmann_Machine' node in the learned embedding space.
model.wv.most_similar('Boltzmann_Machine')

[('DBM', 0.9310755729675293),
 ('RBM', 0.8851877450942993),
 ('Binary_RBM_with_Contrastive_Divergence', 0.84473717212677),
 ('Hebbian_Learning', 0.8392605781555176),
 ('Perceptrons', 0.835342526435852),
 ('Artificial_Neural_Networks', 0.8352882862091064),
 ('Binary_RBM_with_Persistent_Contrastive_Divergence', 0.8196649551391602),
 ('Hopfield_Network', 0.8088248372077942),
 ('Multilayer_Feed-Forward_Networks', 0.7006083130836487),
 ('Neural_Networks', 0.6821851134300232)]

In [20]:
# Distinct first components of the triples returned by onto.get_triples()
# (presumably the subjects — TODO confirm).
s = {triple[0] for triple in onto.get_triples()}

In [25]:
# Print, for every ontology class, its name and the list produced by get_properties.
for obj in onto.classes():
    print('Obj ', obj.name, 'prop ', list(obj.get_properties(obj)))

Obj  Absolute_Error_Loss prop  []
Obj  Activation_Functions prop  []
Obj  AdaDelta prop  []
Obj  Adagrad prop  []
Obj  Adam prop  []
Obj  Agriculture prop  []
Obj  Ant_colony_optimization prop  []
Obj  ArSinH_Function prop  []
Obj  ArcTan_Function prop  []
Obj  Architecture prop  []
Obj  Artificial_Bee_Colony_Optimization prop  []
Obj  Artificial_Fish_Swarm_Algorithm prop  []
Obj  Artificial_Intelligence prop  []
Obj  Artificial_Neural_Networks prop  []
Obj  Arts_and_Entertainment prop  []
Obj  Assistance prop  []
Obj  Astronomy prop  []
Obj  Automobiles prop  []
Obj  Autonomous_Vehicles prop  []
Obj  Backpropogation prop  []
Obj  Bacterial_Foraging_Optimization prop  []
Obj  Banking prop  []
Obj  Behavior_based_AI prop  []
Obj  Bent_identity_Function prop  []
Obj  Best_First_Search prop  []
Obj  Bidirectional_Search prop  []
Obj  Big_Data prop  []
Obj  Binary_Classification_Loss_Functions prop  []
Obj  Binary_sigmoidal_function prop  []
Obj  Binary_step_function prop  []
Obj  Bioinfor

Obj  Corner_and_Interest_Point_Detection prop  []
Obj  Coupled_GAN prop  []
Obj  Curved_Text_Detection prop  []
Obj  Cybernetics_and_brain_simulation prop  []
Obj  DBM prop  []
Obj  DLLearner prop  []
Obj  Data_Augmentation prop  []
Obj  Deblurring prop  []
Obj  Decision-Tree_Regressor prop  []
Obj  Decision_Stump prop  []
Obj  Decision_Theory prop  []
Obj  Decision_Trees prop  []
Obj  Deep_Adversarial_Networks prop  []
Obj  Deep_Attention prop  []
Obj  Deep_Learning prop  []
Obj  Deep_RNN prop  []
Obj  Deep_Reinforcement_Learning prop  []
Obj  Deep_feedforward_and_recurrent_neural_networks prop  []
Obj  Deep_learning_synthesis prop  []
Obj  Default_reasoning_and_the_qualification_problem prop  []
Obj  Defocus_Estimation prop  []
Obj  Deformable_Object_Manipulation prop  []
Obj  Demosaicking prop  []
Obj  Denoising prop  []
Obj  Denoising_VAE prop  []
Obj  Dense_Pixel_Correspondence_Estimation prop  []
Obj  Dense_Video_Captioning prop  []
Obj  Density_Based_Spatial_Clustering prop  []


Obj  Saliency_Detection prop  []
Obj  Saliency_Prediction prop  []
Obj  Salient_Object_Detection prop  []
Obj  Salt-And-Pepper_Noise_Removal prop  []
Obj  Sammon_Mapping prop  []
Obj  Scene-Aware_Dialogue prop  []
Obj  Scene_Classification prop  []
Obj  Scene_Flow_Estimation prop  []
Obj  Scene_Graph_Generation prop  []
Obj  Scene_Parsing prop  []
Obj  Scene_Recognition prop  []
Obj  Scene_Segmentation prop  []
Obj  Scene_Text_Detection prop  []
Obj  Scene_Text_Recognition prop  []
Obj  Scene_Understanding prop  []
Obj  Scruffy prop  []
Obj  Search_and_optimization prop  []
Obj  Self-Organizing_Maps prop  []
Obj  Semantic_Segmentation prop  []
Obj  Semantic_folding prop  []
Obj  Semantics prop  []
Obj  Semi-Supervised_Image_Classification prop  []
Obj  Semi-Supervised_Learning prop  []
Obj  Semi-Supervised_Person_Re-Identification prop  []
Obj  Semi-supervised_Video_Object_Segmentation prop  []
Obj  Sensor prop  []
Obj  Sensor_Modeling prop  []
Obj  Sentence_Embeddings prop  []
Obj  Se

In [19]:
# Print the name, declared domain and declared range of every object property.
for prop in onto.object_properties():
    print('Property: ', prop.name, '. Domain: ', prop.domain, '. Range: ', prop.range )

Property:  Developer . Domain:  [] . Range:  []
Property:  Documenter . Domain:  [] . Range:  []
Property:  Domain . Domain:  [] . Range:  []
Property:  activity . Domain:  [] . Range:  []
Property:  agent . Domain:  [] . Range:  []
Property:  alternateOf . Domain:  [.Speech-Recognition, .Speech_Recognition] . Range:  [.Fundamental]
Property:  atLocation . Domain:  [] . Range:  []
Property:  creator . Domain:  [] . Range:  []
Property:  employsTechnique . Domain:  [] . Range:  []
Property:  entity . Domain:  [] . Range:  []
Property:  generalizes . Domain:  [] . Range:  []
Property:  generated . Domain:  [] . Range:  []
Property:  hadActivity . Domain:  [] . Range:  []
Property:  hadGeneration . Domain:  [] . Range:  []
Property:  hadMember . Domain:  [] . Range:  []
Property:  hadRole . Domain:  [] . Range:  []
Property:  hadUsage . Domain:  [] . Range:  []
Property:  hasAlgorithmClass . Domain:  [] . Range:  []
Property:  hasAlgorithmConfiguration . Domain:  [] . Range:  []
Property:

In [26]:
# For every object property, list the entities that have at least one value for it.
for prop in onto.object_properties():
    print('Property: ', prop.name)
    # search() takes the property as a keyword *name*: writing `prop = "*"` looks for
    # a property literally called "prop" and ignores the loop variable. Unpack a dict
    # keyed by the property's Python name instead ("*" matches any value).
    print(onto.search(**{prop.python_name: "*"}))
    print('\n')

Property:  Developer
[]


Property:  Documenter
[]


Property:  Domain
[]


Property:  activity
[]


Property:  agent
[]


Property:  alternateOf
[]


Property:  atLocation
[]


Property:  creator
[]


Property:  employsTechnique
[]


Property:  entity
[]


Property:  generalizes
[]


Property:  generated
[]


Property:  hadActivity
[]


Property:  hadGeneration
[]


Property:  hadMember
[]


Property:  hadRole
[]


Property:  hadUsage
[]


Property:  hasAlgorithmClass
[]


Property:  hasAlgorithmConfiguration
[]


Property:  hasFeature
[]


Property:  hasLearningMethod
[]


Property:  hasLearningProblem
[]


Property:  hasTarget
[]


Property:  hasTool
[]


Property:  homepage
[]


Property:  implements
[]


Property:  influenced
[]


Property:  influencer
[]


Property:  invalidated
[]


Property:  isAlgorithmConfigurationOf
[]


Property:  member
[]


Property:  memberOf
[]


Property:  publishedBy
[]


Property:  publishedIn
[]


Property:  range
[]


Property:  release
[]


Proper

In [28]:
# Expose default_world as an rdflib graph and print every (subject, predicate, object) triple.
# NOTE(review): the loop rebinds the notebook globals `s`, `p` and `o`; `s` is also
# assigned as a set in an earlier cell and is overwritten here.
graph = default_world.as_rdflib_graph()
for s,p,o in graph:
    print(s, p, o)

http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019 http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Ontology
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Absolute_Error_Loss http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Activation_Functions http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#AdaDelta http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Adagrad http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Adam http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/AI_

http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Ruby http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Rule-based_MT http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/.owl#SAP http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/.owl#SAS http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/.owl#SBD http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/.owl#SPSS http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://www.w3.org/2002/07/owl#Class
http://www.semanticweb.org/ram/ontologies/2019/6/.owl#SQLServerAnalysisServices http://www.w3.org/1999/02/22-rdf-syntax-ns#type http://w

http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Iterative_deepening_A*_Search http://www.w3.org/2000/01/rdf-schema#subClassOf http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Heuristic_Search
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Leaky_rectified_linear_unit_(Leaky_ReLU) http://www.w3.org/2000/01/rdf-schema#subClassOf http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Activation_Functions
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Mean_Absolute_Error_(MAE) http://www.w3.org/2000/01/rdf-schema#subClassOf http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Absolute_Error_Loss
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Mean_Square_Error_(MSE) http://www.w3.org/2000/01/rdf-schema#subClassOf http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Squared_Error_Loss
http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Parameteric_rectified_linear_unit_(PReLU) h

63 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest 64
64 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Binary_step_function
64 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest 65
65 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Bipolar_step_function
65 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest 66
66 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#ElliotSig_Function
66 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest 67
67 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Gaussian_Function
67 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest 68
68 http://www.w3.org/1999/02/22-rdf-syntax-ns#first http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Identity_function
68 http://www.w3.org/1999/02/22-rdf-syntax-ns#rest 69
69 

In [29]:
# Distinct predicates that occur in the graph's triples.
prop = {predicate for _, predicate, _ in graph}

In [30]:
# Display the set of distinct predicates.
prop

{rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#sharesDefinitionWith'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#first'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#rest'),
 rdflib.term.URIRef('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
 rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#comment'),
 rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#domain'),
 rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#range'),
 rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf'),
 rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subPropertyOf'),
 rdflib.term.URIRef('http://www.w3.org/2002/07/owl#annotatedProperty'),
 rdflib.term.URIRef('http://www.w3.org/2002/07/owl#annotatedSource'),
 rdflib.term.URIRef('http://www.w3.org/2002/07/owl#annotatedTarget'),
 rdflib.term.URIRef('http://www.w3.org/2002/07/owl#disjointWith'),
 rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equ

In [33]:
# Distinct subjects that occur in the graph's triples.
subs = {subject for subject, _, _ in graph}

In [34]:
# Display the set of distinct subjects.
subs

{rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Optical_Flow'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Machine_Learning'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Binary_RBM_with_Contrastive_Divergence'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#microscopy'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/AI_18-11-2019#Soft_Clipping_Function'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Scene_Classification'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Active_Object_Localization'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Audio-Visual_Video_Captioning'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Distributional_learning'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#

In [49]:
# List every subject that appears in a triple whose predicate is owl:equivalentClass.
import rdflib
list(graph.subjects(predicate =  rdflib.term.URIRef('http://www.w3.org/2002/07/owl#equivalentClass')))

[rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Convolutional_Neural_Networks'),
 rdflib.term.URIRef('http://www.semanticweb.org/ram/ontologies/2019/6/.owl#Recurrent_Neural_Networks')]

In [37]:
# NOTE(review): this expression looks truncated — as written it adds an int to the
# builtin `len`, which raises TypeError. The missing operand needs to be restored — TODO confirm.
len(list(onto.classes())) + len

1088

In [38]:
# Distinct objects that occur in the graph's triples.
obs = {obj_term for _, _, obj_term in graph}

In [39]:
# Number of distinct objects in the graph.
len(obs)

575

In [40]:
# Display the set of distinct objects.
obs

{rdflib.term.BNode('1'),
 rdflib.term.BNode('10'),
 rdflib.term.BNode('100'),
 rdflib.term.BNode('101'),
 rdflib.term.BNode('102'),
 rdflib.term.BNode('103'),
 rdflib.term.BNode('104'),
 rdflib.term.BNode('106'),
 rdflib.term.BNode('107'),
 rdflib.term.BNode('108'),
 rdflib.term.BNode('109'),
 rdflib.term.BNode('11'),
 rdflib.term.BNode('110'),
 rdflib.term.BNode('112'),
 rdflib.term.BNode('113'),
 rdflib.term.BNode('114'),
 rdflib.term.BNode('115'),
 rdflib.term.BNode('116'),
 rdflib.term.BNode('118'),
 rdflib.term.BNode('119'),
 rdflib.term.BNode('120'),
 rdflib.term.BNode('121'),
 rdflib.term.BNode('122'),
 rdflib.term.BNode('123'),
 rdflib.term.BNode('124'),
 rdflib.term.BNode('125'),
 rdflib.term.BNode('127'),
 rdflib.term.BNode('128'),
 rdflib.term.BNode('129'),
 rdflib.term.BNode('13'),
 rdflib.term.BNode('131'),
 rdflib.term.BNode('132'),
 rdflib.term.BNode('133'),
 rdflib.term.BNode('135'),
 rdflib.term.BNode('136'),
 rdflib.term.BNode('137'),
 rdflib.term.BNode('139'),
 rdfli