In [1]:
import os

import gensim
import numpy as np
import pandas as pd
from neo4j import GraphDatabase

import eec

In [2]:
# Load the previously trained embeddings from disk.
# NOTE(review): later cells access `keyed_vectors.wv`, so the pickled object is
# presumably a full Word2Vec model rather than bare KeyedVectors — confirm.
WORD2VEC_MODEL_PATH = './data/word2vec.model'
keyed_vectors = gensim.models.KeyedVectors.load(WORD2VEC_MODEL_PATH)

In [3]:
# Sanity check of the embeddings: list the nearest neighbours of 'lettuce'
# (the recorded output is a list of (word, similarity) pairs).
keyed_vectors.wv.most_similar('lettuce')

[('romaine', 0.8329761028289795),
 ('head', 0.7326541543006897),
 ('boston', 0.7135635614395142),
 ('cabbage', 0.7096205353736877),
 ('rack', 0.6752064228057861),
 ('spring', 0.6698594093322754),
 ('turnip', 0.6696361303329468),
 ('pig', 0.6667503714561462),
 ('cup', 0.6314897537231445),
 ('kaffir', 0.6175163388252258)]

In [4]:
# Create the Neo4j helper for the demo database.
# Connection settings are read from the environment so credentials are not
# committed with the notebook; the defaults are the values the cell used before.
# NOTE(review): assumes the positional arguments are (uri, user, password) — confirm
# against eec.Neo4JHelper. The instance is not stored here; the repositories
# created later presumably pick the connection up internally — confirm.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'test')

eec.Neo4JHelper(NEO4J_URI, NEO4J_USER, NEO4J_PASSWORD)

<eec.implementations.neo4j.neo4j_services.neo4j_helper.Neo4JHelper at 0x283d710c0>

In [5]:
# Entity repository; it is handed the embeddings object loaded above.
entity_repository = eec.Neo4JEntityRepository(keyed_vectors=keyed_vectors)

In [6]:
# Store one demo entity for the mention 'lettuce' together with its embedding.
# An empty entity_id is passed; the recorded output shows the stored entity
# carrying a generated UUID-style id.
# The returned object is kept so that later cells can reference
# `lettuce_entity` instead of pasting an id from a previous run.
lettuce_entity = entity_repository.add_entity(
    eec.Neo4JEntity(
        entity_id='',
        entity_source='demo',
        entity_source_id='0',
        mention='lettuce',
        mention_vector=keyed_vectors.wv['lettuce'],
        has_mention_vector=True,
    )
)
lettuce_entity

6eb91db4-370c-4a55-8b46-33383d7bd8ac-lettuce

In [6]:
# Look an entity up by its id.
# NOTE(review): this hard-coded id is not the id shown in the add_entity output
# of the previous cell, and the recorded result is a NotFoundException. Use the
# id of an entity that exists in the current database.
entity_repository.get_entity_by_id('036cd15e-13ce-4aa7-9cdc-89ef9afb7dab')

NotFoundException: Entity with id 036cd15e-13ce-4aa7-9cdc-89ef9afb7dab not found

In [8]:
# Read the phrase/target table and display it.
# NOTE(review): the displayed 'Unnamed: 0' column suggests the CSV was written
# with its index; `index_col=0` may be wanted here — confirm before changing.
FOOD_COM_CSV_PATH = './data/all.csv'
food_com_df = pd.read_csv(FOOD_COM_CSV_PATH)
food_com_df

Unnamed: 0,id,phrase,target,id_phrase
0,4308,"medium heads bibb or red leaf lettuce, washed,...",lettuce,0
1,4308,mixed baby lettuces and spring greens,lettuce,1
2,4308,romaine lettuce leaf,lettuce,2
3,4308,iceberg lettuce leaf,lettuce,3
4,4308,red romaine lettuce,lettuce,4
...,...,...,...,...
11654,6702,soybeans,soybean,11654
11655,3318,goose,goose,11655
11656,47,ajwain,ajwain,11656
11657,750,brinjals,brinjal,11657


In [9]:
# Build one entity per row of the phrase table: the row label serves as both
# entity_id and entity_source_id, and the 'phrase' column is the mention.
entities: list[eec.Neo4JEntity] = [
    eec.Neo4JEntity(
        entity_id=str(row_label),
        entity_source='food_com',
        entity_source_id=str(row_label),
        mention=record['phrase'],
    )
    for row_label, record in food_com_df.iterrows()
]

# Hand the whole batch to the repository in a single call.
entity_repository.add_entities(entities)

In [7]:
# Cluster repository; it is given the entity repository created earlier.
cluster_repository = eec.Neo4JClusterRepository(entity_repository=entity_repository)

In [11]:
# Register an empty cluster named 'apple'. An empty cluster_id is passed; the
# recorded output shows the stored cluster with a generated id.
apple_cluster = eec.Neo4JCluster(
    cluster_id='',
    entities=[],
    cluster_name='apple',
)
cluster_repository.add_cluster(apple_cluster)

Cluster 71006665-cbf6-446a-b4b2-de501068874a-apple

In [12]:
# Register an empty cluster named 'lettuce'. An empty cluster_id is passed; the
# recorded output shows the stored cluster with a generated id.
# The returned object is kept so that later cells can reference
# `lettuce_cluster` instead of pasting an id from a previous run.
lettuce_cluster = cluster_repository.add_cluster(
    eec.Neo4JCluster(
        cluster_id='',
        entities=[],
        cluster_name='lettuce',
    )
)
lettuce_cluster

Cluster 27e3b608-6cd3-439e-b18e-afa07651e1c8-lettuce

In [8]:
# Fetch the 'lettuce' cluster by id.
# NOTE(review): the id is copied from the recorded add_cluster output and will
# differ on a fresh database.
LETTUCE_CLUSTER_ID = '27e3b608-6cd3-439e-b18e-afa07651e1c8'
cluster1 = cluster_repository.get_cluster_by_id(LETTUCE_CLUSTER_ID)

In [9]:
# List every cluster currently stored (recorded output: 'apple' and 'lettuce').
cluster_repository.get_all_clusters()

[Cluster 71006665-cbf6-446a-b4b2-de501068874a-apple,
 Cluster 27e3b608-6cd3-439e-b18e-afa07651e1c8-lettuce]

In [10]:
# Fetch one stored entity by id and display it (recorded output: the
# 'romaine lettuce leaf' mention).
# NOTE(review): hard-coded id from a previous run — it will not exist on a fresh database.
entity_1 = entity_repository.get_entity_by_id('83336e7d-e04a-4cf7-b041-6568c918c339')
entity_1

83336e7d-e04a-4cf7-b041-6568c918c339-romaine lettuce leaf

In [11]:
# Attach entity_1 to cluster1, addressing both by their ids.
cluster_repository.add_entity_to_cluster(cluster1.cluster_id, entity_1.entity_id)

In [12]:
# Fetch a second entity by id and attach it to the same cluster.
# NOTE(review): hard-coded id from a previous run; also note the naming
# mismatch with `entity_1` (underscore) used above.
entity2 = entity_repository.get_entity_by_id('00d01cc4-09b6-48e8-bcf6-8d05ad885cb8')
cluster_repository.add_entity_to_cluster(cluster1.cluster_id, entity2.entity_id)

In [13]:
# Fetch the 'lettuce' cluster again and rebind cluster1 — presumably so the
# local object reflects the entities added above (confirm).
# NOTE(review): same hard-coded id as the earlier lookup.
cluster1 = cluster_repository.get_cluster_by_id('27e3b608-6cd3-439e-b18e-afa07651e1c8')

In [13]:
# Detach both entities from cluster1 in one call; the recorded output lists the
# two entities that were passed.
entity_ids_to_detach = [entity_1.entity_id, entity2.entity_id]
cluster_repository.remove_entities_from_clusters(
    cluster_id=cluster1.cluster_id,
    entity_ids=entity_ids_to_detach,
)

[83336e7d-e04a-4cf7-b041-6568c918c339-romaine lettuce leaf,
 00d01cc4-09b6-48e8-bcf6-8d05ad885cb8-head of lettuce]

In [10]:
# Detach a single entity from a cluster.
# NOTE(review): both ids are pasted from a previous run and match nothing
# created in the cells above.
stale_cluster_id = '58841562-e83a-47ef-a718-b149bba7b9eb'
stale_entity_id = '5fd30fba-58b6-4700-8c0e-081e9ec461a9'
cluster_repository.remove_entity_from_cluster(
    cluster_id=stale_cluster_id,
    entity_id=stale_entity_id,
)

5fd30fba-58b6-4700-8c0e-081e9ec461a9-romaine lettuce leaf

In [15]:
# Clustering method that is given both repositories and a name.
mention_clustering_method = eec.Neo4JMentionClusteringMethod(
    entity_repository=entity_repository,
    cluster_repository=cluster_repository,
    name='neo4j_mention_clustering_method',
)

In [16]:
# Register the repositories and the clustering method on the bridge.
# Later cells read them back through a fresh `eec.EntityClustererBridge()`
# call, so the bridge presumably shares state across instances — confirm.
bridge = eec.EntityClustererBridge()
bridge.set_cluster_repository(cluster_repository)
bridge.set_entity_repository(entity_repository)
bridge.set_mention_clustering_method(mention_clustering_method)

In [17]:
# Ask the repository registered on the bridge for a random unlabeled entity.
# The pick is random, so the recorded output below will not reproduce exactly.
entity = eec.EntityClustererBridge().entity_repository.get_random_unlabeled_entity()

In [18]:
# Display the entity that was picked.
entity

6b2e4fe9-4dd3-4ed7-b0df-7a6796fe97c4-red maraschino cherry

In [19]:
# Ask the clustering method which clusters could hold the picked entity.
# NOTE(review): the recorded output is "TypeError: string indices must be
# integers"; the failure is raised inside eec, not in this cell — investigate
# before relying on this call.
eec.EntityClustererBridge().mention_clustering_method.getPossibleClusters(entity)

TypeError: string indices must be integers

In [22]:
# Register an empty cluster named 'dip sauce' with an explicit id.
# NOTE(review): this cell builds an eec.BaseCluster while the earlier cluster
# cells build eec.Neo4JCluster, and the recorded output shows id 1 although
# '2' is passed — the output is presumably from a different session; confirm.
dip_sauce_cluster = eec.BaseCluster(
    cluster_id='2',
    cluster_name='dip sauce',
    entities=[],
)
cluster_repository.add_cluster(dip_sauce_cluster)

Cluster 1-dip sauce

In [23]:
# Attach entity2 to the cluster addressed by the literal id '2'.
# NOTE(review): the recorded output is a NotFoundException, so no cluster with
# id '2' existed when this ran. The message prints a literal "{cluster_id}",
# which looks like a missing f-string prefix inside eec — confirm there.
cluster_repository.add_entity_to_cluster('2', entity2.entity_id)

NotFoundException: Cluster with id {cluster_id} not found.

In [19]:
# Pick another random unlabeled entity.
# NOTE(review): this rebinds `entity2`, which an earlier cell assigned from a
# lookup by id; cells run after this one see the new value.
entity2 = eec.EntityClustererBridge().entity_repository.get_random_unlabeled_entity()

In [20]:
# Display the entity that was picked.
entity2

8883-spinach dip

In [21]:
# Ask the clustering method for candidate clusters for entity2 (the recorded
# output is a one-element list containing the 'cabbage' cluster).
eec.EntityClustererBridge().mention_clustering_method.getPossibleClusters(entity2)

[Cluster 0-cabbage]