In [1]:
from utility import create_milvus_collection, drop_milvus_collection
from vector_db import MilvusCollection
from json import load
from towhee import AutoPipes, AutoConfig
from setting import METRIC_TYPE


create_milvus_collection()

collection = MilvusCollection()
collection.create_partition("test")

with open('../data/context_keyword_pair.json', 'r') as f:
    data = load(f)

for datum in data:
    datum['keywords'] = [keyword.strip() for keyword in datum['keywords'].split(',')] # will be removed later




In [2]:
id2context = [None] * len(data)
keyword2context = {}

for i, datum in enumerate(data):
    for keyword in datum['keywords']:
        if keyword in keyword2context:
            keyword2context[keyword].append(i)
        else:
            keyword2context[keyword] = [i]
    id2context[i] = datum['context']

config = AutoConfig.load_config('sentence_embedding')
config.model = 'average_word_embeddings_glove.6B.300d'
sentence_embedding = AutoPipes.pipeline('sentence_embedding', config=config)

keywords = list(keyword2context.keys())
collection.insert([keywords,
                   [embedding.get()[0] for embedding in sentence_embedding.batch(keywords)],
                   list(keyword2context.values())], "test")

In [18]:
search_params = {
    "metric_type": METRIC_TYPE
}

results = collection.search([sentence_embedding('Naruto').get()[0]], "embedding", search_params, 5, partition_names=["test"], output_fields=["keyword", "context_ids"])[0]

In [19]:
for result in results:
    print("distance: ", result.distance)
    entity = result.entity
    print("found keyword: ", entity.keyword)
    for i, context_id in enumerate(entity.context_ids):
        print("==================================================================")
        print("corresponding context: ", i + 1, ". ", id2context[context_id])
    print('\n')

distance:  0.9999998807907104
found keyword:  Naruto
corresponding context:  1 .  The first season of the "Naruto" anime series is directed by Hayato Date, and produced by Studio Pierrot and TV Tokyo. Based on Masashi Kishimoto's manga series, the season follows Naruto Uzumaki living in the Hidden Leaf Village, determined to become the next Hokage and gain the respect of the villagers. The first season ran from October 3, 2002 to November 5, 2003 on TV Tokyo. It was also released with the English version from September 2005 to November 2006 on Cartoon Network's Toonami and YTV's Bionix programming blocks. 


distance:  0.4659806489944458
found keyword:  Hokusai Manga
corresponding context:  1 .  The word first came into common usage in the late 18th century with the publication of such works as Santō Kyōden's picturebook "Shiji no yukikai" (1798), and in the early 19th century with such works as Aikawa Minwa's "Manga hyakujo" (1814) and the celebrated "Hokusai Manga" books (1814–1834) 

In [5]:
raise KeyboardInterrupt
collection.drop_partition("test")
drop_milvus_collection()

KeyboardInterrupt: 