In [1]:
from utility import create_milvus_collection, milvus_collection_exists, drop_milvus_collection, create_sqlite_db, sqlite_db_exists, drop_sqlite_db
from relational_db import SQLiteDB
from setting import TABEL2FIELD, METRIC_TYPE
from json import load
from vector_db import MilvusCollection
from towhee import AutoConfig, AutoPipes


keyword2context = {}

# Create a relational db to store context and its id
create_sqlite_db()
db = SQLiteDB()

for table, fields in TABEL2FIELD.items():
    db.create_table(table, fields)

# Read context-to-keywords json file and make keyword-to-contexts mapping
with open('../data/context_keyword_pair.json', 'r') as f:
    data = load(f)

for i, datum in enumerate(data):
    db.insert('context', f"{i}, '" + datum['context'].replace("'", "''") + "'")
    for keyword in datum['keywords']:
        if keyword in keyword2context:
            keyword2context[keyword].append(i)
        else:
            keyword2context[keyword] = [i]

# Embed all keywords and insert into vector DB
if not milvus_collection_exists():
    create_milvus_collection()

collection = MilvusCollection()
if not collection.has_partition("test"):
    collection.create_partition("test")

config = AutoConfig.load_config('sentence_embedding')
config.model = 'average_word_embeddings_glove.6B.300d'
sentence_embedding = AutoPipes.pipeline('sentence_embedding', config=config)

keywords = list(keyword2context.keys())
collection.insert([keywords,
                   [embedding.get()[0] for embedding in sentence_embedding.batch(keywords)],
                   list(keyword2context.values())], "test")




In [2]:
search_params = {
    "metric_type": METRIC_TYPE
}

results = collection.search([sentence_embedding('Monkey D. Luffy').get()[0]], "embedding", search_params, 5, partition_names=["test"], output_fields=["keyword", "context_ids"])[0]

In [3]:
for result in results:
    print("distance: ", result.distance)
    entity = result.entity
    print("found keyword: ", entity.keyword)
    for i, context_id in enumerate(entity.context_ids):
        print("==================================================================")
        print("corresponding context: ", i + 1, ". ", db.select('context', 'context', f'id = {context_id}')[0][0])
    print('\n')

distance:  1.0000001192092896
found keyword:  Monkey D. Luffy
corresponding context:  1 .  , commonly known simply as "Dragon the Revolutionary", is the father of the Straw Hat Pirates Captain Monkey D. Luffy and the son of the Marines hero Monkey D. Garp. He is the infamous leader and founder of the Revolutionary Army who has been attempting to overthrow the World Government. Not much is known about his activities, background, history, or power. He is the World Government's greatest enemy, and is the most dangerous and most wanted man in the world.
corresponding context:  2 .  The protagonists of the "One Piece" series are all the members of the , a crew of nine pirates captained by Monkey D. Luffy. The crew's number increases throughout the series, as Luffy recruits new members. Once Usopp joins the Straw Hat Pirates, they gain their own ship, the "Going Merry", which is later destroyed and replaced by a larger and more powerful vessel, the "Thousand Sunny" created by their shipwrigh

In [4]:
raise KeyboardInterrupt

if sqlite_db_exists():
    drop_sqlite_db()

if collection.has_partition("test"):
    collection.drop_partition("test")
    
if milvus_collection_exists():
    drop_milvus_collection()

KeyboardInterrupt: 