In [1]:
import sys
import pickle

# TO CHANGE: relative path from this notebook to the directory that should be
# importable (presumably the repository root holding the `src` package -- confirm).
BASEDIR = "../../"

# Keep BASEDIR as the first import-path entry. Any earlier copy is dropped first
# so that re-running this cell does not accumulate duplicate sys.path entries.
if BASEDIR in sys.path:
    sys.path.remove(BASEDIR)
sys.path.insert(0, BASEDIR)

In [2]:
TRIALS = 2
FIX_FILE_PATH = "./import_fix.py"
for _ in range(TRIALS):
    try:
      from src import PersonalAI, PersonalAIConfig, QAPipelineConfig, MemPipelineConfig, \
            GraphModelConfig, EmbeddingsModelConfig, EmbedderModelConfig

      from src.db_drivers import KeyValueDriverConfig, GraphDriverConfig, VectorDriverConfig
      from src.db_drivers.kv_driver import DEFAULT_INMEMORYKV_CONFIG
      from src.db_drivers.graph_driver import DEFAULT_INMEMORYGRAPH_CONFIG
      from src.db_drivers.vector_driver import VectorDBConnectionConfig

      from src.pipelines.qa.knowledge_retriever import AStarGraphSearchConfig, AStarMetricsConfig, BFSSearchConfig, MixturedGraphSearchConfig
      from src.pipelines.qa import QueryLLMParserConfig, KnowledgeComparatorConfig, KnowledgeRetrieverConfig, QALLMGeneratorConfig

      from src.pipelines.memorize import LLMExtractorConfig, LLMUpdatorConfig

      from src.utils import NodeType, Logger
    except RuntimeError as e:
        from pathlib import Path
        fix_path = Path(FIX_FILE_PATH)
        if fix_path.is_file():
            %run {fix_path} --base_dir BASEDIR
        else:
            raise e

  from tqdm.autonotebook import tqdm, trange


#### 1. Загружаем датасет с триплетами, на основе которого будет построен граф знаний

In [3]:
# Location of the pickled triplets used to build the knowledge graph.
PKL_GRAPH_PATH = '../../data/pickled_graphs/testdb.pickle'

# NOTE(review): unpickling can execute arbitrary code -- only load files from a trusted source.
with open(PKL_GRAPH_PATH, mode='rb') as graph_file:
    formated_triplets = pickle.load(graph_file)

In [4]:
# Upper bound on how many triplets are kept for the rest of the notebook.
length = 10355

# Report the dataset size before and after applying the cap.
size_before = len(formated_triplets)
print(size_before)

formated_triplets = formated_triplets[:length]

size_after = len(formated_triplets)
print(size_after)

10355
10355


#### 2. Задаём конфигурацию графа знаний

In [5]:
# Graph model configuration: 'inmemory_graph' driver with the project's default config.
GRAPH_STORAGE_CONFIG = GraphDriverConfig(
    db_vendor='inmemory_graph',
    db_config=DEFAULT_INMEMORYGRAPH_CONFIG,
)
GRAPH_MODEL_CONFIG = GraphModelConfig(
    driver_config=GRAPH_STORAGE_CONFIG,
)

In [6]:
# Vector model configuration
NODES_DB_PATH = '../../data/graph_structures/vectorized_nodes/testing' # TO CHANGE
TRIPLETS_DB_PATH = '../../data/graph_structures/vectorized_triplets/testing' # TO CHANGE
# Forwarded to both vector DB connections as `need_to_clear`
# (presumably wipes existing contents on connect -- confirm against the driver).
NEED_TO_CLEAR = True

VECTOR_NODES_STORAGE_CONFIG = VectorDriverConfig(
    db_config=VectorDBConnectionConfig(path=NODES_DB_PATH, need_to_clear=NEED_TO_CLEAR))
VECTOR_TRIPLETS_STORAGE_CONFIG = VectorDriverConfig(
    db_config=VectorDBConnectionConfig(path=TRIPLETS_DB_PATH, need_to_clear=NEED_TO_CLEAR))
# Backward-compatible alias for the earlier, misspelled name of the same object.
VECTOR_TRIPLETS_STIRAGE_CONFIG = VECTOR_TRIPLETS_STORAGE_CONFIG

DEVICE = 'cuda' # TO CHANGE
EMBEDDER_MODEL_PATH = '../../models/intfloat/multilingual-e5-small' # TO CHANGE
EMBEDDER_MODEL_CONFIG = EmbedderModelConfig(model_name_or_path=EMBEDDER_MODEL_PATH, device=DEVICE)

# One vector store for nodes, one for triplets, sharing a single embedder config.
VECTOR_MODEL_CONFIG = EmbeddingsModelConfig(
    nodesdb_driver_config=VECTOR_NODES_STORAGE_CONFIG,
    tripletsdb_driver_config=VECTOR_TRIPLETS_STORAGE_CONFIG,
    embedder_config=EMBEDDER_MODEL_CONFIG)

In [7]:
# QA-pipeline retrieve stage configuration (configuring mixture graph search/retriever)
KV_STORAGE_CONFIG = KeyValueDriverConfig(
    db_vendor='inmemory_kv',
    db_config=DEFAULT_INMEMORYKV_CONFIG,
)

# A* search settings; its metrics config uses the key-value storage config defined above.
ASTAR_RETRIEVER_CONFIG = AStarGraphSearchConfig(
    metrics_config=AStarMetricsConfig(
        h_metric_name='ip',  # TO CHANGE
        kvdriver_config=KV_STORAGE_CONFIG,
    ),
    max_depth=20,  # TO CHANGE
    max_passed_nodes=1000,  # TO CHANGE
    accepted_node_types=[  # TO CHANGE
        NodeType.object,
        NodeType.hyper,
        NodeType.episodic,
    ],
)

# BFS search settings.
BFS_RETRIEVER_CONFIG = BFSSearchConfig(
    strict_filter=True,  # TO CHANGE
    hyper_episodic_num=15,  # TO CHANGE
    chain_triplets_num=25,  # TO CHANGE
    other_triplets_num=6,  # TO CHANGE
)

# The 'mixture' retriever is configured with both the A* and the BFS settings.
RETRIEVER_NAME = 'mixture'
RETRIEVER_CONFIG = MixturedGraphSearchConfig(
    astar_config=ASTAR_RETRIEVER_CONFIG,
    bfs_config=BFS_RETRIEVER_CONFIG,
)

In [8]:
# Language setting passed as `lang` to the query parser, answer generator,
# extractor and updator configs built later in this notebook.
LANGUAGE = 'en' # TO CHANGE ('ru' | 'en' | 'auto')

In [9]:
# QA-pipeline configuration
QA_PIPELINE_CONFIG = QAPipelineConfig(
    query_parser_config=QueryLLMParserConfig(lang=LANGUAGE),
    knowledge_comparator_config=KnowledgeComparatorConfig(),
    knowledge_retriever_config=KnowledgeRetrieverConfig(
        retriever_method=RETRIEVER_NAME,
        retriever_config=RETRIEVER_CONFIG,
    ),
    answer_generator_config=QALLMGeneratorConfig(lang=LANGUAGE),
)

# Memorize-pipeline configuration
MEM_PIPELINE_CONFIG = MemPipelineConfig(
    extractor_config=LLMExtractorConfig(lang=LANGUAGE),
    updator_config=LLMUpdatorConfig(lang=LANGUAGE),
)

# Top-level configuration: graph structure, embeddings structure, both pipelines
# and a logger created with the 'log/main' argument.
PERSONALAI_CONFIG = PersonalAIConfig(
    graph_struct_config=GRAPH_MODEL_CONFIG,
    embedds_struct_config=VECTOR_MODEL_CONFIG,
    qa_pipeline_config=QA_PIPELINE_CONFIG,
    mem_pipeline_config=MEM_PIPELINE_CONFIG,
    log=Logger('log/main'),
)

#### 3. Инициализируем граф знаний

In [10]:
# Build the PersonalAI instance from the configuration assembled above.
# NOTE(review): the recorded output of this cell reports that no
# sentence-transformers model was found at the configured embedder path and
# that a new one with mean pooling was created -- confirm the model directory.
personalai = PersonalAI(config=PERSONALAI_CONFIG)

No sentence-transformers model found with name ../../models/intfloat/multilingual-e5-small. Creating a new one with mean pooling.


#### 4. Добавляем в граф загруженные триплеты

In [10]:
# Pass the loaded triplets to the graph structure's create_triplets; the
# returned value is kept in graph_info.
print("uploading data to graph-storage")
graph_info = personalai.kg_model.graph_struct.create_triplets(formated_triplets)

100%|██████████| 10355/10355 [00:00<00:00, 173751.01it/s]
100%|██████████| 81/81 [01:19<00:00,  1.03it/s]


In [None]:
# Pass the same triplets to the embeddings structure's create_triplets; the
# returned value is kept in vector_info.
print("uploading data to vector-storage")
vector_info = personalai.kg_model.embeddings_struct.create_triplets(formated_triplets)

#### 5. Q&A

In [13]:
# Benchmark questions as (question, expected answer) pairs.
qa_examples = [
    (
        "Which device is better in battery life: iPhone11 Pro Max or Xiaomi 11?",
        "Xiaomi 11",
    ),
    (
        "Kayla has positive, negative or neutral opinion about video of 10PRO on 25.11.2020?",
        "Negative",
    ),
    (
        "Do Jane and Jonathan have any common devices (which Jane and Jonathan both use)? If so, list common devices. Otherwise, answer 'No'.",
        "Xiaomi",
    ),
    (
        "Whose opinions from Freda and Bruce about devices are most similar to Kayla's?",
        "Freda",
    ),
    (
        "Which people have negative opinion about video of 10PRO on 25.11.2020?",
        "Kayla",
    ),
    (
        "Which people have positive opinion about signal of Mi 10pro on 22.12.2018?",
        "Matthew",
    ),
    (
        "Jessica has positive, negative or neutral opinion about signal of Apple on 22.12.2018?",
        "Negative",
    ),
]

In [14]:
# Ask each example question and print the returned info and answer next to
# the expected answer, followed by a separator line.
for query, expected_answer in qa_examples:
    answer, info = personalai.answer_question(query)
    print("INFO: ", info)
    print("MODEL ANSWER: ", answer)
    print("TRUE ANSWER: ", expected_answer)
    print("=" * 35)

MODEL ANSWER:  It is unclear which device, iPhone 11 Pro Max or Xiaomi 11, is better in battery life since opinions vary widely.
TRUE ANSWER:  Xiaomi 11
MODEL ANSWER:  Negative
TRUE ANSWER:  Negative
MODEL ANSWER:  Yes, Xiaomi
TRUE ANSWER:  Xiaomi
MODEL ANSWER:  There is no direct comparison that aligns with Kayla's opinions.
TRUE ANSWER:  Freda
MODEL ANSWER:  Final answer 3:
TRUE ANSWER:  Kayla
MODEL ANSWER:  No records found.
TRUE ANSWER:  Matthew
MODEL ANSWER:  Cannot be determined
TRUE ANSWER:  Negative


: 