In [1]:
from qdrant_client import QdrantClient
import torch
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

In [2]:
# === КОНФИГ ===
EMBED_MODEL_NAME = "Qwen/Qwen3-Embedding-0.6B"
COLLECTION_NAME = "nlp2025_chunks"
TOP_K = 10
LLM_MODEL_NAME = "Qwen/Qwen2.5-1.5B-Instruct"

In [3]:
# === ЗАПРОС ===
QUERY = "Which scientific papers explore graphs within the biomedical domain?"

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on: {device.upper()}")

encoder = SentenceTransformer(EMBED_MODEL_NAME, trust_remote_code=True, device=device)

client = QdrantClient("localhost", port=6333)

Running on: CUDA


## Функция ретривера

In [5]:
def retrieve(query_str: str, top_k: int):

    query_vector = encoder.encode(query_str, convert_to_numpy=True)

    search_results = client.query_points(
        collection_name=COLLECTION_NAME,
        query=query_vector,
        limit=top_k,
        with_payload=True,
        with_vectors=False
    )
    
    return search_results

In [6]:
# === ЗАПУСК ТЕСТА ===
if __name__ == "__main__":
    print(f"Query: {QUERY}\n" + "-"*30)
    
    results = retrieve(QUERY, top_k=TOP_K)
    
    if not results.points:
        print("Ничего не найдено. Проверьте имя коллекции или наличие данных")
    else:
        for i, point in enumerate(results.points):
            # Извлекаем текст из payload (предполагаем, что поле называется 'text' или 'content')
            # Адаптируй ключи payload под свою структуру загрузки
            payload = point.payload
            score = point.score
            
            # Пример вывода
            print(f"[{i+1}] Score: {score:.4f} | ID: {point.id}")
            print(f"Text snippet: {payload.get('text', '')[:100]}...")
            print("-" * 20)

Query: Which scientific papers explore graphs within the biomedical domain?
------------------------------
[1] Score: 0.6980 | ID: 764364
Text snippet: Role: You are an expert Biological Graph Annotator....
--------------------
[2] Score: 0.6849 | ID: 86600
Text snippet: Grounding LLM Reasoning with Knowledge Graphs > Appendix A GRBench Statistics: Detailed statistics o...
--------------------
[3] Score: 0.6573 | ID: 152500
Text snippet: What’s In Your Field? Mapping Scientific Research with Knowledge Graphs and LLMs > 3 Demo > 3.2 Quer...
--------------------
[4] Score: 0.6510 | ID: 698179
Text snippet: BioVerge: A Comprehensive Benchmark and Study of Self-Evaluating Agents for Biomedical Hypothesis Ge...
--------------------
[5] Score: 0.6509 | ID: 707191
Text snippet: Assessing LLMs for Serendipity Discovery in Knowledge Graphs: A Case for Drug Repurposing > Appendix...
--------------------
[6] Score: 0.6468 | ID: 34771
Text snippet: Self-supervised Quantized Representation for Seam

## Generation

In [7]:
# === ИНИЦИАЛИЗАЦИЯ МОДЕЛИ ===
print("Загрузка LLM...")
tokenizer = AutoTokenizer.from_pretrained(LLM_MODEL_NAME)

model = AutoModelForCausalLM.from_pretrained(
    LLM_MODEL_NAME,
    dtype=torch.bfloat16, 
    device_map="auto"
)

Загрузка LLM...


In [8]:
def generate_rag_answer(query: str, retrieved_chunks) -> str:
    
    context_text = "\n\n---\n\n".join(retrieved_chunks)
    
    system_prompt = (
        "You are a helpful research assistant. "
        "Answer the user's question mostly based on the provided context below. "
        "If the answer is not in the context, say so. "
        "Cite the paper titles if available in the text."
    )
    
    user_content = f"Context:\n{context_text}\n\nQuestion: {query}"

    # 3. Применяем чат-шаблон (ChatML для Qwen)
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content}
    ]
    
    # Подготовка тензоров
    text_input = tokenizer.apply_chat_template(
        messages, 
        tokenize=False, 
        add_generation_prompt=True
    )
    model_inputs = tokenizer([text_input], return_tensors="pt").to(model.device)

    # 4. Генерация
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=512,
        temperature=0.3,
        top_p=0.9,
        do_sample=True
    )

    # 5. Декодирование (убираем промпт из ответа)
    generated_ids = [
        output_ids[len(input_ids):] 
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    
    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return response

In [10]:
# === СИМУЛЯЦИЯ ЗАПУСКА (Интеграция с предыдущим шагом) ===
mock_retrieved_chunks = [
    "Title: Graph Neural Networks in Biomedicine. Abstract: This paper surveys the application of GNNs in drug discovery...",
    "Title: Knowledge Graphs for Protein Interaction. Abstract: We propose a new method using graph databases to map protein interactions...",
    "Title: Attention Mechanisms in NLP. Abstract: This paper discusses transformers..." # Нерелевантный пример для теста
]

QUERY = "Which scientific papers explore graphs within the biomedical domain?"

print("-" * 30)
print("Генерация ответа...")
answer = generate_rag_answer(QUERY, mock_retrieved_chunks)

print("\n=== FINAL ANSWER ===\n")
print(answer)

------------------------------
Генерация ответа...

=== FINAL ANSWER ===

The given context mentions two scientific papers that explore graphs within the biomedical domain:

1. "Graph Neural Networks in Biomedicine" - This paper surveys the application of Graph Neural Networks (GNNs) in drug discovery.
2. "Knowledge Graphs for Protein Interaction" - This paper proposes a method using graph databases to map protein interactions.

These papers focus specifically on the use of graphs and related concepts such as neural networks and knowledge graphs in biomedicine and bioinformatics contexts respectively.
