In [1]:
import torch
import time
from qdrant_client import QdrantClient
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM

In [2]:
class ScienceRAG:
    def __init__(self, 
                 qdrant_host: str = "localhost", 
                 qdrant_port: int = 6333,
                 collection_name: str = "nlp2025_chunks",
                 embed_model: str = "Qwen/Qwen3-Embedding-0.6B",
                 llm_model: str = "Qwen/Qwen2.5-1.5B-Instruct"):
        
        self.collection_name = collection_name
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"–ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è RAG –Ω–∞ —É—Å—Ç—Ä–æ–π—Å—Ç–≤–µ: {self.device.upper()}")

        # 1. –ü–æ–¥–∫–ª—é—á–µ–Ω–∏–µ –∫ –±–∞–∑–µ –∑–Ω–∞–Ω–∏–π
        self.client = QdrantClient(qdrant_host, port=qdrant_port)
        
        # 2. –ó–∞–≥—Ä—É–∑–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞ (–≠–º–±–µ–¥–¥–∏–Ω–≥–∏)
        print(f"–ó–∞–≥—Ä—É–∑–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞: {embed_model}...")
        self.encoder = SentenceTransformer(embed_model, trust_remote_code=True, device="cpu")
        
        # 3. –ó–∞–≥—Ä—É–∑–∫–∞ –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞ (LLM)
        print(f"üß† –ó–∞–≥—Ä—É–∑–∫–∞ LLM: {llm_model}...")
        self.tokenizer = AutoTokenizer.from_pretrained(llm_model)
        
        # –ò–ó–ú–ï–ù–ï–ù–ò–Ø –ó–î–ï–°–¨:
        # 1. –£–±—Ä–∞–ª–∏ device_map="auto"
        # 2. –°–º–µ–Ω–∏–ª–∏ bfloat16 –Ω–∞ float16
        # 3. –Ø–≤–Ω–æ –æ—Ç–ø—Ä–∞–≤–∏–ª–∏ .to(self.device)
        self.model = AutoModelForCausalLM.from_pretrained(
            llm_model,
            torch_dtype=torch.float16, 
            attn_implementation="sdpa" # –í–∫–ª—é—á–∞–µ–º –±—ã—Å—Ç—Ä–æ–µ –≤–Ω–∏–º–∞–Ω–∏–µ (Scaled Dot Product Attention)
        ).to(self.device)
        print("–°–∏—Å—Ç–µ–º–∞ –≥–æ—Ç–æ–≤–∞ –∫ —Ä–∞–±–æ—Ç–µ!\n")

    
    def _retrieve(self, query: str, top_k: int = 5):
        """–í–Ω—É—Ç—Ä–µ–Ω–Ω–∏–π –º–µ—Ç–æ–¥: –ø–æ–∏—Å–∫ –≤–µ–∫—Ç–æ—Ä–æ–≤"""
        query_vector = self.encoder.encode(query, convert_to_numpy=True)
        
        search_result = self.client.query_points(
            collection_name=self.collection_name,
            query=query_vector,
            limit=top_k,
            with_payload=True
        )
        
        # –£–ø—Ä–æ—â–∞–µ–º —Å—Ç—Ä—É–∫—Ç—É—Ä—É –¥–ª—è –≥–µ–Ω–µ—Ä–∞—Ç–æ—Ä–∞
        return [point.payload for point in search_result.points]


    def _format_context(self, chunks) -> str:
        """–°–æ–±–∏—Ä–∞–µ—Ç –Ω–∞–π–¥–µ–Ω–Ω—ã–µ —á–∞–Ω–∫–∏ –≤ –æ–¥–∏–Ω —Ç–µ–∫—Å—Ç –¥–ª—è –ø—Ä–æ–º–ø—Ç–∞"""
        formatted_text = ""
        for i, chunk in enumerate(chunks):
            # –ê–¥–∞–ø—Ç–∏—Ä—É–π –∫–ª—é—á–∏ –ø–æ–¥ —Å–≤–æ–π payload JSON
            title = chunk.get('title', 'Unknown Title')
            text = chunk.get('text', chunk.get('abstract', '')) # fallback –µ—Å–ª–∏ –Ω–µ—Ç text
            
            formatted_text += f"Document [{i+1}]\nTitle: {title}\nContent: {text}\n\n"
        return formatted_text


    def answer(self, query: str, top_k: int = 5) -> str:
            t0 = time.time()
            
            # --- –≠–¢–ê–ü 1: –†–ï–¢–†–ò–í ---
            print(f"üîç –ò—â—É –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é...")
            retrieved_chunks = self._retrieve(query, top_k)
            t1 = time.time()
            print(f"‚è±Ô∏è  Retrieval (Encode + Search): {t1 - t0:.4f} —Å–µ–∫")
            
            if not retrieved_chunks:
                return "–ù–µ—Ç –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏."
    
            # --- –≠–¢–ê–ü 2: –ü–û–î–ì–û–¢–û–í–ö–ê ---
            context = self._format_context(retrieved_chunks)
            system_prompt = (
                "You are a helpful scientific assistant. "
                "Use the provided context to answer the user's question. "
                "If the context doesn't contain the answer, admit it."
            )
            messages = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"}
            ]
            
            text_input = self.tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
            model_inputs = self.tokenizer([text_input], return_tensors="pt").to(self.device)
            t2 = time.time()
            
            # --- –≠–¢–ê–ü 3: –ì–ï–ù–ï–†–ê–¶–ò–Ø ---
            print("‚úçÔ∏è  –ì–µ–Ω–µ—Ä–∏—Ä—É—é –æ—Ç–≤–µ—Ç...")
            with torch.no_grad():
                generated_ids = self.model.generate(
                    **model_inputs,
                    max_new_tokens=512,
                    temperature=0.3,
                    top_p=0.9,
                    do_sample=True
                )
            t3 = time.time()
            print(f"‚è±Ô∏è  Generation (LLM): {t3 - t2:.4f} —Å–µ–∫")
    
            response = self.tokenizer.batch_decode(
                [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)],
                skip_special_tokens=True
            )[0]
            
            print(f"‚è±Ô∏è  Total: {t3 - t0:.2f} —Å–µ–∫")
            return response

In [3]:
rag = ScienceRAG()

# –ó–∞–ø—Ä–æ—Å
user_query = "How are Graph Neural Networks used in drug discovery?"

answer = rag.answer(user_query, top_k=5)

print("\n" + "="*40)
print("OTBET:")
print(answer)
print("="*40)

–ò–Ω–∏—Ü–∏–∞–ª–∏–∑–∞—Ü–∏—è RAG –Ω–∞ —É—Å—Ç—Ä–æ–π—Å—Ç–≤–µ: CUDA
–ó–∞–≥—Ä—É–∑–∫–∞ —Ä–µ—Ç—Ä–∏–≤–µ—Ä–∞: Qwen/Qwen3-Embedding-0.6B...
üß† –ó–∞–≥—Ä—É–∑–∫–∞ LLM: Qwen/Qwen2.5-1.5B-Instruct...


`torch_dtype` is deprecated! Use `dtype` instead!


–°–∏—Å—Ç–µ–º–∞ –≥–æ—Ç–æ–≤–∞ –∫ —Ä–∞–±–æ—Ç–µ!

üîç –ò—â—É –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é...
‚è±Ô∏è  Retrieval (Encode + Search): 0.2322 —Å–µ–∫
‚úçÔ∏è  –ì–µ–Ω–µ—Ä–∏—Ä—É—é –æ—Ç–≤–µ—Ç...
‚è±Ô∏è  Generation (LLM): 10.5557 —Å–µ–∫
‚è±Ô∏è  Total: 10.80 —Å–µ–∫

OTBET:
Graph Neural Networks (GNNs) are used in drug discovery through various applications such as:

1. **Drug Target Binding Affinity Prediction**: The Hybrid Graph-Transformer framework introduced by Xiao et al. ([2024]) uses GNNs to integrate both graph-based and sequence-based representations, achieving superior performance compared to state-of-the-art methods on benchmark datasets.

2. **Multi-Objective Molecule Optimization**: The Latent Prompt Transformer developed by Kong et al. ([2024]) incorporates latent prompts within a unified architecture to achieve state-of-the-art performance in multi-objective molecule optimization and drug-like molecule generation.

These applications leverage the ability of GNNs to process and analyze complex 

In [4]:
# –ó–∞–ø—Ä–æ—Å
user_query = "How are Graph Neural Networks used in social media?"

answer = rag.answer(user_query, top_k=5)

print("\n" + "="*40)
print("OTBET:")
print(answer)
print("="*40)

üîç –ò—â—É –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏—é...
‚è±Ô∏è  Retrieval (Encode + Search): 0.2035 —Å–µ–∫
‚úçÔ∏è  –ì–µ–Ω–µ—Ä–∏—Ä—É—é –æ—Ç–≤–µ—Ç...
‚è±Ô∏è  Generation (LLM): 7.5100 —Å–µ–∫
‚è±Ô∏è  Total: 7.72 —Å–µ–∫

OTBET:
Based on the information provided in Document [4], Graph Neural Networks (GNNs) are being utilized in the field of knowledge-graph-guided language understanding, particularly within collaborative multi-agent question answering systems. Specifically, they are described as "large-scale pretrained GNN backbones" that aim to capture broadly reusable structural/semantic patterns. This suggests their application in generating synthetic graphs from text data, which can then be used to inform or enhance language model routing decisions. The use of GNNs here appears to leverage their ability to process relational data effectively, making them suitable for tasks involving complex interactions between entities represented as nodes in a graph structure.
