In [13]:
import os, chromadb, google.generativeai as genai, numpy as np
from sentence_transformers import SentenceTransformer
from chromadb.config import Settings

In [14]:
# --- Retrieval side: embedding model and the persisted Chroma collection ---
MODEL_NAME = 'all-MiniLM-L6-v2'
CHROMA_PATH = '../lab5/chroma_db'
COLL_NAME = 'StackOverflowQnA'

# --- Generation side: Gemini model id and the instruction prepended to every prompt ---
LLM_NAME = 'gemini-2.0-flash'
# Note: the message intentionally ends with a single trailing space.
SYSTEM_MSG = (
    'You are Stack Overflow Assistant. You answer questions '
    'based on the knowledge snippets provided. '
)
# Resolve the Gemini API key without ever writing it into the notebook:
# the GOOGLE_API_KEY environment variable wins when set, otherwise the key is
# read from the local `key.txt` file (same behaviour as before).
GOOGLE_API_KEY = os.environ.get('GOOGLE_API_KEY', '').strip()
if not GOOGLE_API_KEY:
    with open('key.txt', 'r', encoding='utf-8') as f:
        GOOGLE_API_KEY = f.read().strip()
# Fail here with a clear message instead of at the first API call.
if not GOOGLE_API_KEY:
    raise RuntimeError('Gemini API key is empty: set GOOGLE_API_KEY or fill in key.txt')

In [15]:
# Sentence-embedding model used to encode search queries.
model = SentenceTransformer(MODEL_NAME)

# Open the on-disk Chroma store with anonymised telemetry switched off, then
# fetch the existing collection (get_collection does not create it).
chroma_settings = Settings(anonymized_telemetry=False)
client = chromadb.PersistentClient(path=CHROMA_PATH, settings=chroma_settings)
col = client.get_collection(COLL_NAME)

# Configure the Gemini SDK with the API key and build the generation model handle.
genai.configure(api_key=GOOGLE_API_KEY)
gemini = genai.GenerativeModel(LLM_NAME)



In [16]:
# Sanity check: report how many records the collection holds.
print(f'Vectors in Chroma: {col.count()}')

Vectors in Chroma: 1264216


In [17]:
def semantic_search(query: str, k: int = 5):
    """Return the ``k`` stored records closest to ``query`` in embedding space.

    The query is encoded with the module-level ``model`` (with normalised
    embeddings) and looked up in the module-level Chroma collection ``col``.

    Args:
        query: Free-text question to search for.
        k: Number of nearest records to request from the collection.

    Returns:
        A list of dicts in the order Chroma returned them. Each dict has the
        keys ``'answer'`` and ``'title'`` (read from the record metadata,
        ``''`` when missing) and ``'score'`` (computed as ``1 - distance``).
    """
    q_emb = model.encode([query], normalize_embeddings=True)[0]
    res = col.query(
        query_embeddings=[q_emb.tolist()],
        n_results=k,
        include=['distances', 'metadatas'],
    )
    # NOTE(review): `1 - dist` is a cosine similarity only if the collection was
    # built with the cosine distance space — confirm against the indexing code.
    hits = []
    for dist, meta in zip(res['distances'][0], res['metadatas'][0]):
        # A record stored without metadata may come back as None; treat it as
        # empty so one bare record does not crash the whole search.
        meta = meta or {}
        hits.append({'answer': meta.get('answer', ''), 'score': 1 - dist, 'title': meta.get('title', '')})
    return hits

In [18]:
# Smoke test: print the single best hit for a sample question.
sample_hits = semantic_search('How to convert a string to a list in Python?', k=1)
for hit in sample_hits:
    print(f"Title:  {hit['title']}")
    print(f"Answer: {hit['answer']}")
    print(f"Score:  {hit['score']:.3f}\n")

Title:  Assign part of a string to a variable [Python]
Answer: easy python fun object actually string list safely convert list
Score:  0.770



In [19]:
def make_prompt(user_q: str, ctx: list[dict], max_chars: int = 500) -> str:
    """Build the user prompt: the question followed by numbered knowledge snippets.

    Args:
        user_q: The user's question, inserted verbatim.
        ctx: Retrieved hits; each dict must contain an ``'answer'`` string.
        max_chars: Maximum number of characters kept from each snippet.
            Longer snippets are cut to this length and suffixed with ``'…'``.

    Returns:
        The prompt text, with sections separated by blank lines and snippets
        numbered ``[1]``, ``[2]``, ... so the model can cite them.

    Raises:
        ValueError: If ``max_chars`` is negative.
        KeyError: If a context entry has no ``'answer'`` key.
    """
    if max_chars < 0:
        # A negative limit would slice from the end of the snippet instead of truncating it.
        raise ValueError('max_chars must be non-negative')

    parts = [f'User question:\n{user_q}\n',
             'Knowledge snippets:']

    for i, s in enumerate(ctx, 1):
        answer = s['answer']
        txt = (answer[:max_chars] + '…') if len(answer) > max_chars else answer
        parts.append(f'[{i}] {txt}')
    parts.append('\nCompose a concise, correct answer citing the snippets.')
    return '\n\n'.join(parts)

In [20]:
def rag(user_q: str, k_ctx: int = 3, temperature: float = 0.2, max_output_tokens: int = 512):
    """Answer ``user_q`` with Gemini, grounded in snippets retrieved from Chroma.

    Args:
        user_q: The user's question.
        k_ctx: Number of snippets to retrieve and put into the prompt.
        temperature: Sampling temperature passed to the generation config.
        max_output_tokens: Upper bound on the length of the generated answer,
            passed to the generation config.

    Returns:
        A tuple ``(answer_text, ctx)`` where ``ctx`` is the list of hits
        returned by ``semantic_search`` that was used to build the prompt.
    """
    ctx = semantic_search(user_q, k_ctx)
    user_prompt = make_prompt(user_q, ctx)
    # Fold the system message into the user turn so the same prompt also works
    # with models that do not support a separate system role.
    full_prompt = f'{SYSTEM_MSG}\n\n{user_prompt}'
    resp = gemini.generate_content(
        [{'role': 'user', 'parts': [full_prompt]}],
        generation_config={'temperature': temperature, 'max_output_tokens': max_output_tokens}
    )
    # NOTE(review): `resp.text` presumably raises when the response carries no
    # text part (e.g. a blocked reply) — confirm against the SDK and handle it
    # in the caller if needed.
    return resp.text, ctx

In [30]:
# Demo questions: one in English and one in Russian.
tests = [
    'How to convert string to int in Python?',
    'Как создать commit в Git, если нет изменений?',
]

for question in tests:
    reply, hits = rag(question)
    print('?', question)
    # List the retrieved context, showing at most the first 70 characters of each snippet.
    for rank, hit in enumerate(hits, start=1):
        text = hit['answer']
        preview = text if len(text) <= 70 else text[:70] + '...'
        print(f"  [ctx{rank}] → {preview}  (score {hit['score']:.3f})")
    print('! Ответ Gemini:\n', reply, '\n' + '-' * 100 + '\n')

? How to convert string to int in Python?
  [ctx1] → would recommend use try catch also use module  (score 0.730)
  [ctx2] → easy way first convert sure fractional part always zero faster would u...  (score 0.715)
  [ctx3] → return python always return string convert result integer explicitly d...  (score 0.685)
! Ответ Gemini:
 To convert a string to an integer in Python, you can explicitly convert the result to an integer [3]. It's also recommended to use a try-except block for error handling [1]. If you're sure the fractional part is always zero, converting to a float first might be faster [2].
 
----------------------------------------------------------------------------------------------------

? Как создать commit в Git, если нет изменений?
  [ctx1] → two option provide empty commit message new commit yet save message si...  (score 0.449)
  [ctx2] → stage change tracked file include modification deletion thing also sta...  (score 0.431)
  [ctx3] → see answer brian riehman pat not

Выводы:
- Retrieval резко сокращает галлюцинации, то есть Gemini держится фактов из сниппетов
- Ключевые факторы — качество эмбеддингов и ранжирования. Иногда среди top‑3 нет прямого решения, и тогда LLM ошибается

Ограничения:
- Контекстное окно: 32k+ токенов достаточно, но сниппеты всё равно лучше сжимать (сейчас они обрезаются до 500 символов)
- Бесплатная квота Gemini примерно 50RPS / 1M tokens в месяц

Улучшения:
1. Увеличить базу знаний
2. Передавать в промпт больше сниппетов (увеличить `k_ctx`)
3. Использовать более мощную модель эмбеддингов (например, `all-MiniLM-L12-v2`)
4. Использовать более мощную модель LLM (например, `gemini-2.0-advanced`)
5. Улучшить поиск: например, строить эмбеддинги по `CleanTitle` и добавить теги
