In [1]:
# 📦 1. Install Dependencies
!pip install -q sentence-transformers faiss-cpu llama-cpp-python langchain huggingface_hub

In [2]:
# 📥 2. Download Gemma-2 2B GGUF Model (Q4_K_M for speed + accuracy)
from huggingface_hub import hf_hub_download

# Hub repository and file name of the Q4_K_M-quantized Gemma-2 2B instruct GGUF build.
MODEL_REPO = "bartowski/gemma-2-2b-it-GGUF"
MODEL_FILE = "gemma-2-2b-it-Q4_K_M.gguf"

# hf_hub_download hands back the path of the file on local disk.
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
print(f"✅ Model downloaded to: {model_path}")

✅ Model downloaded to: /root/.cache/huggingface/hub/models--bartowski--gemma-2-2b-it-GGUF/snapshots/855f67caed130e1befc571b52bd181be2e858883/gemma-2-2b-it-Q4_K_M.gguf


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [3]:
# 📚 3. Load and Parse Dataset
import json

# Each non-blank line of the JSONL file is parsed as a JSON array of objects;
# every key/value pair of every object becomes one {"question", "answer"} record.
data = []
# Read explicitly as UTF-8 so parsing does not depend on the platform default encoding.
with open("/content/genshin_dataset_cleaned.jsonl", "r", encoding="utf-8") as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines: json.loads("") would raise JSONDecodeError.
            continue
        for item in json.loads(line):
            data.extend({"question": k, "answer": v} for k, v in item.items())

print(f"✅ Loaded {len(data)} QA pairs")

✅ Loaded 178 QA pairs


In [4]:
# ✂️ 4. Chunk Text into Small Contexts
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter configured with chunk_size=400 and chunk_overlap=100 (characters).
splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=100)

# One record per piece of every answer, tagged with the question it came from.
chunks = [
    {"text": piece, "source": record["question"]}
    for record in data
    for piece in splitter.split_text(record["answer"])
]

print(f"✅ Created {len(chunks)} chunks")

✅ Created 1515 chunks


In [5]:
# 🧠 5. Create Embeddings
from sentence_transformers import SentenceTransformer
import numpy as np

# Encode the text of every chunk with the all-MiniLM-L6-v2 sentence-embedding model.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
texts = [record["text"] for record in chunks]
embeddings = embedder.encode(texts)

# Persist the chunk records and their embeddings for reuse (optional).
with open("chunks.json", "w") as chunk_file:
    json.dump(chunks, chunk_file)
np.save("embeddings.npy", embeddings)

In [6]:
# 🧭 6. Build FAISS Index
import faiss

# Flat L2 index whose dimensionality matches the embedding width.
embedding_dim = embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(np.array(embeddings))

# Write the populated index to disk.
faiss.write_index(index, "genshin_index.faiss")

In [7]:
# 🤖 7. Load LLM
from llama_cpp import Llama

# Load the downloaded GGUF model through llama.cpp.
# n_ctx=1024 is smaller than the model's 8192-token training context, so
# llama.cpp reports that the full capacity of the model will not be utilized.
llm = Llama(model_path=model_path, n_ctx=1024, n_threads=4, verbose=False)

llama_context: n_ctx_per_seq (1024) < n_ctx_train (8192) -- the full capacity of the model will not be utilized


In [8]:
# 🔍 8. Retrieval + Prompt Functions (improved)
# Reload the FAISS index and the chunk records from the files on disk.
index = faiss.read_index("genshin_index.faiss")
with open("chunks.json") as chunk_file:
    chunks = json.load(chunk_file)

def retrieve_context(query, top_k=4, max_chars=400, min_score=0.65):
    """Return the stored chunks most similar to ``query`` as one context string.

    The query is embedded with the module-level ``embedder`` and looked up in the
    module-level FAISS ``index``; hit ids are resolved through ``chunks``.

    Args:
        query: Natural-language question to search for.
        top_k: Number of nearest neighbours requested from the index.
        max_chars: Maximum number of characters kept from each chunk.
        min_score: Minimum rescaled similarity (see below) a hit needs to be kept.

    Returns:
        The kept snippets joined by blank lines, in index order, or ``""`` when
        no hit passes the threshold.
    """
    query_vec = embedder.encode([query])
    distances, indices = index.search(np.array(query_vec), top_k)

    snippets = []
    for score, idx in zip(distances[0], indices[0]):
        if idx < 0:
            # FAISS pads missing results with id -1; without this guard
            # chunks[-1] would silently resolve to the last chunk.
            continue

        # Heuristic rescaling of the index score: 0 -> 1.0, 4 -> 0.0.
        # NOTE(review): assuming unit-norm embeddings and squared-L2 scores this
        # equals (1 + cosine) / 2 rather than the cosine itself — confirm.
        sim = 1 - score / 4
        if sim < min_score:
            continue

        # Truncate before de-duplicating so the comparison is made against the
        # text that is actually emitted (stored snippets are already truncated).
        snippet = chunks[idx]["text"].strip()[:max_chars]
        if snippet and snippet not in snippets:
            snippets.append(snippet)

    return "\n\n".join(snippets)

def build_prompt(query, context):
    """Assemble the completion prompt sent to the LLM.

    The prompt consists of four sections separated by blank lines: a fixed
    instruction line, the retrieved ``context``, the ``query`` and a trailing
    ``Answer:`` cue.
    """
    sections = (
        "You are a concise, factual Genshin Impact assistant.",
        f"Context:\n{context}",
        f"Question: {query}",
        "Answer:",
    )
    return "\n\n".join(sections)


In [10]:
import time

def ask(query, max_retries=3, min_score=0.7):
    """Answer ``query`` with the LLM, grounded on retrieved context.

    Each attempt retrieves context at the current similarity threshold, builds
    a prompt and runs the model. If no context is found, or the model returns
    only whitespace, the threshold is lowered by 0.1 and the next attempt runs
    after a one-second pause.

    Args:
        query: The user's question.
        max_retries: Maximum number of retrieval/generation attempts.
        min_score: Similarity threshold used for the first attempt.

    Returns:
        The stripped model output of the first successful attempt, or a fixed
        apology message when every attempt fails.
    """
    for attempt in range(max_retries):
        context = retrieve_context(query, min_score=min_score)
        print(f"[Attempt {attempt + 1}] Context length: {len(context)}")

        if context.strip():
            prompt = build_prompt(query, context)
            print(f"[Prompt Preview]: {prompt[:300]}...")  # Optional debug preview

            # No stop tokens are passed; generation is capped at 256 tokens.
            response = llm(prompt, max_tokens=256)
            text = response["choices"][0]["text"].strip()
            if text:
                return text

        if attempt + 1 < max_retries:
            # Pause and relax the threshold only when another attempt follows;
            # doing so after the last attempt merely delays the fallback.
            time.sleep(1)
            min_score -= 0.1  # Relax threshold for fuzzy matches

    return "💬 Sorry, I don't have relevant information."

# Example use: run one query end to end and show the reply under a header line.
query = "Nahida abilities"
answer = ask(query)
print("\n💬 Response:", answer, sep="\n")


[Attempt 1] Context length: 1106
[Prompt Preview]: You are a concise, factual Genshin Impact assistant.

Context:
Tell me about Nahida

brings top-tier utility and damage support to virtually any Dendro-based team. Overall, Nahida is a powerful, easy-to-use character that fits into a wide variety of team comps and enables some of the strongest eleme...

💬 Response:
Nahida's abilities focus on Dendro, making her ideal for teams focused on the element.

* **Elemental Skill:**  "Vajra Vihari" - Creates a Chakra Sphere that can deal Dendro damage and pull enemies towards it. This skill can be used to both deal damage and disrupt enemy positioning.
* **Elemental Burst:**  "Dendro Wanderer" - This skill creates a "Dendro Wanderer" that circles the user, dealing Dendro damage to enemies and providing a shield and heals.
* **Passive Talents:** - Nahida's passive talents enhance her Dendro damage and healing abilities.
    
Please note: This information is based on current knowledge and could c