In [1]:
### –ù–£–ñ–ù–´–ï –ò–ú–ü–û–†–¢–´
!pip install python-dotenv==1.0.1 mistralai langchain_huggingface langchain_community chromadb



In [11]:
import json
import os
import re

import torch
from dotenv import load_dotenv
from IPython.display import display, Markdown
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from mistralai import Mistral
from tqdm import tqdm

# Colab-only import (uncomment when running on Google Colab):
# from google.colab import userdata

## 1. USER QUERY

In [3]:
# For now the question is a plain string; later this will probably become a .py script.
# The literal is restored to readable Cyrillic ("How do I house an NPC?").
user_query = "Как заселить НИП?"

## 2. RETRIEVAL
#### 2.1 BUILDING DOCS EMBEDDINGS

In [4]:
# Read the wiki dump from disk.
with open('wiki_dump_test_without_temps.json', 'r', encoding='utf-8') as dump_file:
    data = json.load(dump_file)

# Collect the text of every entry; entries without a "content" field are skipped.
inputs = []
for entry in data:
    if "content" not in entry:
        continue
    inputs.append(entry["content"])

In [5]:
# Wrap each raw text in a Document so the splitter can consume it.
documents = list(map(lambda page_text: Document(page_content=page_text), inputs))

# Split the documents into overlapping chunks (chunk_size=2000, chunk_overlap=500).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=500,
)
chunks = text_splitter.split_documents(documents)

In [6]:
# Choose the compute device at runtime: a hard-coded "cuda" device fails on
# machines without a GPU, while GPU runtimes still get "cuda".
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): the multilingual-e5 model card recommends "query: " / "passage: "
# text prefixes; none are added here — confirm whether retrieval quality needs them.
embeddings = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large",
    model_kwargs={"device": embedding_device}
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [7]:
# Embed every chunk and store the vectors in a Chroma collection under ./terraria_db.
# NOTE(review): re-running this cell against an existing ./terraria_db presumably
# adds the same chunks again — confirm, and clear or reuse the directory if so.
vectorstore = Chroma.from_documents(
    documents=chunks,
    embedding=embeddings,
    persist_directory="./terraria_db",
)

#### 2.2 MAKE RETRIEVER

In [8]:
# Expose the vector store through the retriever interface with k=5 as search argument.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 5},
)

#### 2.3 SEARCH

In [9]:
# Query the store for the 5 best matches to the current question.
results = vectorstore.similarity_search_with_score(user_query, k=5)

# Keep only the chunk texts; the similarity scores are not used afterwards.
docs = [hit.page_content for hit, _score in results]

## 3. PROMPT BUILDING



In [12]:
# Resolve the Mistral API key: Colab secrets first, then the environment / a .env file.
try:
    from google.colab import userdata
except ImportError:
    # Not running on Colab; the environment fallback below is used instead.
    userdata = None

api_key = None
if userdata is not None:
    try:
        api_key = userdata.get('MISTRAL_API_KEY')
    except (userdata.SecretNotFoundError, userdata.NotebookAccessError):
        # userdata.get raises instead of returning None when the secret is
        # missing or the notebook has no access to it.
        api_key = None

if api_key:
    print("✅ API key loaded successfully from Colab secrets!")
else:
    # load_dotenv() reads a local .env file (if any) into the process environment.
    load_dotenv()
    api_key = os.getenv('MISTRAL_API_KEY')
    if not api_key:
        raise ValueError("❌ MISTRAL_API_KEY not found in Colab secrets or in the environment!")
    print("✅ API key loaded successfully from the environment!")

# Initialize Mistral client
client = Mistral(api_key=api_key)

# Test connection
def test_connection(mistral_client=None):
    """Check that the Mistral API is reachable and print the available model ids.

    Args:
        mistral_client: Client to probe. When omitted, the module-level
            ``client`` is used, so existing ``test_connection()`` calls keep working.

    Returns:
        None. The outcome is only printed; any failure is reported on stdout
        instead of being raised.
    """
    try:
        # Resolved inside the try so that a missing global client is reported
        # like any other connection problem.
        active_client = client if mistral_client is None else mistral_client
        models = active_client.models.list()
        print("✅ Connected successfully!")
        print(f"Available models: {[m.id for m in models.data]}")
    except Exception as e:
        print(f"❌ Connection failed: {e}")
        print("💡 If key is not active yet, wait a few minutes and try again")

# Run the connectivity check right after the client has been created.
test_connection()

✅ API key loaded successfully from Colab secrets!
✅ Connected successfully!
Available models: ['mistral-medium-2505', 'mistral-large-latest', 'mistral-medium-2508', 'mistral-medium-latest', 'mistral-medium', 'ministral-3b-2410', 'ministral-3b-latest', 'ministral-8b-2410', 'ministral-8b-latest', 'open-mistral-7b', 'mistral-tiny', 'mistral-tiny-2312', 'open-mistral-nemo', 'open-mistral-nemo-2407', 'mistral-tiny-2407', 'mistral-tiny-latest', 'mistral-large-2411', 'pixtral-large-2411', 'pixtral-large-latest', 'mistral-large-pixtral-2411', 'codestral-2508', 'codestral-latest', 'devstral-small-2507', 'devstral-small-latest', 'devstral-medium-2507', 'devstral-medium-latest', 'mistral-vibe-cli-latest', 'pixtral-12b-2409', 'pixtral-12b', 'pixtral-12b-latest', 'mistral-small-2506', 'mistral-small-latest', 'magistral-medium-2509', 'magistral-medium-latest', 'magistral-small-2509', 'magistral-small-latest', 'voxtral-mini-2507', 'voxtral-mini-latest', 'voxtral-small-2507', 'voxtral-small-latest

In [13]:
def run_mistral(messages, user_format=True, model="mistral-medium-latest"):
    """Send a chat request to Mistral and return the text of the first choice.

    Args:
        messages: The prompt. With ``user_format=True`` it is wrapped into a
            single message with role "user"; otherwise it is passed to the
            API unchanged.
        user_format: Whether ``messages`` should be wrapped as described above.
        model: Name of the Mistral model to call.

    Returns:
        The ``content`` of the first choice's message in the API response.
    """
    payload = [{"role": "user", "content": messages}] if user_format else messages

    # A client is created per call from the module-level api_key.
    mistral = Mistral(api_key=api_key)
    completion = mistral.chat.complete(model=model, messages=payload)

    first_choice = completion.choices[0]
    return first_choice.message.content

In [18]:
def user_messageRAG(query, docs):
    """Build the RAG prompt: instruction, retrieved context blocks, then the question.

    Args:
        query: The user's question, inserted after ``Query:``.
        docs: Iterable of context texts. Each text becomes its own
            ``<<< INFO: ... >>>`` block, in order. Any number of texts is
            accepted (including none), so a retrieval result with fewer than
            five entries does not raise ``IndexError``.

    Returns:
        The prompt as a single string. For five texts the layout (indentation,
        separators, line breaks) is the one the five hard-coded slots produced.
    """
    indent = " " * 8

    # One delimited block per retrieved text instead of five fixed slots.
    info_blocks = "".join(
        f"{indent}<<<\n{indent}INFO: {doc}\n{indent}>>>\n" for doc in docs
    )

    # The Russian instruction reads: "You are an expert on the game Terraria,
    # you must answer the user's question" / "Here is some auxiliary information:".
    user_message = (
        "\n"
        f"{indent}Ты эксперт по игре Terraria, ты должен ответить на вопрос пользователя\n"
        "\n"
        f"{indent}####\n"
        f"{indent}Вот немного вспомогательной информации:\n"
        "\n"
        f"{info_blocks}"
        f"{indent}###\n"
        "\n"
        f"{indent}<<<\n"
        f"{indent}Query: {query}\n"
        f"{indent}>>>\n"
        f"{indent}"
    )
    return user_message

## 4. LLM ANSWER

In [21]:
# Second test question ("How do I craft the underwater breathing potion?"),
# with the literal restored to readable Cyrillic.
user_query = "Как скрафтить зелье подводного дыхания?"

In [22]:
# Retrieve context for the *current* question before building the prompt: the
# `docs` list produced by the search cell was fetched for an earlier value of
# `user_query`, so reusing it would pair this question with unrelated context.
docs = [
    hit.page_content
    for hit, _score in vectorstore.similarity_search_with_score(user_query, k=5)
]
model_answer = run_mistral(user_messageRAG(user_query, docs))


In [23]:
# Print the answer produced from the RAG prompt as plain text.
print(model_answer)

В *Terraria* **зелье подводного дыхания** (*Gills Potion*) позволяет дышать под водой в течение **2 минут** (или **4 минут** с аксессуаром *Алхимический цветок* или *Фляжка для зелий*).
Для его создания потребуются:

### **Рецепт крафта** (на верстаке или алхимическом столе):
- **2 ×** *Морской сорняк* (*Seaweed*) – собирается под водой (в океане, джунглях или любом водоёме).
- **1 ×** *Жабры* (*Gills*) – дроп с **Короля слизней** (*King Slime*, 25% шанс) или **Пираньи** (1% шанс в джунглях).
- **1 ×** *Бутылка* (*Bottle*) – крафтится из **стекла** (2 песка → 1 стекло → 1 бутылка).
- **1 ×** *Светящийся гриб* (*Glowing Mushroom*) 

In [24]:
def user_message(query):
    """Build the baseline prompt: the expert instruction and the question, no context.

    Args:
        query: The user's question, inserted after ``Query:``.

    Returns:
        The prompt as a single string.
    """
    # The Russian instruction reads: "You are an expert on the game Terraria,
    # you must answer the user's question". The local is named `prompt` so it
    # does not shadow the function itself.
    prompt = (
        f"""
        Ты эксперт по игре Terraria, ты должен ответить на вопрос пользователя

        <<<
        Query: {query}
        >>>
        """
    )
    return prompt

In [25]:
# Baseline run: the same question without retrieved context.
# Note that this overwrites the answer stored in model_answer by the RAG cell.
baseline_prompt = user_message(user_query)
model_answer = run_mistral(baseline_prompt)
print(model_answer)

В **Terraria** **Зелье подводного дыхания (Gills Potion)** позволяет дышать под водой в течение **2 минут** (или дольше с аксессуарами, увеличивающими продолжительность зелий). Вот как его скрафтить:

---

### **Ингредиенты для крафта (1 зелье):**
| Предмет                     | Количество | Где добыть                                                                 |
|------------------------------|------------|---------------------------------------------------------------------------|
| **Жабры (Gill)**             | 1          | Выпадает с **Короля Слизней (King Slime)** (шанс 33.33% в обычном режиме, 100% в режиме Эксперт/Мастер). |
| **–ë–æ—Ç–≤–∞ (Blinkroot)**        | 1          | –†–∞—Å—Ç—ë—Ç –≤ **–ü–æ–¥–∑–µ–º–Ω—ã—Ö –¥–∂—É–Ω–≥–ª—è—Ö