In [None]:
### –ù–£–ñ–ù–´–ï –ò–ú–ü–û–†–¢–´
!pip install python-dotenv==1.0.1 mistralai langchain_huggingface langchain_community chromadb

In [None]:
import json
import os
import re

import torch
from dotenv import load_dotenv
from IPython.display import display, Markdown
from langchain_community.vectorstores import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from mistralai import Mistral
from tqdm import tqdm

# Colab only (secrets access):
# from google.colab import userdata

## 1. USER QUERY

In [None]:
# For now the question is a plain string; later we apparently want to turn this into a .py script
user_query = "–ö–∞–∫ –∑–∞—Å–µ–ª–∏—Ç—å –ù–ò–ü?"

## 2. RETRIEVAL
#### 2.1 BUILDING DOCS EMBEDDINGS

In [None]:
# Read the wiki dump and collect the text of every entry that carries a "content" key;
# entries without that key are skipped.
with open('wiki_dump_test_without_temps.json', 'r', encoding='utf-8') as dump_file:
    data = json.load(dump_file)

inputs = [
    entry["content"]
    for entry in data
    if "content" in entry
]

In [None]:
# Splitter configuration: chunk_size / chunk_overlap are passed as 2000 / 500.
# NOTE(review): confirm that chunks of this size fit the embedding model's input limit.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=2000,
    chunk_overlap=500,
)

# Wrap each raw text in a Document, then cut the documents into overlapping chunks.
documents = [Document(page_content=raw_text) for raw_text in inputs]
chunks = text_splitter.split_documents(documents)

In [None]:
# Use the GPU when torch can see one, otherwise fall back to the CPU so the notebook
# also runs on CPU-only runtimes instead of failing on a hard-coded "cuda" device.
embedding_device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE(review): the E5 model card asks for "query: " / "passage: " prefixes on inputs;
# none are added here — confirm whether retrieval quality needs them.
embeddings = HuggingFaceEmbeddings(
    model_name="intfloat/multilingual-e5-large",
    model_kwargs={"device": embedding_device}
)

In [None]:
# Reuse the persisted index when it already exists. Chroma.from_documents adds the chunks
# to whatever is already stored in persist_directory, so re-running this cell would
# re-embed everything and store every chunk a second time (duplicate search hits).
# Delete the directory to force a rebuild after the dump, the chunking, or the embedding
# model changes (or after an interrupted build).
persist_directory = "./terraria_db"
if os.path.isdir(persist_directory) and os.listdir(persist_directory):
    vectorstore = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
else:
    vectorstore = Chroma.from_documents(chunks, embeddings, persist_directory=persist_directory)

#### 2.2 MAKE RETRIEVER

In [None]:
# Retriever view over the vector store, configured with k=5 results per search.
retriever = vectorstore.as_retriever(search_kwargs=dict(k=5))

#### 2.3 SEARCH

In [None]:
# Fetch the 5 best-matching chunks for the current query. Only the chunk text is kept
# in `docs`; the similarity score stays available through `results`.
results = vectorstore.similarity_search_with_score(user_query, k=5)
docs = [hit.page_content for hit, _score in results]

## 3. PROMPT BUILDING



In [None]:
# Resolve the Mistral API key. Colab secrets are tried first; outside Colab, or when the
# secret lookup fails, fall back to the process environment, optionally populated from a
# local .env file via load_dotenv().
SECRET_NAME = 'MISTRAL_API_KEY'
api_key = None
key_source = None
colab_error = None

try:
    from google.colab import userdata  # only importable inside Google Colab
except ImportError:
    userdata = None

if userdata is not None:
    try:
        api_key = userdata.get(SECRET_NAME)
        key_source = "Colab secrets"
    except Exception as exc:
        # Lookup failed: remember why (chained into the final error) and try the environment.
        colab_error = exc

if not api_key:
    load_dotenv()
    api_key = os.getenv(SECRET_NAME)
    key_source = "environment / .env"

if not api_key:
    raise ValueError(
        "‚ùå MISTRAL_API_KEY not found in Colab secrets or environment / .env!"
    ) from colab_error
print(f"‚úÖ API key loaded successfully from {key_source}!")

# Initialize Mistral client
client = Mistral(api_key=api_key)


def test_connection():
    """Smoke-test the API key by listing the models visible to the module-level client.

    Best-effort check: any failure is reported on stdout rather than raised, so the
    notebook keeps running. Returns None.
    """
    try:
        models = client.models.list()
        print("‚úÖ Connected successfully!")
        print(f"Available models: {[m.id for m in models.data]}")
    except Exception as e:
        print(f"‚ùå Connection failed: {e}")
        print("üí° If key is not active yet, wait a few minutes and try again")

test_connection()

In [None]:
def run_mistral(messages, user_format=True, model="mistral-medium-latest"):
    """Send a chat request to Mistral and return the text of the first choice.

    Args:
        messages: The prompt. With ``user_format=True`` it is used as the content of a
            single user message; otherwise it is passed to the API unchanged.
        user_format: Whether to wrap ``messages`` into a one-element user-message list.
        model: Name of the Mistral model to call.

    Returns:
        ``choices[0].message.content`` of the chat response.
    """
    payload = [{"role": "user", "content": messages}] if user_format else messages

    # A fresh client is created per call from the module-level `api_key`.
    mistral_client = Mistral(api_key=api_key)
    response = mistral_client.chat.complete(model=model, messages=payload)
    first_choice = response.choices[0]
    return first_choice.message.content

In [None]:
def user_messageRAG(query, docs):
    """Build the RAG prompt: instruction, one INFO block per retrieved doc, then the query.

    Args:
        query: The user's question; interpolated after ``Query: ``.
        docs: Iterable of context snippets. Any number is accepted (including none);
            each one becomes its own ``<<< INFO: ... >>>`` block, in the given order.

    Returns:
        The prompt string. For exactly five docs the text is identical to the previous
        hard-coded five-slot template, including the 8-space indentation of every line.
    """
    # Every line of the original triple-quoted template carried this indentation; it is
    # kept so the prompt sent to the model does not change.
    indent = " " * 8

    lines = [
        "",
        indent + "–¢—ã —ç–∫—Å–ø–µ—Ä—Ç –ø–æ –∏–≥—Ä–µ Terraria, —Ç—ã –¥–æ–ª–∂–µ–Ω –æ—Ç–≤–µ—Ç–∏—Ç—å –Ω–∞ –≤–æ–ø—Ä–æ—Å –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è",
        "",
        indent + "####",
        indent + "–í–æ—Ç –Ω–µ–º–Ω–æ–≥–æ –≤—Å–ø–æ–º–æ–≥–∞—Ç–µ–ª—å–Ω–æ–π –∏–Ω—Ñ–æ—Ä–º–∞—Ü–∏–∏:",
        "",
    ]

    # One delimited block per snippet instead of indexing docs[0]..docs[4], so fewer
    # than five results no longer raise IndexError and extra results are not dropped.
    for doc in docs:
        lines.extend([
            indent + "<<<",
            indent + f"INFO: {doc}",
            indent + ">>>",
        ])

    lines.extend([
        indent + "###",
        "",
        indent + "<<<",
        indent + f"Query: {query}",
        indent + ">>>",
        indent,  # the template ended with an indented, otherwise empty last line
    ])
    return "\n".join(lines)

## 4. LLM ANSWER

In [None]:
# Question used by the generation cells below; replaces the query assigned in section 1.
user_query = "–ö–∞–∫ —Å–∫—Ä–∞—Ñ—Ç–∏—Ç—å –∑–µ–ª—å–µ –ø–æ–¥–≤–æ–¥–Ω–æ–≥–æ –¥—ã—Ö–∞–Ω–∏—è?"

In [None]:
# `user_query` was reassigned after the search cell ran, so retrieve context for the
# current question instead of reusing the chunks fetched for the previous one.
docs = [
    hit.page_content
    for hit, _score in vectorstore.similarity_search_with_score(user_query, k=5)
]
model_answer = run_mistral(user_messageRAG(user_query, docs))


In [None]:
# Show the answer generated from the prompt that includes the retrieved context.
print(model_answer)

In [None]:
def user_message(query):
    """Build the context-free prompt: the expert instruction followed by the query.

    Args:
        query: The user's question; interpolated after ``Query: ``.

    Returns:
        The prompt string. Every non-empty line is indented by 8 spaces and the text
        starts with a newline and ends with an indented empty line.
    """
    indent = " " * 8
    prompt_lines = [
        "",
        indent + "–¢—ã —ç–∫—Å–ø–µ—Ä—Ç –ø–æ –∏–≥—Ä–µ Terraria, —Ç—ã –¥–æ–ª–∂–µ–Ω –æ—Ç–≤–µ—Ç–∏—Ç—å –Ω–∞ –≤–æ–ø—Ä–æ—Å –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—è",
        "",
        indent + "<<<",
        indent + f"Query: {query}",
        indent + ">>>",
        indent,
    ]
    return "\n".join(prompt_lines)

In [None]:
# Same question, but prompted through user_message (no retrieved context) instead of
# user_messageRAG. Note: this overwrites the previous value of `model_answer`.
model_answer = run_mistral(user_message(user_query))
print(model_answer)