In [None]:
# Install the required libraries (run this once)
#%pip install -r requirements.txt

In [None]:
import os

# Set OPENAI_API_KEY to the dummy value "NA" for this process.
# NOTE(review): presumably a placeholder so that libraries which look for this
# variable do not fail; the models configured later in this notebook are an
# Ollama LLM and a local embedding model. This overwrites any real key already
# present in the process environment — confirm that is intended.
os.environ["OPENAI_API_KEY"] = "NA"

In [None]:
from llama_index.llms.ollama import Ollama

# LLM client for the "llama3.1" model via Ollama. The request timeout is set
# very high (4000.0) so that slow generations are not cut off.
llm = Ollama(
    request_timeout=4000.0,
    model="llama3.1",
)

In [None]:
from llama_index.core import SimpleDirectoryReader


# Load the source documents found in the "data_ua_conf" directory
# (path is relative to the notebook's working directory).
reader = SimpleDirectoryReader(input_dir="data_ua_conf")
documents = reader.load_data()



In [None]:
from transformers import AutoModel, AutoTokenizer, XLMRobertaTokenizer

# Local directory holding the saved embedding model and tokenizer; the
# download cell writes to it and the load cell reads from it.
save_directory = "./local_embedding_model_ua"

In [None]:
# # Download the local embedding model; this should be run only once

# model_name = "ukr-models/xlm-roberta-base-uk"

# tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
# model = AutoModel.from_pretrained(model_name)
# # Save to local directory

# tokenizer.save_pretrained(save_directory)
# model.save_pretrained(save_directory)

In [None]:
# Restore the embedding model and its tokenizer from `save_directory`,
# where the (commented-out) download cell saved them.
model = AutoModel.from_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)

In [None]:
#create embedding function
from llama_index.core.embeddings import BaseEmbedding
import torch

class LocalEmbedding(BaseEmbedding):
    """LlamaIndex embedding backend that wraps a local Hugging Face model.

    Each text is tokenized (truncated to ``max_length`` tokens), passed through
    the model without gradient tracking, and the last hidden state is averaged
    over the token dimension to produce one vector per text.

    Only the private ``_get_*`` hooks are implemented here. The public
    ``get_text_embedding`` / ``get_query_embedding`` methods are inherited from
    ``BaseEmbedding`` and delegate to these hooks, so they remain available to
    callers.

    Args:
        model: A loaded model whose output exposes ``last_hidden_state``.
        tokenizer: The tokenizer matching ``model``.
        max_length: Maximum number of tokens kept per text; longer inputs are
            truncated. Defaults to 512.
        **kwargs: Forwarded unchanged to ``BaseEmbedding.__init__``.
    """

    def __init__(self, model, tokenizer, max_length=512, **kwargs):
        super().__init__(**kwargs)
        # Stored as underscore-prefixed attributes so they are not treated as
        # declared model fields of the base class.
        self._model = model
        self._tokenizer = tokenizer
        self._max_length = max_length

    def _embed(self, text):
        """Return the mean-pooled embedding of ``text`` as a plain list of floats."""
        inputs = self._tokenizer(
            text,
            return_tensors="pt",
            max_length=self._max_length,
            truncation=True,
        )
        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = self._model(**inputs)
        # Average over dim 1 (the token axis), then take the single batch entry.
        # Explicit [0] indexing is used instead of squeeze() so that only the
        # batch axis is ever removed.
        pooled = outputs.last_hidden_state.mean(dim=1)[0]
        # LlamaIndex expects embeddings as lists of floats, not NumPy arrays
        # (arrays are, for example, not JSON-serialisable when persisting).
        return pooled.tolist()

    # Hooks required by BaseEmbedding
    def _get_query_embedding(self, query):
        """Embed a search query; queries and documents share the same encoder."""
        return self._embed(query)

    def _get_text_embedding(self, text):
        """Embed a document chunk."""
        return self._embed(text)

    async def _aget_query_embedding(self, query):
        """Async variant of ``_get_query_embedding``; computes synchronously."""
        return self._embed(query)

# Initialize the custom embedding with the locally loaded model and tokenizer
embedding_model = LocalEmbedding(model=model, tokenizer=tokenizer)


In [None]:
from llama_index.core import VectorStoreIndex, Settings


# Configure LlamaIndex's global settings: every component created afterwards
# (including the index below) picks up the LLM and embedding model from here.
Settings.llm = llm
Settings.embed_model = embedding_model
# Set the chunk size (number of tokens per chunk)
# NOTE(review): LocalEmbedding truncates its input to 512 tokens of the
# embedding model's tokenizer. If the splitter counts tokens with a different
# tokenizer, part of a 1024-token chunk may never be embedded — confirm the
# two limits are compatible.
Settings.chunk_size = 1024
# Set the chunk overlap (number of tokens overlapping between chunks)
Settings.chunk_overlap = 128
# Build the vector index from the loaded documents (chunking and embedding
# happen here); show_progress=True displays progress while it runs.
index = VectorStoreIndex.from_documents(documents, show_progress=True)

In [None]:
# Create a retriever over the index; `top_k` is passed as `similarity_top_k`,
# i.e. the number of most similar chunks requested per query.
top_k = 5
retriever = index.as_retriever(similarity_top_k=top_k)


In [None]:
def get_augmented_prompt(query, verbose=True):
    """Build a RAG prompt: retrieved context followed by the user's question.

    Uses the notebook-level ``retriever`` to fetch the chunks most similar to
    ``query`` and places their text, one chunk per line, under a ``Context:``
    header. The question and an instruction to answer in Ukrainian follow.

    Args:
        query: The user's question; also used as the retrieval query.
        verbose: When True (the default), print each retrieved chunk so the
            retrieval step can be inspected.

    Returns:
        The complete prompt string to send to the LLM.
    """
    retrieved_documents = retriever.retrieve(query)

    context_lines = []
    # `rank` (1-based) is used rather than `index` so the loop variable does
    # not shadow the notebook-level vector index of the same name.
    for rank, doc in enumerate(retrieved_documents, start=1):
        text = doc.text
        if verbose:
            print(f"Retrieved {rank}: {text} \n")
        context_lines.append(f"{text}\n")

    context = "".join(context_lines)
    return (
        f"Context:\n{context}"
        f"\nQuestion: {query}\n"
        "Please provide a concise and accurate answer in ukrainian based on the context."
    )


In [None]:
from IPython.display import Markdown, display

def get_RAG_response(query):
    """Answer ``query`` with the LLM, using a retrieval-augmented prompt.

    The prompt is produced by ``get_augmented_prompt`` and passed to
    ``llm.complete``; whatever ``llm.complete`` returns is returned unchanged.
    """
    augmented_prompt = get_augmented_prompt(query)
    return llm.complete(augmented_prompt)


In [None]:
# Example query (Ukrainian for "creation of a civil-military cooperation system")
query = "створення системи цивільновійськового співробітництва"
response = get_RAG_response(query)

# Render the answer as Markdown under a "Response:" heading
response_markdown = f"## <b>Response:</b> \n #### {response}"
display(Markdown(response_markdown))
