In [3]:
from together import Together
import json
from dotenv import load_dotenv
import os
load_dotenv()  

from llama_index.core import SimpleDirectoryReader

from llama_index.core import VectorStoreIndex
from llama_index.core.retrievers import QueryFusionRetriever


ModuleNotFoundError: No module named 'together'

In [2]:
class MultiturnRAG:
    """Multi-turn chat session over a chat-completions client, with an optional document retriever.

    The instance keeps the running conversation in ``self.history`` (a list of
    ``{"role": ..., "content": ...}`` dicts) and replays it on every request so
    the model sees the previous turns.

    Args:
        model: Model identifier forwarded to ``client.chat.completions.create``.
        client: Object exposing ``chat.completions.create(...)``.
        document_path: List of file paths to index for retrieval.
        context_length: Maximum number of retrieved nodes returned by
            ``prepare_context``.
    """

    # ChatML-style markers that may leak into the generated text.
    _END_MARKER = "<|im_end|>"
    _START_MARKER = "<|im_start|>"

    def __init__(self, model, client, document_path: list, context_length: int = 20):
        self.document_path = document_path
        self.history = []
        self.context_length = context_length
        self.model = model
        self.client = client
        # Built on demand by initial_retriever(); None means "not built yet".
        self.retriever = None

    def initial_retriever(self):
        """Index every document in ``self.document_path`` and build the fusion retriever.

        One vector index is created per file; each index is exposed as a
        retriever and the retrievers are combined in a ``QueryFusionRetriever``
        stored on ``self.retriever``.
        """
        retrievers = []
        for path in self.document_path:
            documents = SimpleDirectoryReader(input_files=[path]).load_data()
            index = VectorStoreIndex.from_documents(documents)
            # QueryFusionRetriever combines retrievers, not raw indexes.
            retrievers.append(index.as_retriever())

        # NOTE(review): use_async=True inside a notebook may require
        # nest_asyncio.apply() beforehand — confirm in the target environment.
        self.retriever = QueryFusionRetriever(
            retrievers,
            similarity_top_k=2,
            num_queries=4,  # set this to 1 to disable query generation
            use_async=True,
            verbose=True,
        )

    def prepare_context(self, query):
        """Return at most ``self.context_length`` retrieved nodes for ``query``.

        The retriever is built on first use if ``initial_retriever`` has not
        been called yet.
        """
        if getattr(self, "retriever", None) is None:
            self.initial_retriever()

        # retrieve() takes only the query; the result count is capped here.
        nodes = self.retriever.retrieve(query)
        return nodes[: self.context_length]

    def update_history(self, user_input, bot_response):
        """Append one user/assistant exchange to the conversation history."""
        self.history.append({"role": "user", "content": user_input})
        self.history.append({"role": "assistant", "content": bot_response})

    @classmethod
    def _strip_chat_markers(cls, text):
        """Cut ``text`` at the first end marker and drop any start-of-turn markers.

        Only the exact marker tokens are removed, so ordinary words and
        characters such as "assistant", "<", ">" or "|" in the reply survive.
        """
        text = text.split(cls._END_MARKER)[0]
        return text.replace(cls._START_MARKER + "assistant", "").replace(cls._START_MARKER, "")

    def generate_response(self, input, context):
        """Send the conversation plus the new user message to the model and return its text.

        Args:
            input: The new user message.
            context: Accepted for interface compatibility; it is currently not
                included in the prompt.

        Returns:
            The first choice's message content with chat-template markers removed.
        """
        messages = [{"role": "system", "content": "You are a helpful assistant. You communicate in Thai language."}]
        messages.extend(self.history)
        messages.append({"role": "user", "content": input})

        response = self.client.chat.completions.create(
            model=self.model,
            messages=messages,
            max_tokens=64,
            temperature=0.7,
            top_p=0.7,
            top_k=50,
            repetition_penalty=1,
            stop=["[/INST]","</s>"],
        )

        # Guard against a missing content field so the clean-up never sees None.
        content = response.choices[0].message.content or ""
        return self._strip_chat_markers(content)

    def get_response(self, user_input):
        """Run one chat turn: query the model, record the exchange, return the reply."""
        response = self.generate_response(input = user_input, context="Normal Conversation")
        self.update_history(user_input = user_input,bot_response = response)
        return response


In [None]:
# Documents made available to the RAG session, and the chat model to query.
document = ["./doc/attention.pdf"]
model = "meta-llama/Meta-Llama-3-8B-Instruct-Lite"
# Client used to call the LLM API; the key is read from the environment.
client = Together(api_key=os.environ.get('TOGETHER_API_KEY'))
# Start RAG OOP
rag = MultiturnRAG(document_path=document, model=model, client=client)
print("Start : \n")
# Chat until the user types "Exit"; the sentinel itself is never sent to the model.
while True:
    user_input = input()
    if user_input == "Exit":
        break
    response = rag.get_response(user_input)
    print(response)


In [11]:
# Register this Python environment as a per-user Jupyter kernel named "env_llama".
!python -m ipykernel install --user --name=env_llama

Installed kernelspec env_llama in C:\Users\ชยพัทธ์\AppData\Roaming\jupyter\kernels\env_llama
