In [4]:
from toolva import Toolva
from toolva.utils import (
    PromptTemplate,
    LogCapture,
    TokenLimiter
)

 

In [7]:
from services.travel_itinerary_generator_agent import (
    TIGAgentFactory
)

from services.memory_manager import (
    MemoryManagerFactory
)

In [15]:
import json
from typing import AsyncGenerator, List

import asyncio
import pytz
from datetime import datetime
from toolva import Toolva
from toolva.utils import (
    PromptTemplate,
    LogCapture,
    TokenLimiter
)

from services.utils import ResponsePreprocessor
from services.tools import (
    travel_info_collector,
    travelDestinationRetrieval,
    travel_itinerary_generator
)


class TIGAgentFactory:

    def __init__(self, config: dict):
        self.config = config
        self._setup()

    def _setup(self):
        # Strategy Planner
        self.planner = Toolva(
            tool="text_generation", 
            src="openai",
            model=self.config.get("planner_ai_model", "gpt-4"),
            prompt_template=PromptTemplate().load(self.config.get("planner_template")), 
            max_tokens=self.config.get("planner_max_tokens", 800),
            temperature=self.config.get("planner_temperature", 0.), 
            top_p=self.config.get("planner_top_p", 1.0),
            frequency_penalty=self.config.get("planner_frequency_penalty", 0.), 
            presence_penalty=self.config.get("planner_presence_penalty", 0.), 
            async_mode=True
        )
        
        # Itinerary Generator
        self.generator = Toolva(
            tool="text_generation", 
            src="openai",
            model=self.config.get("generator_ai_model", "gpt-4"),
            prompt_template=PromptTemplate().load(self.config.get("generator_template")), 
            max_tokens=self.config.get("generator_max_tokens", 800), 
            temperature=self.config.get("generator_temperature", 1.0), 
            top_p=self.config.get("generator_top_p", 1.0),
            frequency_penalty=self.config.get("generator_frequency_penalty", 0.), 
            presence_penalty=self.config.get("generator_presence_penalty", 0.), 
            stream=True
        )
        
        from core import SingletonRetriever, SingletonSummarizer
        
        self.summarizer = SingletonSummarizer().get_summarizer()
        
        retriever = SingletonRetriever().get_retriever()
        
        tokenizer = Toolva(
            tool="tokenization", 
            src="openai", 
            model=self.config["travel_destination_retriever"].get("generator_ai_model", "gpt-4")
        )
        token_limiter = TokenLimiter(
            tokenizer=tokenizer, 
            max_tokens=self.config["travel_destination_retriever"].get("max_tokens", 3000)
        )
        
        destination_retrieval = travelDestinationRetrieval(retriever, token_limiter)
        
        self.tools = {
            "travel_info_collector": lambda planner_result: travel_info_collector(planner_result),
            "travel_destination_retriever": lambda memory, kwargs: destination_retrieval.retrieve(
                memory=memory,
                query=kwargs.get("query"),
                vector_field=self.config["travel_destination_retriever"].get("vector_field"),
                index_n=self.config["travel_destination_retriever"].get("index_name"),
                top_k=kwargs.get("top_k"),
                exclude=kwargs.get("exclude"),
                source_fields=self.config["travel_destination_retriever"].get("source_fields"),
                key_field=self.config["travel_destination_retriever"].get("key_field"),
                value_fields=self.config["travel_destination_retriever"].get("value_fields")
            ),
            "travel_itinerary_generator": lambda input_data: travel_itinerary_generator(input_data)
        }

    def load(self):
        return TravelItineraryGeneratorAgent(
            self.planner,
            self.generator,
            self.summarizer,
            self.tools,
            self.config.get("first_message"),
            self.config.get("error_message")
        )

class TravelItineraryGeneratorAgent:
    
    def __init__(
        self,
        planner,
        generator,
        summarizer,
        tools,
        first_message,
        error_message
    ) -> None:
        self.planner = planner
        self.generator = generator
        self.summarizer = summarizer
        self.tools = tools
        self.first_message = first_message
        self.error_message = error_message
        
        self.preprocessor = ResponsePreprocessor()
        self.korea_time = pytz.timezone('Asia/Seoul')
        self.llm_log = LogCapture("toolva.models.base")
    
    async def run(self, question: str, memory_manager: bool) -> AsyncGenerator[str, None]:
        session_id = memory_manager.session_id
        memory = memory_manager.get_data()
        
        today_date = datetime.now(self.korea_time).strftime('%Y-%m-%dT%H:%M:%S')
        tasks = [
            self.planner(
                today_date=today_date,
                user_info=memory.get("user_info", ""),
                travel_info=memory.get("travel_info", ""),
                history=', '.join(memory.get("history", [])),
                user=memory.get("user_message", ""),
                assistant=memory.get("ai_message", self.first_message),
                question=question
            )
        ]
        
        if memory.get("user_message"):
            tasks.append(
                self.summarizer.asummarize(
                    input=memory.get("user_message"), 
                    output=memory.get("ai_message")
                )
            )
        
        outputs = await asyncio.gather(*tasks)
        
        plan = outputs[0]
        
        try:
            plan = json.loads(plan)

        except json.JSONDecodeError as e:

            yield json.dumps({"message": self.error_message, "session_id": session_id})
        
        steps = plan.keys()
        hits = {}
        for step in steps:
            if step == "message":
                message = plan[step]
                formatted_message = message
                yield json.dumps({
                    "message": message,
                    "session_id": session_id
                })
            
            if step == "travel_info_collector":
                travel_info, message = self.tools[step](plan[step])
                formatted_message = message
                memory["travel_info"] = travel_info
                if message:
                    yield json.dumps({
                        "message": message,
                        "session_id": session_id
                    })
            
            if step == "travel_destination_retriever":
                if type(plan[step]) == list:
                    for param in plan[step]:
                        tasks = [self.tools[step](memory, param)]
                    
                    hits = await asyncio.gather(*tasks)
                    hits = {k: v for hit in hits for k, v in hit.items()}
                else:
                    hits = await self.tools[step](memory, plan[step])
            
            if step == "travel_itinerary_generator":
                previous_hits = self.tools[step](memory, plan[step])
                if previous_hits:
                    hits = previous_hits
                
        itinerary = {}
        if hits:
            messages = self.generator(
                today_date=today_date,
                user_info=memory.get("user_info", {}),
                travel_info=memory.get("travel_info", {}),
                history=', '.join(memory.get("history", [])),
                user=memory.get("user_message", ""),
                assistant=memory.get("ai_message", self.first_message),
                input_data=hits.get("input_data"),
                question=question
            )
            
            message_list = []
            formatted_message_list = []
            collecting_key = False
            temp_key = []
            for item in messages:
                message_list.append(item)
                if "'" in item:
                    item_parts = item.split("'")  # 작은따옴표를 기준으로 item 분할
                    
                    for part in item_parts[:-1]:  # 마지막 부분을 제외하고 모든 부분 처리
                        temp_key.append(part)
                    
                    item = item_parts[-1]  # item을 마지막 부분으로 설정
                    
                    if collecting_key:  # 만약 데이터 수집 중이라면
                        collecting_key = False  # 데이터 수집 종료
                        
                        full_key = self.preprocessor.normalize_text(''.join(temp_key))  # 수집한 데이터를 하나의 문자열로 합치고 정규화
                        
                        try:
                            full_data = self.preprocessor.preprocess_hyperlink(full_key, hits)  # 변환
                        except KeyError:
                            full_data = full_key
                        
                        formatted_message_list.append(full_data)
                        yield json.dumps({
                            "message": full_data,
                            "session_id": session_id
                        })  # 후처리된 데이터 출력
                        formatted_message_list.append(item)
                        yield json.dumps({
                            "message": item,
                            "session_id": session_id
                        })  # 끝나는 작은 따옴표가 포함된 chunk 출력
                        
                        temp_key = []  # 임시 데이터 초기화
                    else:
                        collecting_key = True  # 데이터 수집 시작
                        formatted_message_list.append(item)
                        yield json.dumps({
                            "message": item,
                            "session_id": session_id
                        })  # 시작되는 작은 따옴표가 포함된 chunk 출력
                elif collecting_key:
                    temp_key.append(item)  # 데이터 수집 중일 때 임시 리스트에 항목 추가
                else:
                    formatted_message_list.append(item)
                    yield json.dumps({
                        "message": item,
                        "session_id": session_id
                    })
            
            message = ''.join(message_list)
            formatted_message = ''.join(formatted_message_list)
            itinerary = self.preprocessor.preprocess_itinerary(message, hits)
            if itinerary:
                yield json.dumps({
                    "message": "",
                    "itinerary_id": itinerary['uuid'],
                    "session_id": session_id
                })
        
        new_turn_data = {
            "travel_info": memory.get("travel_info", ""),
            "user_message": question,
            "ai_message": message,
            "formatted_ai_message": formatted_message,
            "input_data": hits,
            "itinerary": itinerary
        }
        
        if len(outputs) > 1:
            memory_manager.index_data(new_turn_data, outputs[1])
        else:
            memory_manager.index_data(new_turn_data)
            
        yield json.dumps({"message": "completed", "session_id": session_id})

In [30]:
bot = TIGAgentFactory(parameters).load()
bot.run()

<__main__.TravelItineraryGeneratorAgent at 0x7f3d600e4e20>

In [10]:
import os


BASE_DIR = os.path.dirname(os.path.abspath("/home/pcn/RnS/test/wslee/project/gildong_temp/app/routers/main_chatbot/main_chatbot.py"))

parameters = {
    "planner_ai_model": "gpt-4",
    "planner_template": os.path.join(BASE_DIR, "strategy_planner.json"),
    "planner_max_tokens": 256,
    "planner_temperature": 0.,
    "planner_top_p": 1.0,
    "planner_frequency_penalty": 0., 
    "planner_presence_penalty": 0.,
    "first_message": (
        "안녕하세요 AI 여행 플래너 '길동이'입니다. "
        "가고 싶은 여행지나 이번 여행에서 즐기고 싶은 특별한 테마가 있다면 말씀해주세요!"
    ),
    "generator_ai_model": "gpt-4",
    "generator_template": os.path.join(BASE_DIR, "itinerary_generator.json"),
    "generator_max_tokens": 800,
    "generator_temperature": 0.,
    "generator_top_p": 1.0,
    "generator_frequency_penalty": 0., 
    "generator_presence_penalty": 0., 
    "error_message": (
        "현재 시스템에 문제가 발생하여 정상적인 서비스 제공이 어렵습니다. "
        "잠시 후 다시 시도해 주시거나 다른 질문을 해주시기 바랍니다. "
        "불편을 드려 대단히 죄송합니다. 감사합니다."
    ),
    "travel_destination_retriever": {
        "index_name": "gildong", 
        "vector_field": "sbert_vector", 
        "max_tokens": 4000, 
        "key_field": "title", 
        "value_fields": [
            "contenttypeid", 
            "overview_summ", 
            "physical_en", 
            "visual_en", 
            "hearing_en"
        ],
        "source_fields": [
            "title", 
            "contenttypeid", 
            "overview_summ", 
            "physical_en", 
            "visual_en", 
            "hearing_en", 
            "location"
        ]
    }
}



In [18]:
from core.auth_utils import  get_user_id
from core.common_config import common_parameters
import uuid
from typing import Union, Optional

class InstanceManager:
    instances = {}
    
    def __init__(self):
        self.memory = MemoryManagerFactory(common_parameters)
        self.agent = TIGAgentFactory(parameters).load()

    def get_instance(self, session_id, auth_header: Optional[str] = None):
        if session_id not in self.instances:
            user_id = get_user_id(auth_header) if auth_header else None
            memory_manager = self.memory.load(session_id, user_id)
            print(memory_manager)
            self.instances[session_id] = memory_manager
            print(self.instances[session_id])
        
        return self.instances[session_id]


bot = TIGAgentFactory(parameters).load()
instance = InstanceManager()

In [29]:
auth_header = "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoyOTg5MjI3NzA0LCJ1c2VyX25hbWUiOiJlbGxhIiwidXNlcl9pbWFnZSI6Imh0dHA6Ly9rLmtha2FvY2RuLm5ldC9kbi9jQk9PREYvYnRzdE1qeG94YXQveUQ2RTNWNnU3UDB2SzBPZTdmdjd1Sy9pbWdfNjQweDY0MC5qcGciLCJ0b2tlbl90eXBlIjoiYWNjZXNzIiwiZXhwIjoxNjk2NTE0NDg5fQ.1dJYWDwiEMvH6y-czm2MNANpZXa9BD-UWdZqhJT5ohY"

# session_id가 없으면 새 uuid를 생성합니다.
message = {"session_id": None , "question" : "오늘 갈만한 서울 여행지를 추천해줘." }
# print(message.session_id)
session_id = message["session_id"] or str(uuid.uuid4())
print(session_id)
memory = instance.get_instance(session_id, auth_header)

print(memory)
result = bot.run(message["question"], memory)

result

33d3da20-6d13-4202-a299-dcfe6659915e
[2m2023-10-05 15:05:04[0m [[32m[1minfo     [0m] [1mFetched memories successfully.[0m [36mhits_len[0m=[35m0[0m [36msession_id[0m=[35m33d3da20-6d13-4202-a299-dcfe6659915e[0m
<services.memory_manager.MemoryManager object at 0x7f3d66f63070>
<services.memory_manager.MemoryManager object at 0x7f3d66f63070>
<services.memory_manager.MemoryManager object at 0x7f3d66f63070>


<async_generator object TravelItineraryGeneratorAgent.run at 0x7f3cb78245e0>

In [31]:
for attr, value in vars(memory).items():
    print(f"{attr}: {value}")

db: <services.data_manager.ElasticsearchDataManager object at 0x7f3d60126e80>
index_n: gildong_convo
token_limiter: <toolva.utils.token_limiter.TokenLimiter object at 0x7f3d60126be0>
session_id: 33d3da20-6d13-4202-a299-dcfe6659915e
user_id: 2989227704
user_info: {'Email': 'user@example.com', 'age': 33, 'age_group': 30, 'disability_status': False, 'disability_type': None, 'gender': '남', 'prefer_travel': ['car'], 'residence': {'lat': 37.5665, 'lon': 126.978}, 'significant': 'none', 'userID': '2989227704', 'user_name': 'ella', 'user_photo': 'http://k.kakaocdn.net/dn/cBOODF/btstMjxoxat/yD6E3V6u7P0vK0Oe7fv7uK/img_640x640.jpg'}
korea_time: Asia/Seoul
data: {'history': []}
turn: 0


In [32]:
print(memory.data)
print(memory.config)

{'history': []}


AttributeError: 'MemoryManager' object has no attribute 'config'

In [27]:
message = {"session_id": None , "question" : "오늘 갈만한 서울 여행지를 추천해줘." }
print(message["session_id"])

None


In [33]:
#tokenlimiter 사용 
from toolva import Toolva
from toolva.utils import TokenLimiter


In [35]:
tokenizer = Toolva(
            tool="tokenization", 
            src= "tiktoken", 
            model= "cl100k_base"
        )

In [41]:
token_limiter = TokenLimiter(tokenizer=tokenizer, max_tokens=10)

In [42]:
token_limiter.token_counter("오늘 갈만한 서울 여행지 추천해줘")

20

In [44]:
token_limiter.cutoff(input_data = ["오늘 갈만한 서울 여행지 추천해줘"] )

ValueError: The combined length of the new input exceeds the max tokens allowed. Please increase the max_tokens or reduce the length of the input and output.

In [60]:
import jwt
token  = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJ1c2VyX2lkIjoiMjk4ODg5MzYwNSIsInVzZXJfbmFtZSI6ImVsbGEiLCJ1c2VyX2ltYWdlIjoiaHR0cDovL2sua2FrYW9jZG4ubmV0L2RuL2RwazlsMS9idHFtR2hBMmxLTC9PejB3RHVKbjFZVjJESW45MmY2RFZLL2ltZ18xMTB4MTEwLmpwZyIsInRva2VuX3R5cGUiOiJhY2Nlc3MiLCJleHAiOjE2OTY1MzM0MDh9.n-3M9NJdIbpzJODiH1bEAAS3KM5J8ZrKauhwXMppOuM"
payload = jwt.decode(token, 'b059dc72065b42d2de068b952eb3cf1592a5c6604792f5ebed9c4aad087b3113' , algorithms=["HS256"])

In [61]:
payload

{'user_id': '2988893605',
 'user_name': '명주',
 'user_image': 'http://k.kakaocdn.net/dn/dpk9l1/btqmGhA2lKL/Oz0wDuJn1YV2DIn92f6DVK/img_110x110.jpg',
 'token_type': 'access',
 'exp': 1696533531}

In [52]:
test = "ella" 
tt= True


if tt :
    print(test)
    break

if tt == True and test == "ella":
    print(test)

SyntaxError: 'break' outside loop (2325822211.py, line 7)

In [1]:
body = {"doc": {}} 
body["doc"]["user_name"] = "elle"
body

{'doc': {'user_name': 'elle'}}

In [20]:
test = { 
    "domestic_travel_info" : ["001", "es" ], #국내 여행정보, helper num, used_db
    "weather_info" :         ["002", "api" ] #날씨 정보
}

In [24]:
if test.get("domestic_travel_info")[1] == "es":
    print("h")

h


In [None]:
| 월  | 어장명   | 고등어 어획량 (톤) |
|-----|---------|-------------------|
| 1월 | 부산어장 | 5,000             |
| 1월 | 인천어장 | 4,500             |
| 2월 | 제주어장 | 4,800             |
| 2월 | 울산어장 | 4,700             |
| 3월 | 부산어장 | 5,200             |
| 3월 | 인천어장 | 5,100             |
| 4월 | 제주어장 | 5,100             |
| 4월 | 울산어장 | 4,950             |
| 5월 | 부산어장 | 4,900             |
| 5월 | 인천어장 | 4,850             |