In [53]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment, then
# read the API key stored under the "openai" variable (None when it is unset).
load_dotenv()
openai_api_key = os.environ.get("openai")

In [66]:
from langchain.chat_models import ChatOpenAI
from langchain import PromptTemplate, LLMChain
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    AIMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

from langchain.callbacks import get_openai_callback

from langchain.schema import (
    AIMessage,
    HumanMessage,
    SystemMessage
)

In [56]:
# Chat model used by the chain below; sampling temperature is set to 1.
chat = ChatOpenAI(
    openai_api_key=openai_api_key,
    temperature=1,
)

In [57]:
# System instruction for the assistant.
# The template is built from adjacent string literals rather than a backslash
# continuation inside one literal: the continuation form embeds the next
# line's indentation into the text, leaving a run of stray spaces in the prompt.
prompt = PromptTemplate(
    template=(
        "You are an assistant who helps me brainstorm "
        "to find creative topics using the data given to me."
    ),
    input_variables=[],
)
system_message_prompt = SystemMessagePromptTemplate(prompt=prompt)

In [8]:
# Human turn: a pass-through template whose only variable, "text", carries
# the fully assembled request.
prompt = PromptTemplate(input_variables=["text"], template="{text}")
human_message_prompt = HumanMessagePromptTemplate(prompt=prompt)

In [9]:
# Combine the system instruction and the human "{text}" slot into one chat prompt.
chat_prompt = ChatPromptTemplate.from_messages(
    [
        system_message_prompt,
        human_message_prompt,
    ]
)

In [10]:
# Bind the chat model to the chat prompt so it can be invoked as one unit.
chain = LLMChain(prompt=chat_prompt, llm=chat)

In [60]:
def prompt_text(data_prompt:str, field:str, purpose:str, num_topics:int) -> str:
    """
    Build the request text asking for a numbered list of topics.

    Args:
        data_prompt (str): Description of the available datasets, inserted
            verbatim after the "Data:" heading.
        field (str): Field the topics should belong to.
        purpose (str): Purpose the topics are generated for.
        num_topics (int): How many topics to request; also the number of
            empty numbered slots appended after "Output:".

    Returns:
        str: The instruction line, the data section, and an "Output:" section
            containing one empty "N. " slot per requested topic.
    """
    instruction = f"Generate {num_topics} topics in the {field} field for the following purpose: {purpose}\n\nData:\n"
    # One empty numbered slot per topic for the model to fill in.
    numbered_slots = "".join(f'\n{slot}. ' for slot in range(1, num_topics + 1))
    return instruction + data_prompt + "\nOutput:" + numbered_slots

def generate_prompt(data_infos):
    """
    Build the dataset-description section of the prompt from a dict of
    dataset metadata.

    Args:
        data_infos (dict): Mapping from each dataset's id to its data info
            dict. Each data info dict contains the following keys:
            - 'data_name' (str): name of the dataset
            - 'data_description' (str): description of the dataset
            - 'columns' (list of dict): one dict per column, each containing:
                - 'column_name' (str): name of the column
                - 'column_description' (str): description of the column

    Returns:
        str: The prompt text describing every dataset, numbered from 1 in
            iteration order. Empty string when `data_infos` is empty.
    """
    lines = []
    # The dataset id (the dict key) is not part of the generated text.
    for position, (_data_id, info) in enumerate(data_infos.items(), start=1):
        lines.append(f"{position}. {info['data_name']}")
        lines.append("- data description")
        lines.append(f"{info['data_description']}")
        lines.append("- columns info")
        lines.extend(
            f"\t- {col['column_name']}: {col['column_description']}"
            for col in info['columns']
        )
    # Every line, including the last one, is newline-terminated.
    return "".join(f"{line}\n" for line in lines)

In [61]:
# Sample dataset metadata keyed by dataset id, in the shape generate_prompt()
# expects. Each record is (id, name, description, [(column name, column description), ...]).
data_info = {
    dataset_id: {
        'data_name': dataset_name,
        'data_description': dataset_description,
        'columns': [
            {'column_name': column_name, 'column_description': column_description}
            for column_name, column_description in dataset_columns
        ],
    }
    for dataset_id, dataset_name, dataset_description, dataset_columns in [
        (
            12345,
            'iris',
            '붓꽃 데이터셋',
            [
                ('sepal_length', '꽃받침 길이'),
                ('sepal_width', '꽃받침 너비'),
                ('petal_length', '꽃잎 길이'),
                ('petal_width', '꽃잎 너비'),
                ('class', '붓꽃 종류'),
            ],
        ),
        (
            12346,
            'titanic',
            '타이타닉 호 생존자 데이터',
            [
                ('survived', '생존 여부 (0: 사망, 1: 생존)'),
                ('pclass', '선실 등급 (1, 2, 3 중 하나)'),
                ('sex', '성별'),
                ('age', '나이'),
                ('fare', '운임'),
                ('embarked', '승선 항구 (C = Cherbourg, Q = Queenstown, S = Southampton)'),
            ],
        ),
    ]
}

In [62]:
# Describe the datasets, then wrap that description in the topic request.
data_prompt = generate_prompt(data_info)
text = prompt_text(
    data_prompt=data_prompt,
    field='ss',
    purpose='아이더',
    num_topics=5,
)

In [23]:
# Display the dataset metadata for inspection.
# NOTE(review): the saved output under this cell is a list, while `data_info`
# is defined in this notebook as a dict keyed by dataset id, so the output
# looks stale. Re-run the notebook from a fresh kernel to refresh it.
data_info

[{'data_name': 'iris',
  'data_description': '붓꽃 데이터셋',
  'columns': [{'column_name': 'sepal_length', 'column_description': '꽃받침 길이'},
   {'column_name': 'sepal_width', 'column_description': '꽃받침 너비'},
   {'column_name': 'petal_length', 'column_description': '꽃잎 길이'},
   {'column_name': 'petal_width', 'column_description': '꽃잎 너비'},
   {'column_name': 'class', 'column_description': '붓꽃 종류'}]},
 {'data_name': 'titanic',
  'data_description': '타이타닉 호 생존자 데이터',
  'columns': [{'column_name': 'survived',
    'column_description': '생존 여부 (0: 사망, 1: 생존)'},
   {'column_name': 'pclass', 'column_description': '선실 등급 (1, 2, 3 중 하나)'},
   {'column_name': 'sex', 'column_description': '성별'},
   {'column_name': 'age', 'column_description': '나이'},
   {'column_name': 'fare', 'column_description': '운임'},
   {'column_name': 'embarked',
    'column_description': '승선 항구 (C = Cherbourg, Q = Queenstown, S = Southampton)'}]}]

In [67]:
# Run the chain inside the OpenAI callback context so that token usage and
# cost for this request are collected on `cb` and printed.
# `text` is the request assembled in the earlier prompt-building cell. The
# dataset description is deliberately not recomputed here: doing so without
# also rebuilding `text` had no effect on what is sent to the model.
with get_openai_callback() as cb:
    res = chain.run(text=text)
    print(cb)

Tokens Used: 379
	Prompt Tokens: 291
	Completion Tokens: 88
Successful Requests: 1
Total Cost (USD): $0.000758


In [64]:
# Print the response returned by the chain (the numbered list of topics).
print(res)

1. Exploring Iris dataset: A Comprehensive Analysis of Sepal and Petal Attributes
2. Titanic Survival Analysis: Insights into the Impact of Age, Gender and Class on Passenger Survival
3. Visualizing Iris Dataset: Uncovering Hidden Patterns and Correlations through Data Visualization Techniques
4. Predicting Titanic Passenger Survival: A Machine Learning Analysis based on Passenger Attributes
5. Embarking on Titanic's Voyage: An Exploratory Data Analysis of Passenger Demographics and Ticket Prices.


In [69]:
import re

# Extract the topic text from each "N. <topic>" line of the model response.
# The pattern is anchored to the start of a line (re.MULTILINE), so a
# "<digits>. " sequence in the middle of a non-list line is not taken for a
# list item. Surrounding whitespace is stripped from every topic.
topics = [
    topic.strip()
    for topic in re.findall(r"^[ \t]*\d+\.[ \t]+(.+)$", res, flags=re.MULTILINE)
]
topics

['Understanding the correlation between petal length and width in the iris dataset',
 'Demographic analysis of Titanic survivors: age, gender, and class',
 'Predicting survival rates on the Titanic based on passenger data using machine learning algorithms',
 'A comparison of the distribution of sepal length and width in different types of iris flowers ',
 'Exploring the relationship between fare and survival rates on the Titanic in different passenger classes']