# 의견 말하기 유형 질문 데이터 만들기

- 의견 말하기 유형 질문 생성하기
- 질문에 대한 오디오 만들기


## 질문 생성 Chain

In [4]:
import json
from typing import List

from tqdm.notebook import tqdm
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser, CommaSeparatedListOutputParser
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema import HumanMessage, AIMessage, StrOutputParser
import pandas as pd
import os
# Keep a key that is already configured in the environment. The placeholder is
# only a fallback and must be replaced with a real key before running the notebook.
os.environ.setdefault("OPENAI_API_KEY", "Your API Key")
# Create the gpt-3.5-turbo chat model (the next cell instantiates it again).
model = ChatOpenAI(model="gpt-3.5-turbo")

In [5]:
# Chat model used by subject_chain and question_chain.
model = ChatOpenAI(model="gpt-3.5-turbo")

### 질문 주제 샘플링하기

In [6]:
# Output parser placed at the end of subject_chain; it also supplies the
# format instructions that are injected into the subject prompt.
csv_parser = CommaSeparatedListOutputParser()

In [7]:
# Instruction text provided by the parser; bound to {format_instruction} in the subject prompt.
csv_format_instruction = csv_parser.get_format_instructions()

In [8]:
# Prompt asking the model for everyday exam topics as single words. The
# parser's format instructions are pre-bound as a partial variable, so the
# chain can be invoked with no inputs.
# (The "subjet" spelling is kept because subject_chain references this name.)
subjet_prompt_template = PromptTemplate.from_template(
    template="영어 시험에 나올 법한 일상적인 주제를 단어 형식으로 만들어줘.\n{format_instruction}",
    partial_variables={"format_instruction": csv_format_instruction},
)

In [9]:
# Pipeline: subject prompt -> chat model -> comma-separated list parser.
subject_chain = subjet_prompt_template | model | csv_parser

In [10]:
# The prompt's only variable is already bound as a partial, so an empty input dict is passed.
subject_list = subject_chain.invoke({})
subject_list

['family',
 'school',
 'work',
 'hobbies',
 'travel',
 'food',
 'health',
 'technology',
 'sports',
 'weather']

In [11]:
# Keep only the first four subjects; one question is generated per subject below.
subject_list = subject_list[:4]
subject_list

['family', 'school', 'work', 'hobbies']

### 질문 만들기

In [12]:
# Prompt (instructions written in Korean) asking for exactly one English
# "express an opinion" speaking-test question about the {input} subject that
# also asks the respondent for reasons; it ends with one example question.
# The backslash after the opening quotes keeps the prompt from starting with a newline.
template = """\
- 영어 스피킹 시험 중에 의견 말하기(express an opinion)에 나올 법한 {input} 주제 관련 질문 하나 만들어줘
- 상대방에게 그렇게 생각한 이유와 주장을 이야기하도록 요구해줘
- 한 문장만 만들어줘
- 여러 예시 만들지마
- 영어로
- 예시
Some people prefer to take a job that does not pay well but does provide a lot of time off from work. What is your opinion about taking a job with a low salary that has a lot of vacation time? Give reasons for your opinion.
"""

# {input} is the only template variable; it is supplied when the chain is invoked.
question_prompt_template = PromptTemplate.from_template(template=template)

In [13]:
# Pipeline: question prompt -> chat model -> plain string output.
question_chain = question_prompt_template | model | StrOutputParser()

In [14]:
# Generate one question per sampled subject, with a progress bar over the subjects.
question_list = []
question_list.extend(
    question_chain.invoke({"input": topic}) for topic in tqdm(subject_list)
)

  0%|          | 0/4 [00:00<?, ?it/s]

In [15]:
# Show the generated questions.
question_list

['What is your opinion on the importance of spending quality time with family? Give reasons for your opinion.',
 'What is your opinion about the importance of homework in school? Give reasons for your opinion.',
 'What is your opinion about working remotely from home? Give reasons for your opinion.',
 'What is your opinion on spending a lot of time on hobbies? Give reasons to support your answer.']

## 질문에 대한 오디오 파일 만들기

In [16]:
from openai import OpenAI

In [17]:
# OpenAI SDK client used by gen_speech_file. No api_key is passed here, so it
# relies on the OPENAI_API_KEY environment variable.
client = OpenAI()

In [18]:
def gen_speech_file(text, output_file_path, response_format=None):
    """Synthesize ``text`` to speech and save the audio at ``output_file_path``.

    Args:
        text: The text to read aloud.
        output_file_path: Destination path of the audio file. Missing parent
            directories are created.
        response_format: Audio encoding to request from the API (for example
            ``"mp3"`` or ``"wav"``). When omitted it is inferred from the file
            extension, falling back to ``"mp3"`` (the API default) when the
            extension is not a supported format.
    """
    from pathlib import Path

    target = Path(output_file_path)

    if response_format is None:
        # Without an explicit format the API returns MP3, so a ".wav" path
        # would end up holding MP3 bytes. Match the encoding to the extension.
        supported_formats = {"mp3", "opus", "aac", "flac", "wav", "pcm"}
        extension = target.suffix.lower().lstrip(".")
        response_format = extension if extension in supported_formats else "mp3"

    # The target directory may not exist yet on a fresh checkout.
    target.parent.mkdir(parents=True, exist_ok=True)

    # Calling stream_to_file on a non-streaming response is deprecated in the
    # SDK; the streaming-response context manager is the supported way.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice="alloy",  # alloy, echo, fable, onyx, nova, and shimmer
        input=text,
        response_format=response_format,
    ) as response:
        response.stream_to_file(output_file_path)

In [19]:
# !mkdir -p ./data/speaking__express_an_opinion

In [20]:
# Directory that receives the generated audio files and the CSV index.
save_dir = "./data/speaking__express_an_opinion"

In [21]:
# Questions that will be converted to audio.
question_list

['What is your opinion on the importance of spending quality time with family? Give reasons for your opinion.',
 'What is your opinion about the importance of homework in school? Give reasons for your opinion.',
 'What is your opinion about working remotely from home? Give reasons for your opinion.',
 'What is your opinion on spending a lot of time on hobbies? Give reasons to support your answer.']

In [22]:
# Synthesize one audio file per question and remember where each one was saved.
record_list = []

progress = tqdm(enumerate(question_list), total=len(question_list))
for idx, question_text in progress:
    audio_path = f"{save_dir}/question_{idx}.wav"
    gen_speech_file(question_text, audio_path)
    record_list.append({"question": question_text, "audio_file_path": audio_path})

  0%|          | 0/4 [00:00<?, ?it/s]

  response.stream_to_file(output_file_path)


In [23]:
# One row per question, with the columns "question" and "audio_file_path".
df = pd.DataFrame(record_list)
df

Unnamed: 0,question,audio_file_path
0,What is your opinion on the importance of spen...,./data/speaking__express_an_opinion/question_0...
1,What is your opinion about the importance of h...,./data/speaking__express_an_opinion/question_1...
2,What is your opinion about working remotely fr...,./data/speaking__express_an_opinion/question_2...
3,What is your opinion on spending a lot of time...,./data/speaking__express_an_opinion/question_3...


In [24]:
# Save the question/audio index next to the audio files; index=False omits the row index.
df.to_csv(f"{save_dir}/question_and_audio.csv", index=False)

In [25]:
from IPython.display import Audio

In [26]:
# Load one of the generated files for inline playback as a spot check.
Audio(f"{save_dir}/question_2.wav")

In [25]:
# Show the current subject list.
subject_list

['family', 'school', 'friends', 'hobbies']