# Pipeline for generating Problem PDDL files from recorded audio speech

## Phase 1: Record Audio

Audio will be recorded and placed in the audio_data folder.

In [10]:
import pyaudio
import wave
from datetime import datetime

# Recording parameters.
CHUNK = 1024          # frames requested from the input stream per read
RATE = 44100          # sample rate passed to the stream and the WAV header
RECORD_SECONDS = 5    # duration of the recording

pyAud = pyaudio.PyAudio()

# Timestamped base name (no extension), e.g. "recording_2023-12-04_15_15_03":
# spaces and colons become underscores and the fractional seconds are dropped.
audio_file_name = "recording_{dt}".format(dt=datetime.now()).replace(" ", "_").replace(":", "_").split(".")[0]

print(audio_file_name)
print("Recording Starting for {rt} seconds".format(rt=RECORD_SECONDS))

try:
    stream = pyAud.open(format=pyaudio.paInt16,
                        channels=1,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
    try:
        # Read whole chunks until roughly RECORD_SECONDS of audio is captured.
        frames = [stream.read(CHUNK)
                  for _ in range(int(RATE / CHUNK * RECORD_SECONDS))]
        print("* done recording")
    finally:
        # Always release the input device, even if a read fails.
        stream.stop_stream()
        stream.close()

    # Query the sample width while the PyAudio instance is still alive.
    # (The original referenced an undefined name `p` here, after terminate().)
    sample_width = pyAud.get_sample_size(pyaudio.paInt16)
finally:
    pyAud.terminate()

# Write the captured frames as a mono 16-bit WAV file; the context manager
# closes the file even if writing fails.
with wave.open("audio_data/" + audio_file_name + ".wav", 'wb') as wf:
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))

recording_2023-12-04_15_15_03
Recording Starting for 5 seconds
* done recording


## Phase 2

In the next phase, we will generate text from our audio data using the OpenAI transcription API. Be warned that running this code costs money.

In [20]:
from openai import OpenAI

# Read the API key from the first line of the secrets file. Surrounding
# whitespace (notably the trailing newline) is stripped so that it is not
# passed along as part of the key.
with open('secrets/openai_autobots_key.txt') as f:
    api_key = f.readline().strip()

client = OpenAI(api_key=api_key)

# Recording (inside audio_data/) to transcribe.
audio_file_name = "recording_2023-12-04_15_13_10.wav"

# Upload the audio for transcription; the `with` block closes the file handle
# once the request has completed (or failed).
with open("audio_data/{af}".format(af=audio_file_name), "rb") as audio_file:
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        file=audio_file
    )

# Write the transcript to text_data/<recording name>.txt
with open("text_data/{tfn}.txt".format(tfn=audio_file_name.split(".")[0]), 'w') as f:
    f.write(transcript.text)

  

## Phase 3

Finally, we will take this text and use the GPT-3.5 Turbo model to generate PDDL.

In [27]:
# Transcript (inside text_data/) to turn into a problem PDDL file.
text_file_name = "recording_2023-12-04_15_13_10.txt"

# Read the whole transcript, not just its first line, so multi-line
# transcripts are not truncated.
with open("text_data/" + text_file_name) as f:
    text = f.read().strip()

# Fail early with a clear message instead of paying for a request that
# carries no task description.
if not text:
    raise ValueError("Transcript file text_data/{tf} is empty".format(tf=text_file_name))

prompt = "Generate a problem PDDL file for a robot to: "

messages = [{"role": "user", "content": prompt + text}]

# NOTE: `client` is the OpenAI client created in the Phase 2 cell.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=messages
)

pddl = response.choices[0].message.content

# Write to pddl_data/<transcript name>.pddl. The name is derived from
# text_file_name (the input of this cell) rather than from a variable left
# over from another cell, so the output always matches the transcript used.
with open("pddl_data/{pddlfn}.pddl".format(pddlfn=text_file_name.split(".")[0]), 'w') as f:
    f.write(pddl)

ChatCompletion(id='chatcmpl-8SChYRoggOhy8KUFCMk34tRdzBX8s', choices=[Choice(finish_reason='stop', index=0, message=ChatCompletionMessage(content=';; Problem PDDL file for a robot to greet people\n\n(define (problem greeting-robot)\n  (:domain robot-domain)\n\n  (:objects\n    robot - robot\n    person - person\n  )\n\n  (:init\n    (at robot)\n    (at person)\n  )\n\n  (:goal\n    (greeted person)\n  )\n)', role='assistant', function_call=None, tool_calls=None))], created=1701732700, model='gpt-3.5-turbo-0613', object='chat.completion', system_fingerprint=None, usage=CompletionUsage(completion_tokens=71, prompt_tokens=24, total_tokens=95))
