In [None]:
# Mount Google Drive at /content/drive so the audio files and key files referenced
# by the later cells are reachable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#### Google Speech-to-Text

In [None]:
!pip install google-cloud-speech

Collecting google-cloud-speech
  Downloading google_cloud_speech-2.27.0-py2.py3-none-any.whl.metadata (5.2 kB)
Downloading google_cloud_speech-2.27.0-py2.py3-none-any.whl (292 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m292.4/292.4 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-cloud-speech
Successfully installed google-cloud-speech-2.27.0


##### w/ API Key

In [None]:
import requests
import json
import base64

# Google Cloud API key. It is read from the GOOGLE_API_KEY environment variable so the
# real key never has to be stored in the notebook; the original "key" placeholder is
# kept as the fallback when the variable is not set.
import os

api_key = os.environ.get("GOOGLE_API_KEY", "key")

# Path of the audio clip to transcribe. Exactly one assignment should be active;
# the commented-out lines are the other sample recordings in the same folder.
# audio_file_path = "/content/drive/MyDrive/KITA_2024/M11_최종프로젝트/Data/휴게실.wav"
audio_file_path = "/content/drive/MyDrive/KITA_2024/M11_최종프로젝트/Data/화장실.wav"
# audio_file_path = "/content/drive/MyDrive/KITA_2024/M11_최종프로젝트/Data/엘리베이터.wav"
# audio_file_path = "/content/drive/MyDrive/KITA_2024/M11_최종프로젝트/Data/오피스.wav"

def transcribe_audio(audio_file_path, sample_rate_hertz=44100, language_code="ko-KR", timeout=60):
    """Transcribe a LINEAR16 audio file with the Speech-to-Text REST API.

    The file is read, base64-encoded and posted to the ``speech:recognize``
    endpoint, authenticated with the module-level ``api_key``.

    Args:
        audio_file_path: Path of the audio file to send.
        sample_rate_hertz: Sample rate declared in the request; it has to match
            the recording. Defaults to 44100, the value that used to be
            hard-coded here.
        language_code: Language tag of the speech. Defaults to "ko-KR".
        timeout: Seconds to wait for the HTTP response before ``requests``
            raises, so a stalled connection cannot hang the notebook forever.

    Returns:
        The transcript as a string. The top alternative of every result is
        stripped and joined with single spaces, because the API returns one
        result per consecutive portion of the audio. Returns
        "No transcription found." when the response carries no usable
        transcript, and "Error: <status>, <body>" for a non-200 response.
    """
    # Read the audio file and base64-encode it for the JSON payload
    with open(audio_file_path, "rb") as audio_file:
        audio_base64 = base64.b64encode(audio_file.read()).decode("utf-8")

    # Request body
    data = {
        "config": {
            "encoding": "LINEAR16",
            "sampleRateHertz": sample_rate_hertz,
            "languageCode": language_code,
        },
        "audio": {
            "content": audio_base64,
        },
    }

    # Google Cloud Speech-to-Text API request URL
    url = f"https://speech.googleapis.com/v1/speech:recognize?key={api_key}"

    # Send the POST request
    headers = {"Content-Type": "application/json"}
    response = requests.post(url, headers=headers, data=json.dumps(data), timeout=timeout)

    if response.status_code != 200:
        return f"Error: {response.status_code}, {response.text}"

    # Collect the best alternative of each result. "results" may be missing or
    # empty, and an alternative may come back without a "transcript" field, so
    # neither is indexed blindly.
    pieces = []
    for result in response.json().get("results") or []:
        alternatives = result.get("alternatives") or []
        if not alternatives:
            continue
        transcript = (alternatives[0].get("transcript") or "").strip()
        if transcript:
            pieces.append(transcript)

    if not pieces:
        return "No transcription found."
    return " ".join(pieces)

# Run the Speech-to-Text conversion on the selected clip and show the result.
# Note: transcribe_audio reports failures as plain strings, so the value printed
# here may be an error message rather than a transcript.
User_Goal_Sentence = transcribe_audio(audio_file_path)
print(User_Goal_Sentence)


화장실이 어디야


## LLM Handler

In [None]:
!pip install openai

Collecting openai
  Downloading openai-1.51.2-py3-none-any.whl.metadata (24 kB)
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting jiter<1,>=0.4.0 (from openai)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.6-py3-none-any.whl.metadata (21 kB)
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading openai-1.51.2-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.7/383.7 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx-0.27.2-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.4/76.4 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpcore-1.0.6-py3-none-any.whl (78 kB)
[2K   [90m━━

In [None]:
# Example utterances (presumably for trying get_target_from_text by hand), each
# annotated with the target it is expected to produce. All are disabled.
# user_input = "엘리베이터로 안내해줘"      ## Recommended Target: Elevator
# user_input = "엘리베이터 찾고 있어"       ## Recommended Target: Elevator
# user_input = "화장실을 찾고 싶어"         ## Recommended Target: Toilet
# user_input = "사무실이 어디야?"         ## Recommended Target: Office
# user_input = "휴게실을 못찾겠어"         ## Recommended Target: Lounge

In [None]:
from openai import OpenAI

## OpenAI client. The key is read from the OPENAI_API_KEY environment variable so the
## real secret never has to be stored in the notebook; the original 'key' placeholder
## is kept as the fallback when the variable is not set.
import os

client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "key"))

## Chat model used to pick the destination
MODEL = "gpt-4o-mini-2024-07-18"

## Destinations the assistant is allowed to answer with
target_anchors = ["Toilet", "Elevator", "Office", "Lounge"]

# Quoted, comma-separated form of the targets, embedded in the system prompt
targets_str = ", ".join(f"'{target}'" for target in target_anchors)

def get_target_from_text(user_input):
    """Ask the chat model which known destination the user wants to reach.

    Sends a system prompt listing the allowed targets (``targets_str``) together
    with the user's sentence to ``client`` using ``MODEL``, then normalizes the
    answer against ``target_anchors``.

    Args:
        user_input: The user's request as plain text.

    Returns:
        The matching entry of ``target_anchors`` when the answer equals one of
        them (ignoring case, surrounding whitespace, quotes and a trailing
        period) or mentions exactly one of them. Otherwise the model's answer
        with surrounding whitespace removed, which is an empty string when the
        model returned no content.
    """
    # Prompt
    prompt = [
        {"role": "system", "content":
         f"""당신은 훌륭한 Assistant입니다.
            입력된 텍스트로부터 가고자 하는 목적지를 반환하세요. 목표 target은 {targets_str} 중 하나입니다. 가장 적절한 타겟 이름을 반환하세요.
            수식하는 말이나 사족은 필요없고 list에 있는 target만 출력해줘
         """},
        {"role": "user", "content": user_input},  # passed through unchanged
    ]

    # temperature=0: this is a pick-one-label task, so the same sentence should
    # map to the same target on every run.
    response = client.chat.completions.create(
        model=MODEL,
        messages=prompt,
        max_tokens=50,
        temperature=0,
    )

    # message.content can be None; treat that as an empty answer.
    raw_answer = (response.choices[0].message.content or "").strip()

    # Exact match after removing decoration the model sometimes adds.
    normalized = raw_answer.strip(" \t\r\n'\"`.").casefold()
    for anchor in target_anchors:
        if anchor.casefold() == normalized:
            return anchor

    # Otherwise accept an answer that mentions exactly one known target.
    lowered = raw_answer.casefold()
    mentioned = [anchor for anchor in target_anchors if anchor.casefold() in lowered]
    if len(mentioned) == 1:
        return mentioned[0]

    # Ambiguous or unknown: hand back what the model said.
    return raw_answer

# Map the sentence held in User_Goal_Sentence (assigned by a speech-to-text cell)
# to a target and show it
target_name = get_target_from_text(User_Goal_Sentence)
print(f"Recommended Target: {target_name}")


Recommended Target: Toilet


#### w/ Google Cloud 서비스 계정 키

In [None]:
import os
import base64
from google.cloud import speech_v1p1beta1 as speech
import io

# Google Cloud credentials: path of the service-account JSON key file, exported
# through the GOOGLE_APPLICATION_CREDENTIALS environment variable.
# NOTE(review): "~.json" looks like a placeholder file name — confirm the real key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/drive/MyDrive/KITA_2024/M10_최종프로젝트/Data/~.json"

# Path of the audio clip to transcribe (exactly one assignment is active).
# NOTE(review): these paths use "M10_최종프로젝트" while the API-key cell above uses
# "M11_최종프로젝트" — confirm which folder is current.
# audio_file_path = "/content/drive/MyDrive/KITA_2024/M10_최종프로젝트/Data/휴게실.wav"
# audio_file_path = "/content/drive/MyDrive/KITA_2024/M10_최종프로젝트/Data/화장실.wav"
audio_file_path = "/content/drive/MyDrive/KITA_2024/M10_최종프로젝트/Data/엘리베이터.wav"
# audio_file_path = "/content/drive/MyDrive/KITA_2024/M10_최종프로젝트/Data/오피스.wav"


def transcribe_audio(audio_file_path, language_code="ko-KR"):
    """Transcribe a LINEAR16 audio file with the Speech-to-Text client library.

    A ``speech.SpeechClient`` is created on each call; the file's bytes are sent
    in a single synchronous ``recognize`` request.

    Args:
        audio_file_path: Path of the audio file to send.
        language_code: Language tag of the speech. Defaults to "ko-KR".

    Returns:
        The transcript as a string. The top alternative of every result is
        stripped and joined with single spaces, because the service returns one
        result per consecutive portion of the audio. Returns
        "No transcription found." when no result carries a transcript.
    """
    # Initialize the Google Speech client
    client = speech.SpeechClient()

    # Read the audio file
    with open(audio_file_path, "rb") as audio_file:
        content = audio_file.read()

    # Build the request. sample_rate_hertz is deliberately not set (it was
    # commented out in the original as well); per the RecognitionConfig docs it
    # is optional for WAV input.
    audio = speech.RecognitionAudio(content=content)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code=language_code,
    )

    # Send the request
    response = client.recognize(config=config, audio=audio)

    # Collect the best alternative of each result; a result may have no
    # alternatives, so it is not indexed blindly.
    pieces = []
    for result in response.results:
        if not result.alternatives:
            continue
        transcript = result.alternatives[0].transcript.strip()
        if transcript:
            pieces.append(transcript)

    if not pieces:
        return "No transcription found."
    return " ".join(pieces)

# Run the Speech-to-Text conversion on the selected clip and show the transcript
User_Goal_Sentence = transcribe_audio(audio_file_path)
print(User_Goal_Sentence)

엘리베이터 타려면 어디로 가야 돼
