In [1]:
import streamlit as st 
from PyPDF2 import PdfReader
from langchain.embeddings import OpenAIEmbeddings, SentenceTransformerEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.memory import ConversationBufferWindowMemory
from langchain.vectorstores import FAISS
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gdown

In [2]:
# PDF에서 텍스트를 가져온다
def get_pdf_text(pdf_path):
    google_path = 'https://drive.google.com/uc?id='
    file_id = '1dfTqnaO0zcPdiUjP0490S6UEgFn4hqfE-Jo9We7DDmw/edit?usp=drive_link'
    output_name = 'test.text'
    gdown.download(google_path+file_id,output_name,quiet=False)
    text = ""
    pdf_reader = PdfReader(pdf_path)
    for page in pdf_reader.pages:
        text += page.extract_text()
    return text

#지정된 조건에 따라 주어진 텍스트를 더 작은 덩어리로 분할
def get_text_chunks(text):
    text_splitter = RecursiveCharacterTextSplitter(
        separators="\\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len
    )
    chunks = text_splitter.split_text(text)
    return chunks

#주어진 텍스트 청크에 대한 임베딩을 생성하고 FAISS를 사용하여 벡터 저장소를 생성
def get_vectorstore(text_chunks):
    embeddings = SentenceTransformerEmbeddings(model_name='all-MiniLM-L6-v2')
    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vectorstore

In [3]:
import os
os.environ["OPENAI_API_KEY"] = ""

#주어진 벡터 저장소로 대화 체인을 초기화
def get_conversation_chain(vectorstore):
    memory = ConversationBufferWindowMemory(memory_key='chat_history', return_message=True)  #ConversationBufferWindowMemory에 이전 대화 저장
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo-16k-0613'),
        retriever=vectorstore.as_retriever(),
        get_chat_history=lambda h: h,
        memory=memory
    ) #ConversationalRetrievalChain을 통해 langchain 챗봇에 쿼리 전송
    return conversation_chain

In [4]:
# PDF 파일 경로 설정
pdf_path = "The_Adventures_of_Tom_Sawyer.pdf"
# PDF 텍스트 가져오기
raw_text = get_pdf_text(pdf_path)
# 텍스트에서 청크 검색
text_chunks = get_text_chunks(raw_text)
# PDF 텍스트 저장을 위해 FAISS 벡터 저장소 만들기
vectorstore = get_vectorstore(text_chunks)
# 대화 체인 만들기
m = get_conversation_chain(vectorstore)

FileURLRetrievalError: Failed to retrieve file url:

	Cannot retrieve the public link of the file. You may need to change
	the permission to 'Anyone with the link', or have had many accesses.
	Check FAQ in https://github.com/wkentaro/gdown?tab=readme-ov-file#faq.

You may still be able to access the file from the browser:

	https://drive.google.com/uc?id=1dfTqnaO0zcPdiUjP0490S6UEgFn4hqfE-Jo9We7DDmw/edit?usp=drive_link

but Gdown can't. Please check connections and permissions.

In [None]:
import speech_recognition as sr
import pyttsx3

# 음성 입력 (STT)
def recognize_speech():
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        print("질문을 해주세요.")
        audio = recognizer.listen(source)
    try:
        question = recognizer.recognize_google(audio, language="ko-KR")
        print("사용자:", question)
        return question
    except sr.UnknownValueError:
        print("음성을 이해할 수 없습니다.")
        return ""
    except sr.RequestError:
        print("음성 서비스에 접근할 수 없습니다.")
        return ""

# 음성 출력 (TTS)
def speak_response(response):
    engine = pyttsx3.init()  # 기본 드라이버로 초기화
    engine.say(response)
    engine.runAndWait()

# 질문 처리 및 답변 생성
def process_question(question):
    # 질문에 대한 답변 가져오기
    response = m({"question": question})
    if 'answer' in response:
        answer = response['answer']
    else:
        print("올바른 응답 키를 찾을 수 없습니다.")
    return answer

if __name__ == "__main__":
    while True:
        question = recognize_speech()
        if question=="멈춰":
            break;
        if question:
            answer = process_question(question)
            print(answer)
            speak_response(answer)


질문을 해주세요.
사용자: 안녕하세요 안녕하세요


  warn_deprecated(
Retrying langchain_community.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors..


RateLimitError: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.

In [None]:
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io

In [None]:
# 필요한 라이브러리 import
from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
from httplib2 import Http
from oauth2client.service_account import ServiceAccountCredentials

import io 

# 기본 정보 입력 및 API 열기
creds_file_path = 'aiassistant-423712-6f2e41ef18fa.json'

# 파일에서 자격 증명 불러오기
credentials = ServiceAccountCredentials.from_json_keyfile_name(creds_file_path, scopes=['https://www.googleapis.com/auth/drive'])

# 구글 드라이브에 있는 파일 및 파일 ID 출력
service = build('drive', 'v3', credentials=credentials)
results = service.files().list(pageSize=100, fields="nextPageToken, files(id, name)").execute()
items = results.get('files', [])
print(items)

# 파일 관련 정보 입력
folder_id = "1OrIj8JXzMd-VMH2QkdH8Gm55-VXyyoUh" # 위에서 복사한 구글 드라이브 폴더의 ID
file_name = "회의록"

# 파일 다운로드
request = service.files().export_media(fileId=folder_id, mimeType='application/vnd.openxmlformats-officedocument.wordprocessingml.document')
fh = io.FileIO(file_name + ".docx", 'wb') 

downloader = MediaIoBaseDownload(fh, request)

done = False
while done is False:
    status, done = downloader.next_chunk()
    print("Download %d%%." % int(status.progress() * 100))

print("다운로드가 완료되었습니다.")

[{'id': '1Ny-HZqI2NsE9OQ7iXi7oF1dU_mOnKqYbYbWKcyd4VoM', 'name': 'ai비서'}]


HttpError: <HttpError 403 when requesting https://www.googleapis.com/drive/v3/files/1OrIj8JXzMd-VMH2QkdH8Gm55-VXyyoUh/export?mimeType=application%2Fvnd.openxmlformats-officedocument.wordprocessingml.document&alt=media returned "Export only supports Docs Editors files.". Details: "[{'message': 'Export only supports Docs Editors files.', 'domain': 'global', 'reason': 'fileNotExportable'}]">