# LLM Practice

In [2]:
import gradio as gr
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import FAISS
from langchain_ollama import OllamaEmbeddings
import ollama
import logging
from gtts import gTTS #type: ignore
import os
import tempfile

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [3]:
# Function to load, split, and retrieve documents
def load_and_retrieve_docs(url, chunk_size=1000, chunk_overlap=200, embedding_model="gemma2"):
    """Load a web page, split it into chunks, index them and return a retriever.

    Args:
        url: Address of the page to load. Surrounding whitespace is ignored.
        chunk_size: ``chunk_size`` passed to ``RecursiveCharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to the same splitter.
        embedding_model: Model name passed to ``OllamaEmbeddings``.

    Returns:
        The result of ``FAISS.from_documents(...).as_retriever()`` built from
        the page's chunks.

    Raises:
        ValueError: If ``url`` is not a non-blank string, or if splitting the
            loaded page produces no chunks to index.
    """
    # Fail fast with a clear message instead of letting the loader or the
    # vector store fail later on unusable input.
    if not isinstance(url, str) or not url.strip():
        raise ValueError("A non-empty URL is required.")
    url = url.strip()

    loader = WebBaseLoader(web_paths=(url,), bs_kwargs=dict())
    docs = loader.load()

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    splits = text_splitter.split_documents(docs)
    if not splits:
        # Nothing to embed: building an index from zero documents is meaningless.
        raise ValueError(f"No content could be extracted from URL: {url}")

    embeddings = OllamaEmbeddings(model=embedding_model)
    vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
    return vectorstore.as_retriever()

In [4]:
def text_to_speech(text, lang="ko"):
    """Synthesize ``text`` with gTTS and save it to a temporary MP3 file.

    Args:
        text: Text to convert to speech.
        lang: Language code passed to ``gTTS`` (defaults to ``"ko"``).

    Returns:
        Path of the generated ``.mp3`` file. The file is created with
        ``delete=False``, so this function does not remove it on success.

    Raises:
        Exception: Whatever ``gTTS`` construction or ``save`` raises; the
            temporary file is removed before the error propagates.
    """
    # Only reserve a unique path here; the handle is closed before gTTS
    # writes to that path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
        temp_filename = fp.name
    try:
        tts = gTTS(text=text, lang=lang)
        tts.save(temp_filename)
    except Exception:
        # Do not leave an empty or partial temp file behind when synthesis fails.
        try:
            os.remove(temp_filename)
        except OSError:
            pass
        raise
    return temp_filename

In [5]:
def process_tts(text, lang):
    """Convert ``text`` to speech and report the outcome as a status message.

    Args:
        text: Text to synthesize.
        lang: Language code forwarded to ``text_to_speech``.

    Returns:
        A ``(audio_file, status_message)`` tuple. ``audio_file`` is the path
        returned by ``text_to_speech``, or ``None`` when there is nothing to
        convert or the conversion failed. Exceptions are never propagated.
    """
    # Whitespace-only input has nothing to speak either, so treat it as empty
    # instead of letting the TTS call fail with a generic error.
    if not text or not str(text).strip():
        return None, "텍스트를 입력해주세요."
    try:
        audio_file = text_to_speech(text, lang)
    except Exception as e:
        # Log the failure so it is visible outside the status message too.
        logging.error(f"Error in process_tts: {e}")
        return None, f"오류가 발생했습니다: {str(e)}"
    return audio_file, "변환이 완료되었습니다. 아래에서 재생 또는 다운로드 할 수 있습니다."

In [6]:
# Merge retrieved documents into a single context string
def format_docs(docs):
    """Return the ``page_content`` of every item in ``docs`` joined by blank lines."""
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

In [7]:
def rag_chain(url, question):
    """Answer ``question`` using the page at ``url`` as retrieval context.

    The page is indexed via ``load_and_retrieve_docs``, the documents
    retrieved for the question are formatted with ``format_docs``, and the
    resulting prompt is sent to the ``gemma2`` model through ``ollama.chat``.

    Args:
        url: Address of the page to use as context.
        question: The user's question.

    Returns:
        A two-element tuple. On success both elements are the model's answer
        (the same text twice). On blank input or any failure, a fixed error
        message and an empty string. Exceptions are logged, never raised.
    """
    error_result = ("오류가 발생했습니다. URL 또는 질문을 확인해주세요.", "")

    # Skip the page download, embedding and model call when either input is
    # blank; the outcome would be the same error message anyway.
    url = (url or "").strip()
    if not url or not (question or "").strip():
        logging.error("Error in rag_chain: URL and question must not be empty.")
        return error_result

    try:
        retriever = load_and_retrieve_docs(url)
        retrieved_docs = retriever.invoke(question)
        if not retrieved_docs:
            raise ValueError("No documents retrieved. Please check the retrieval process.")

        formatted_context = format_docs(retrieved_docs)
        formatted_prompt = f"Question: {question}\n\nContext: {formatted_context}\n\nPlease answer in Korean."

        # Send the prompt to the model and receive its response
        response = ollama.chat(model='gemma2', messages=[{
            'role': 'user',
            'content': formatted_prompt
        }])

        if 'message' not in response or 'content' not in response['message']:
            raise ValueError("Invalid response from model. 'message' or 'content' missing.")

        # Success: return the answer twice, one value per output
        result = response['message']['content']
        return result, result

    except Exception as e:
        # logging.exception keeps the traceback, which logging.error would drop
        logging.exception(f"Error in rag_chain: {e}")
        # On error, still return two values so the caller gets the same shape
        return error_result

In [8]:
# Build the two-tab Gradio UI: tab 1 runs rag_chain on a URL + question,
# tab 2 turns the resulting answer into speech via process_tts.
with gr.Blocks() as iface:
    shared_state = gr.State("")  # Carries the latest answer from tab 1 over to tab 2

    # Tab 1: question and answer
    with gr.Tab("질문과 답변"):
        url_input = gr.Textbox(label="URL 입력", placeholder="Enter a URL")
        query_input = gr.Textbox(label="요청사항", placeholder="Enter your question")
        answer_output = gr.Textbox(label="요청결과", interactive=False)
        generate_button = gr.Button("요청하기")

        # Run the RAG chain when the request button is clicked
        generate_button.click(
            fn=rag_chain,
            inputs=[url_input, query_input],
            outputs=[answer_output, shared_state],  # show the answer and also store it in shared_state
        )

    # Tab 2: text-to-speech. NOTE(review): the tab label still mentions a word
    # cloud, but this tab only wires up audio generation — confirm the label.
    with gr.Tab("음성파일 생성(워드클라우드)"):
        shared_textbox = gr.Textbox(label="요청 결과 내용", interactive=False)
        language_dropdown = gr.Dropdown(
            choices=["ko", "en", "ja", "zh-cn"], label="언어 선택", value="ko"
        )
        audio_output = gr.Audio(label="생성된 오디오")
        status_message = gr.Textbox(label="상태 메시지")
        convert_button = gr.Button("변환 시작")

        # Mirror shared_state into the read-only textbox whenever it changes
        shared_state.change(fn=lambda x: x, inputs=shared_state, outputs=shared_textbox)

        # Run text-to-speech on the mirrored text in the selected language
        convert_button.click(
            fn=process_tts,
            inputs=[shared_textbox, language_dropdown],
            outputs=[audio_output, status_message],
        )

In [9]:
# Launch the Gradio interface in debug mode, bound to 0.0.0.0 on port 7861
iface.launch(
    server_name="0.0.0.0",
    server_port=7861,
    debug=True,
)

* Running on local URL:  http://0.0.0.0:7861

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.




In [10]:
# Shut down the Gradio server that iface.launch() started
iface.close()

Closing server running on port: 7861
