#### 챗봇 예제(역할 부여)

#### 챗봇 예제(Gradio 사용)

In [3]:
from langchain_community.chat_models import ChatOllama
from langchain.schema import HumanMessage, AIMessage
import gradio as gr

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Initialize the ChatOllama chat model: uses the "gemma2" model with
# temperature 0.7 and verbose output disabled.
model = ChatOllama(model="gemma2", temperature=0.7, verbose=False)

  model = ChatOllama(model="gemma2", temperature=0.7, verbose=False)


In [5]:
# Generate a reply that takes the whole conversation so far into account
def chat(message, history):
    """Chat callback: answer `message` given the prior conversation.

    Args:
        message: The text of the current user turn.
        history: Iterable of (human, ai) pairs from earlier turns; each pair
            is expanded into a HumanMessage followed by an AIMessage.

    Returns:
        The `content` attribute of the object returned by `model.invoke`.
    """
    # Flatten the (human, ai) pairs into an alternating message sequence.
    past_turns = [
        msg
        for user_text, bot_text in history
        for msg in (HumanMessage(content=user_text), AIMessage(content=bot_text))
    ]

    # The current user message always goes last.
    conversation = past_turns + [HumanMessage(content=message)]

    return model.invoke(conversation).content

In [6]:
# Build the Gradio chat UI around the `chat` callback.
# `examples` are sample prompts offered in the UI; `title` and `description`
# are the heading texts of the page.
demo = gr.ChatInterface(
    fn=chat,
    examples=[
        "안녕하세요!",
        "인공지능에 대해 설명해주세요.",
        "파이썬의 장점은 무엇인가요?"
    ],
    title="AI ChatBot",
    description="질문을 입력하시면 AI가 답변해요!"
)



In [20]:
# Start the Gradio server on port 7861.
# NOTE(review): server_name="0.0.0.0" presumably binds to all network
# interfaces, making the app reachable from other machines — confirm this is intended.
demo.launch(server_port=7861, server_name="0.0.0.0")

* Running on local URL:  http://0.0.0.0:7861

To create a public link, set `share=True` in `launch()`.




In [21]:
# Shut the server down so port 7861 is free for the next example.
demo.close()

Closing server running on port: 7861


#### 챗봇 예제(Gradio + csv 사용)

In [7]:
import pandas as pd
from langchain_community.chat_models import ChatOllama
from langchain.schema import HumanMessage, AIMessage
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
import gradio as gr

In [8]:
# Load the Q&A CSV file; it is decoded as CP949.
# The path is relative to the notebook's working directory.
df = pd.read_csv("./dataset/indata_kor.csv", encoding='CP949')

In [9]:
# Split the table text into chunks for embedding.
# df.to_string() is already a single string with one row per line, so it is
# passed to the splitter as-is. (Calling "\n".join() on a string iterates it
# character by character and would put every character on its own line.)
# separator="\n" makes the splitter cut on row boundaries; the rendered table
# contains no blank lines, so the default "\n\n" separator would never match.
txt_splitter = CharacterTextSplitter(separator="\n", chunk_size=1000, chunk_overlap=200)
txts = txt_splitter.split_text(df.to_string())

In [10]:
# Preview the first rows of the loaded data.
df.head()

Unnamed: 0,inputs,response
0,유튜브 채널 hkcode에서는 무엇을 가르치나요?,"초보자 대상으로 빅데이터, 인공지능과 관련된 컨텐츠를 가르치고 있습니다."
1,유튜브 채널 hkcode는 누가 운영하나요?,한국폴리텍대학 스마트금융과 김효관 교수가 운영합니다.
2,스마트금융과는 무엇을 가르치나요?,"스마트금융과는 빅데이터, 인공지능, 웹개발 및 블록체인을 가르치고 있습니다."
3,스마트금융과 등록비용은 얼마인가요?,등록비용은 국비지원 과정으로 무료 입니다.
4,스마트금융과는 1년에 몇 명을 선발하나요?,1년에 한반을 운영하고 있고 최대 27명을 선발합니다.


In [11]:
# Initialize the sentence-embedding model used to build the vector index.
# NOTE(review): the CSV content is Korean — confirm that this model embeds
# Korean text well enough for retrieval.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")





In [12]:
# Build the FAISS vector store from the text chunks using the embedding model.
# Only raw texts are passed; no per-chunk metadata is supplied here.
vectorstore = FAISS.from_texts(txts, embeddings)

In [13]:
# Initialize the ChatOllama model for retrieval QA ("gemma2", temperature 0.1).
llm = ChatOllama(model="gemma2", temperature=0.1)

In [14]:
# Build the conversational retrieval chain: `llm` answers questions using
# documents fetched from the vector store.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    # search_kwargs={"k":1}: request a single document per query.
    vectorstore.as_retriever(search_kwargs={"k":1}),
    # The chat callback reads 'source_documents' from the result.
    return_source_documents=True,
    verbose=False
)

In [15]:
# Chat callback for the CSV-backed retrieval chatbot
def chat(message, history):
    """Answer `message` through `qa_chain` and append the source list.

    Args:
        message: The text of the current user question.
        history: Iterable of (human, ai) pairs from earlier turns.

    Returns:
        The chain's answer, followed by a "참고 출처" line listing the distinct
        `source` metadata values of the returned documents (sorted, so the
        output is deterministic). Documents without a `source` entry are
        listed as "Unknown". The line is omitted when no documents come back.
    """
    # The chain receives the chat history as a list of (human, ai) tuples.
    chat_history = [(human, ai) for human, ai in history]

    # Use .invoke() rather than calling the chain object directly, matching
    # the rest of this notebook (model.invoke / retriever.invoke).
    response = qa_chain.invoke({"question": message, "chat_history": chat_history})

    # Collect distinct source labels; sorting avoids the arbitrary order of a set.
    sources = sorted({
        str(doc.metadata.get('source', 'Unknown'))
        for doc in response.get('source_documents', [])
    })
    source_info = f"\n\n참고 출처 : {', '.join(sources)}" if sources else ""

    return response['answer'] + source_info


In [16]:
# Build the Gradio chat UI for the CSV-backed chatbot around the `chat` callback.
# `examples` are sample prompts offered in the UI.
univ_demo = gr.ChatInterface(
    fn = chat,
    examples=[
        "한국폴리텍대학 스마트금융과 면접시에는 어떤걸 준비하고 가면 될까요?",
        "스마트금융과에 대해 설명해주세요",
        # Fixed a typo in this prompt ("대한" -> "대학").
        "한국폴리텍대학 추천할만한 학과 하나를 소개해주세요.",
        "스마트금융과는 무엇을 가르치나요?",
        # Removed a stray trailing TAB character from this prompt.
        "스마트금융과 등록비용은 얼마인가요?"
    ],
    title="대학 정보 AI 챗봇",
    description="스마트금융과에 대한 질문을 입력하면 AI가 CSV데이터를 참고하여 한글로 답변합니다."
)



In [17]:
# Start the Gradio server on port 7861 (the same port as the previous example,
# so that server must already be closed).
# NOTE(review): server_name="0.0.0.0" presumably binds to all network
# interfaces — confirm this is intended.
univ_demo.launch(server_port=7861, server_name="0.0.0.0")

* Running on local URL:  http://0.0.0.0:7861

To create a public link, set `share=True` in `launch()`.




In [18]:
# Shut the server down so port 7861 is free for the next example.
univ_demo.close()

Closing server running on port: 7861


<span style="color:red">
port kill  </span> 

- windows : cmd --> netstat -ao --> pid 확인  --> taskkill /f /pid  (pid번호)   
- Mac : 터미널 --> lsof -i :포트번호 --> pid 확인 --> kill -9 (pid번호)   
- 리눅스 : netstat -ntpl | grep 포트번호 --> pid 확인 --> kill -9 (pid번호)    


#### 챗봇 예제(인터넷 URL정보 요약하기)

In [20]:
import gradio as gr
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
import ollama

In [21]:
# Load a web page, split it into chunks, index the chunks, and return a retriever
def load_and_retrieve_docs(url):
    """Build a retriever over the content of a single web page.

    Args:
        url: Address of the page to load.

    Returns:
        The retriever produced by `as_retriever()` on a FAISS store built
        from the page's chunks.
    """
    # Fetch the page; no extra BeautifulSoup options are passed.
    page_docs = WebBaseLoader(web_paths=(url,), bs_kwargs={}).load()

    # Split into chunks of 1000 with an overlap of 200.
    chunker = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = chunker.split_documents(page_docs)

    # Embed the chunks with the "gemma2" Ollama model and index them in FAISS.
    embedder = OllamaEmbeddings(model="gemma2")
    index = FAISS.from_documents(documents=chunks, embedding=embedder)

    return index.as_retriever()

In [22]:
# Function to format documents
def format_docs(docs):
    """Join the `page_content` of each document, separated by a blank line."""
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

In [23]:
# Answer a question about a web page with retrieval-augmented generation
def rag_chain(url, question):
    """Retrieve context for `question` from `url` and ask the model.

    Args:
        url: Page to load and index; the index is rebuilt on every call.
        question: The user's question, used both for retrieval and in the prompt.

    Returns:
        The `['message']['content']` field of the `ollama.chat` response.
    """
    # Retrieve the documents relevant to the question from a fresh index.
    context_docs = load_and_retrieve_docs(url).invoke(question)

    # Single user-role prompt: the question first, then the retrieved context.
    prompt = f"Question: {question}\n\nContext: {format_docs(context_docs)}"
    reply = ollama.chat(
        model='gemma2',
        messages=[{'role': 'user', 'content': prompt}],
    )

    return reply['message']['content']

In [25]:
# Build a simple Gradio form around `rag_chain`: two text inputs
# (passed positionally, so URL first and question second) and one text output.
iface = gr.Interface(
    fn = rag_chain,
    inputs = ["text", "text"],
    outputs = "text",
    title = "RAG chain Question Answering",
    description = "Enter a URL and a query to get answers from the RAG chain."
)

In [26]:
# Launch the Gradio interface in debug mode on port 7861.
# With debug=True the cell keeps running until it is interrupted
# (the recorded output shows the server closing on a keyboard interruption).
iface.launch(server_port=7861, server_name="0.0.0.0", debug=True)

* Running on local URL:  http://0.0.0.0:7861

To create a public link, set `share=True` in `launch()`.


  embeddings = OllamaEmbeddings(model="gemma2")


Created dataset file at: .gradio\flagged\dataset1.csv
Keyboard interruption in main thread... closing server.




In [27]:
# Shut the server down so port 7861 is free for the next example.
iface.close()

Closing server running on port: 7861


#### 챗봇 예제(인터넷 URL정보 요약하기 + TAB추가)

In [30]:
import gradio as gr
import bs4
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
import ollama

In [31]:
# Function to load, split, and retrieve documents

def load_and_retrieve_docs(url):
    """Index one web page in FAISS and return a retriever over it.

    Args:
        url: Address of the page to load.

    Returns:
        The result of `as_retriever()` on the FAISS store built from the
        page's chunks.
    """
    web_loader = WebBaseLoader(web_paths=(url,), bs_kwargs={})
    loaded_docs = web_loader.load()

    # Chunks of 1000 with an overlap of 200.
    pieces = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
    ).split_documents(loaded_docs)

    # Embeddings come from the "gemma2" Ollama model.
    store = FAISS.from_documents(
        documents=pieces,
        embedding=OllamaEmbeddings(model="gemma2"),
    )

    return store.as_retriever()

In [32]:
# Function to format documents

def format_docs(docs):
    """Return every document's `page_content`, joined by blank lines."""
    separator = "\n\n"
    return separator.join([doc.page_content for doc in docs])

In [33]:
# Function that defines the RAG chain

def rag_chain(url, question):
    """Answer `question` using context retrieved from the page at `url`.

    Args:
        url: Page to load and index; the index is rebuilt on every call.
        question: The user's question, used for retrieval and in the prompt.

    Returns:
        The `['message']['content']` field of the `ollama.chat` response.
    """
    doc_retriever = load_and_retrieve_docs(url)
    context = format_docs(doc_retriever.invoke(question))

    # One user-role message carrying the question followed by the context.
    user_message = {
        'role': 'user',
        'content': f"Question: {question}\n\nContext: {context}",
    }
    answer = ollama.chat(model='gemma2', messages=[user_message])

    return answer['message']['content']

<span style="color:red">Tab추가 </span>

In [34]:
# Gradio tabbed interface: a Blocks layout with two tabs.
with gr.Blocks() as iface:
    # Tab 1: the question-answering form backed by `rag_chain`
    # (two text inputs, one text output).
    with gr.Tab("질문과 답변"):
        # NOTE(review): the Interface is created inside the Blocks context and
        # then .render() is called explicitly — confirm the explicit call is
        # needed (and harmless) on the Gradio version in use.
        gr.Interface(
            fn=rag_chain,
            inputs=["text", "text"],
            outputs="text",
            title="RAG Chain Question Answering",
            description="Enter a URL and a query to get answers from the RAG chain."
        ).render()

    # Tab 2: placeholder for a future word-cloud visualization;
    # it currently shows only a Markdown notice.
    with gr.Tab("시각화 (워드클라우드)"):
        gr.Markdown("이 탭은 시각화를 위한 공간입니다. 워드클라우드 기능이 여기에 추가될 예정입니다.")

In [35]:
# Launch the Gradio interface in debug mode on port 7861.
# With debug=True the cell keeps running until it is interrupted
# (the recorded output shows the server closing on a keyboard interruption).
iface.launch(server_port=7861, server_name="0.0.0.0", debug=True)

* Running on local URL:  http://0.0.0.0:7861

To create a public link, set `share=True` in `launch()`.


Keyboard interruption in main thread... closing server.




In [36]:
# Shut the server down so port 7861 is free for the next example.
iface.close()

Closing server running on port: 7861


#### 챗봇 예제(STT:음성을 텍스트로 전환)

In [3]:
import os
from dotenv import load_dotenv
import whisper
import gradio as gr

ImportError: DLL load failed while importing _umath_linalg: 지정된 모듈을 찾을 수 없습니다.

In [38]:
# Load environment variables from a .env file (if needed).
# The recorded output of this cell is False.
load_dotenv()

False

In [54]:
# Explicitly point the process at a locally installed ffmpeg.
# NOTE(review): this is a machine-specific absolute path — adjust it per
# environment or move it into configuration.
FFMPEG_BIN_DIR = r"C:\AI_project\ffmpeg\bin"

# Append the directory to PATH only if it is not already listed, so re-running
# this cell does not keep growing PATH. A missing PATH variable is tolerated.
_current_path = os.environ.get("PATH", "")
if FFMPEG_BIN_DIR not in _current_path.split(os.pathsep):
    os.environ["PATH"] = (
        _current_path + os.pathsep + FFMPEG_BIN_DIR if _current_path else FFMPEG_BIN_DIR
    )

# Full path to the ffmpeg executable, published via FFMPEG_BINARY.
os.environ["FFMPEG_BINARY"] = r"C:\AI_project\ffmpeg\bin\ffmpeg.exe"

In [55]:
# Cache of already-loaded Whisper models, keyed by model name, so the model is
# not reloaded on every transcription request.
_WHISPER_MODELS = {}


def transcribe_audio(audio_path, model_name="base"):
    """Transcribe an audio file to text with Whisper.

    Args:
        audio_path: Path of the audio file, passed to `model.transcribe`.
        model_name: Name passed to `whisper.load_model`; defaults to "base",
            the model this function has always used.

    Returns:
        The "text" entry of the transcription result.
    """
    # Load the model on first use only; later calls reuse the cached instance.
    model = _WHISPER_MODELS.get(model_name)
    if model is None:
        model = whisper.load_model(model_name)
        _WHISPER_MODELS[model_name] = model

    # Transcribe the audio file.
    result = model.transcribe(audio_path)

    # Return the transcribed text.
    return result["text"]

In [56]:
def process_audio(audio):
    """Gradio callback: transcribe the uploaded audio, or return a message.

    Args:
        audio: Value passed to `transcribe_audio`, or None when nothing was
            uploaded.

    Returns:
        The transcribed text; "Upload Audio File." when `audio` is None; or an
        "Error!!: ..." string if transcription raises an exception.
    """
    if audio is not None:
        try:
            return transcribe_audio(audio)
        except Exception as err:
            # Return the failure as text instead of raising.
            return f"Error!!: {str(err)}"
    return "Upload Audio File."

In [57]:
# Create the Gradio interface: an audio input and a text output wired to
# `process_audio`.
iface = gr.Interface(
    fn=process_audio,
    # type="filepath": presumably the callback receives a path to the uploaded
    # file, which is what process_audio forwards — confirm against the Gradio docs.
    inputs=gr.Audio(type="filepath", label="MP3 파일 업로드"),
    outputs="text",
    title = "MP3 to Text Converter",
    description="MP3 파일을 업로드하면 텍스트로 변환합니다."
)

In [None]:
# Launch the Gradio interface in debug mode on port 7861.
# NOTE(review): with debug=True the cell presumably keeps running until it is
# interrupted — confirm.
iface.launch(server_port=7861, server_name="0.0.0.0", debug=True)

* Running on local URL:  http://0.0.0.0:7861

To create a public link, set `share=True` in `launch()`.


  checkpoint = torch.load(fp, map_location=device)


In [1]:
# Shut the server down. `iface` must have been defined in the current kernel
# session; the recorded run of this cell failed with a NameError.
iface.close()

NameError: name 'iface' is not defined