# Environment

In [None]:
!pip install git+https://github.com/openai/whisper.git

!pip install llama-index
!pip install llama-index-llms-gemini llama-index-embeddings-gemini #llama-index-embeddings-openai
!pip install tqdm

!pip install langchain-google-genai
!pip install --upgrade langchain
!pip install -U langchain-community
!pip install faiss-cpu

!pip install gradio

!pip install pymuPDF

!pip install wordcloud matplotlib jieba

In [None]:
# Gemini API key used by every Gemini client in this notebook.
# NOTE(review): the value here is a placeholder string — presumably it must be
# replaced with a real key before running; confirm how keys are meant to be supplied.
GEMINI_API = "YOUR_GEMINI_API_KEY"

import os
# Setting GOOGLE_API_KEY can be used to avoid the GCE metadata issue.
os.environ["GOOGLE_API_KEY"] = GEMINI_API

# Folders under the Google Drive mount point (/content/drive, mounted below).
save_folder = '/content/drive/MyDrive/RAG/'
audio_folder = save_folder + 'voice/'      # audio files listed / uploaded by the Audio tab
save_text_folder = save_folder + 'text/'   # destination used by save_text_files()

# Working folders outside the Drive mount point.
caption_folder = '/content/caption/'   # raw Whisper transcripts
text_folder = '/content/text/'         # repaired transcripts and uploaded .txt/.pdf files
summary_folder = '/content/summary/'   # generated summaries
# Font file passed to WordCloud in generate_keyword_cloud().
font_path = '/content/drive/MyDrive/RAG/font/NotoSansCJK-Regular.ttc'

from google.colab import drive
# Mount Google Drive so the Drive-based paths above become reachable.
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Audio

In [None]:
import time
from tqdm import tqdm
import shutil
import gradio as gr
import whisper
from llama_index.llms.gemini import Gemini

# Gemini LLM client (llama-index wrapper); used below to repair Whisper
# transcripts and to generate summaries.
# NOTE(review): the recorded cell output echoes this statement, which looks like
# a warning emitted for it (possibly a deprecation) — confirm against the
# installed llama-index version.
Gemini_model = Gemini(
    api_key=GEMINI_API,
    model_name="models/gemini-2.0-flash",
    temperature=0.1,
)

# Whisper checkpoint name passed to whisper.load_model(); the loaded model is
# the one handed to speech_to_text() during conversion.
model_size = "base"
model = whisper.load_model(model_size)

def speech_to_text(audio_path, model):
    """Transcribe an audio file with a Whisper model.

    Args:
        audio_path: Path of the audio file to transcribe.
        model: Object exposing ``transcribe(path)`` that returns a mapping
            with a ``"text"`` entry (a loaded Whisper model).

    Returns:
        The value stored under ``"text"`` in the transcription result.
    """
    return model.transcribe(audio_path)["text"]

def get_audio_lists():
    """Split the ``.mp3`` files in ``audio_folder`` into processed / unprocessed.

    A file counts as processed when ``text_folder`` already contains a ``.txt``
    file with the same base name.

    Returns:
        tuple[list[str], list[str]]: ``(processed, unprocessed)`` file names,
        each in sorted order.
    """
    mp3_names = sorted(name for name in os.listdir(audio_folder) if name.endswith('.mp3'))

    done, pending = [], []
    for mp3_name in mp3_names:
        stem = os.path.splitext(mp3_name)[0]
        transcript_path = os.path.join(text_folder, stem + '.txt')
        bucket = done if os.path.exists(transcript_path) else pending
        bucket.append(mp3_name)
    return done, pending

def generate_html_list(items, empty_text="目前沒有資料"):
    """Render ``items`` as an HTML ``<ul>`` list.

    Args:
        items: List of values to show, one ``<li>`` per value.
        empty_text: Text returned instead of markup when ``items`` is empty.

    Returns:
        str: ``<ul><li>...</li></ul>`` markup, or ``empty_text`` when there is
        nothing to list.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    if not items:
        # Honor the caller-supplied placeholder instead of a fixed message.
        return empty_text

    # Items are file names that can originate from user uploads, so escape
    # them before embedding them in markup.
    list_items = "".join(f"<li>{escape(str(it))}</li>" for it in items)
    return f"<ul>{list_items}</ul>"

# Prompt prefix for the Whisper -> Gemini repair step: the raw transcript is
# appended after this text before it is sent to Gemini.
prompt_prefix = """
以下是一篇語音轉文字的逐字稿，許多地方出現辨識錯誤。
請根據你的理解，適當的替換掉詞彙，修復這篇逐字稿。
請達成以下兩點要求：
1. 使用繁體中文與英文，避免使用簡體中文。
2. 加上逗號和句號，並且在適當的地方分段，每段之間用一個空行隔開。

以下為逐字稿：
"""

def upload_audio(file):
    """Store an uploaded audio file in ``audio_folder``.

    Args:
        file: Value received from the file uploader. Accepted shapes are
            ``None``, a dict carrying ``'data'`` bytes plus an
            ``'orig_name'``/``'name'`` entry, a plain path string, or an
            object whose ``.name`` attribute is the path of the uploaded file.

    Returns:
        str: Status message for the upload-result label.
    """
    if file is None:
        return "⚠️ 沒有可上傳的檔案！"

    if isinstance(file, dict):
        raw_name = file.get('orig_name') or file.get('name') or "uploaded_audio.mp3"
        source_path = None
    else:
        source_path = file if isinstance(file, str) else file.name
        raw_name = source_path

    # Keep only the last path component: os.path.join() drops audio_folder
    # when its second argument is absolute, so an unsanitized name could be
    # written outside the target folder.
    filename = os.path.basename(raw_name) or "uploaded_audio.mp3"
    dst_path = os.path.join(audio_folder, filename)

    if source_path is None:
        with open(dst_path, 'wb') as f:
            f.write(file['data'])
    else:
        shutil.copyfile(source_path, dst_path)

    return f"✅ 已上傳音檔：{filename}"

def convert_selected_audio(selected_files):
    """Convert the selected audio files to repaired text, streaming progress.

    This is a generator: every ``yield`` is a 3-tuple
    ``(status message, processed-list HTML, unprocessed checkbox update)``.
    While files are being processed only the status message changes; the
    processed list and the checkbox group are refreshed once at the end.

    For each file the raw Whisper transcript is written to ``caption_folder``
    and the Gemini-repaired text is written to ``text_folder``.

    Args:
        selected_files: File names (relative to ``audio_folder``) to convert.
    """
    if not selected_files:
        # Report the problem without touching the other two components.
        yield ("⚠️ 請至少勾選一個音檔！", gr.update(), gr.update())
        return

    total = len(selected_files)
    converted_count = 0

    # tqdm only affects the backend console output, not the Gradio UI.
    for idx, audio_file in enumerate(tqdm(selected_files, desc="Converting files", leave=False), start=1):
        # UI progress: gr.update() with no arguments leaves a component as it
        # is, so the processed list is not replaced by the "empty" message and
        # the checkbox selection is not cleared mid-run.
        yield (f"正在處理：{audio_file} ({idx}/{total})", gr.update(), gr.update())

        audio_path = os.path.join(audio_folder, audio_file)
        if not os.path.exists(audio_path):
            # The file disappeared since the list was built; skip it.
            continue

        base_name, _ = os.path.splitext(audio_file)
        txt_filename = base_name + '.txt'

        # (1) Whisper: produce the raw transcript.
        raw_caption = speech_to_text(audio_path, model)

        # (2) Save the raw transcript to caption_folder.
        caption_path = os.path.join(caption_folder, txt_filename)
        with open(caption_path, 'w', encoding='utf-8') as f:
            f.write(raw_caption)

        # (3) Ask Gemini to repair the transcript.
        prompt_text = prompt_prefix + raw_caption
        fixed_caption = Gemini_model.complete(prompt_text).text

        # (4) Save the repaired text to text_folder.
        text_path = os.path.join(text_folder, txt_filename)
        with open(text_path, 'w', encoding='utf-8') as f:
            f.write(fixed_caption)

        converted_count += 1
        # Short pause so the front end has a chance to pick up the update.
        time.sleep(0.2)

    # All files handled: refresh the processed list and the checkbox group.
    # The checkbox group needs an explicit choices update; returning a bare
    # list would only set its selected value and leave converted files listed.
    processed, unprocessed = get_audio_lists()
    processed_html = generate_html_list(processed)
    yield (
        f"✅ 已成功轉換 {converted_count} 個音檔！",
        processed_html,
        gr.update(choices=unprocessed, value=[]),
    )

def refresh_audio():
    """Recompute the Audio tab state.

    Returns:
        A 5-tuple: processed-list HTML, a checkbox-group update carrying the
        unprocessed files as choices with nothing selected, ``None``, and two
        empty strings.
    """
    done, pending = get_audio_lists()
    checkbox_update = gr.update(choices=pending, value=[])
    return generate_html_list(done), checkbox_update, None, "", ""


  Gemini_model = Gemini(


# Text

In [None]:
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
from langchain.document_loaders import TextLoader
import fitz  # PyMuPDF for PDF parsing
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# FAISS vector store shared across tabs: assigned by process_files() and read
# by gradio_chat(); None until a database has been built.
vector_db = None
# Embedding model handed to FAISS.from_documents() when the database is built.
# NOTE(review): no API key is passed here — presumably it is picked up from the
# GOOGLE_API_KEY environment variable set earlier; confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

def extract_text_from_pdf(file_path):
    """Return the text of every page of a PDF, concatenated in page order.

    The file is opened with PyMuPDF (``fitz``) and closed again on exit.
    """
    with fitz.open(file_path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return "".join(page_texts)

def get_text_files():
    """List the ``.txt`` and ``.pdf`` file names in ``text_folder``, sorted."""
    supported_suffixes = ('.txt', '.pdf')
    return [name for name in sorted(os.listdir(text_folder)) if name.endswith(supported_suffixes)]


def upload_text_file(file):
    """Copy uploaded ``.txt``/``.pdf`` files into ``text_folder``.

    Supports single and multiple uploads (``file_count="multiple"``).

    Args:
        file: ``None``, one upload, or a list of uploads. Each upload may be a
            dict carrying ``'data'`` bytes plus an ``'orig_name'``/``'name'``
            entry, a plain path string, or an object whose ``.name`` attribute
            is the path of the uploaded file.

    Returns:
        tuple: ``(status message, checkbox-group update)``. On success the
        update replaces the group's choices with the current contents of
        ``text_folder``; when nothing was uploaded the group is left untouched.
    """
    if not file:
        # Covers both None and an empty list of uploads.
        return "⚠️ 沒有檔案可以上傳！", gr.update()

    files = file if isinstance(file, list) else [file]

    filenames = []
    for f in files:
        if isinstance(f, dict):
            raw_name = f.get('orig_name') or f.get('name') or "uploaded_file.txt"
            # Strip directories: os.path.join() ignores text_folder when the
            # second argument is an absolute path.
            fname = os.path.basename(raw_name) or "uploaded_file.txt"
            with open(os.path.join(text_folder, fname), 'wb') as out:
                out.write(f['data'])
        else:
            src_path = f if isinstance(f, str) else f.name
            fname = os.path.basename(src_path)
            shutil.copyfile(src_path, os.path.join(text_folder, fname))
        filenames.append(fname)

    msg = f"✅ 已上傳 {len(filenames)} 個檔案: {', '.join(filenames)}"
    # Update the available choices; returning a bare list would instead be
    # interpreted as the group's selected value.
    return msg, gr.update(choices=get_text_files())

def _load_documents(file_path, pdf_source):
    """Load one .txt/.pdf file as a list of Documents; None for other extensions."""
    if file_path.endswith(".txt"):
        return TextLoader(file_path, encoding='utf-8').load()
    if file_path.endswith(".pdf"):
        pdf_text = extract_text_from_pdf(file_path)
        return [Document(page_content=pdf_text, metadata={"source": pdf_source})]
    return None


def process_files(uploaded_files, selected_files):
    """Build the FAISS vector database from checked and freshly uploaded files.

    Checked files are read from ``text_folder``. Uploaded files are read from
    their copy in ``text_folder`` when it exists, otherwise straight from the
    uploaded file's own path. A file that is both checked and uploaded is
    indexed only once. The result is stored in the module-level ``vector_db``.

    Args:
        uploaded_files: ``None``, one upload, or a list of uploads (dict,
            path string, or object with a ``.name`` path attribute).
        selected_files: File names checked in the Text tab, or ``None``.

    Returns:
        tuple[str, str, str]: ``(result message, info HTML, info HTML)``; the
        same HTML is returned twice so it can be shown on both the Text and
        the Chat tab. On failure both HTML values are empty strings.
    """
    global vector_db
    docs = []
    used_files = []  # names of the files that went into this database

    # 1) Checked files.
    for filename in selected_files or []:
        if filename in used_files:
            continue
        file_path = os.path.join(text_folder, filename)
        loaded = _load_documents(file_path, file_path)
        if loaded is None:
            continue
        docs.extend(loaded)
        used_files.append(filename)

    # 2) Freshly uploaded files.
    if uploaded_files:
        if isinstance(uploaded_files, (dict, str)) or hasattr(uploaded_files, 'name'):
            uploaded_files = [uploaded_files]

        for f in uploaded_files:
            if isinstance(f, dict):
                raw_name = f.get('orig_name') or f.get('name') or ""
                temp_path = None
            else:
                temp_path = f if isinstance(f, str) else f.name
                raw_name = temp_path
            up_name = os.path.basename(raw_name)

            if not up_name.endswith((".txt", ".pdf")):
                return f"⚠️ 不支援的檔案格式: {up_name}", "", ""

            if up_name in used_files:
                # Already indexed via the checkbox selection.
                continue

            # Prefer the copy stored by the upload button; fall back to the
            # uploaded file itself when that copy was never made.
            source_path = os.path.join(text_folder, up_name)
            if not os.path.exists(source_path) and temp_path and os.path.exists(temp_path):
                source_path = temp_path

            docs.extend(_load_documents(source_path, up_name))
            used_files.append(up_name)

    if not docs:
        return "⚠️ 沒有可用文件，請至少勾選或上傳 .txt / .pdf 文件", "", ""

    # 3) Split into chunks and build the vector database.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = text_splitter.split_documents(docs)
    vector_db = FAISS.from_documents(chunks, embeddings)

    total_count = len(used_files)
    result_msg = f"✅ 已成功處理 {total_count} 個文件並建立向量資料庫！"

    # Bullet list of the files used, rendered as HTML.
    info_html = "<p>本次向量資料庫使用的檔案：</p>" + generate_html_list(used_files, "（無）")

    # Same HTML for the Text tab and the Chat tab.
    return result_msg, info_html, info_html

def refresh_text_tab():
    """Reset the Text tab.

    Returns:
        A 5-tuple: a checkbox-group update (choices reloaded from
        ``get_text_files()``, nothing selected), ``None``, and three empty
        strings.
    """
    available = get_text_files()
    checkbox_reset = gr.update(choices=available, value=[])
    cleared = ""
    return checkbox_reset, None, cleared, cleared, cleared

import os
import shutil

def save_text_files():
    """Copy every regular file in ``text_folder`` into ``save_text_folder``.

    The destination folder is created when missing; sub-directories of
    ``text_folder`` are not copied.

    Returns:
        str: Status message describing the outcome.
    """
    if not os.path.exists(text_folder):
        return "⚠️ 本機 text_folder 不存在，沒有可保存的檔案。"

    # Make sure the destination exists before copying.
    os.makedirs(save_text_folder, exist_ok=True)

    for entry in os.listdir(text_folder):
        src = os.path.join(text_folder, entry)
        if not os.path.isfile(src):
            continue
        shutil.copyfile(src, os.path.join(save_text_folder, entry))

    return f"✅ 已成功將 {text_folder} 裡的所有檔案，複製到 {save_text_folder}。"


# Summarize

In [None]:
# Instruction appended after the document text when asking Gemini for a summary.
summarize_prompt = "\n請幫我總結以上內容，列出最重要的要點與關鍵資訊。請使用條列式重點，並維持在 200 字以內。"
# Make sure the folder that receives the generated summaries exists.
os.makedirs(summary_folder, exist_ok=True)

def summarize_file(selected_file):
    """Summarize one file from ``text_folder`` with Gemini and save the result.

    The summary is written to ``summary_folder`` as ``<base name>_summary.txt``.

    Args:
        selected_file: Name of a file inside ``text_folder``.

    Returns:
        tuple[str, str]: ``(status message, summary text)``; the summary is an
        empty string when nothing was selected or the file is missing.
    """
    if not selected_file:
        return "⚠️ 請先選擇一個檔案！", ""

    source_path = os.path.join(text_folder, selected_file)
    if not os.path.exists(source_path):
        return "⚠️ 找不到檔案，請重新整理！", ""

    with open(source_path, 'r', encoding='utf-8') as src:
        document_text = src.read()

    # Document first, instruction last.
    response = Gemini_model.complete(document_text + summarize_prompt)
    summary = response.text

    stem = os.path.splitext(selected_file)[0]
    summary_filename = f"{stem}_summary.txt"
    with open(os.path.join(summary_folder, summary_filename), 'w', encoding='utf-8') as dst:
        dst.write(summary)

    return f"✅ 已成功為「{selected_file}」生成摘要，並存入 {summary_filename}", summary

def refresh_summary_tab():
    """Reset the Summary tab.

    Returns:
        A 3-tuple: a radio update (choices reloaded with the sorted ``.txt``
        files of ``text_folder``, nothing selected) and two empty strings.
    """
    txt_names = [name for name in sorted(os.listdir(text_folder)) if name.endswith('.txt')]
    radio_update = gr.update(choices=txt_names, value=None)
    return radio_update, "", ""


# Chat

In [None]:
import gradio as gr
import fitz
import datetime
import pytz
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import TextLoader
from langchain.docstore.document import Document
from langchain.prompts import PromptTemplate
from langchain.chains.question_answering import load_qa_chain

# === Initialization ===
# Chat model used by the question-answering chain.
llm = ChatGoogleGenerativeAI(model="gemini-2.0-flash", temperature=0.2)

# Prompt template; it is filled with {context}, {chat_history} and {question}.
template = """
## 任務說明
請你扮演一個專業的助手，根據以下提供的文本，回答用戶的問題。
請提供詳細、準確、有條理的回答。
請嚴格遵守以下事項：
1. 如果文本中找不到相關資訊，請回答不知道，不要隨意編造答案。
2. 全程使用繁體中文，避免使用簡體中文，專有名詞可以使用英文回答。

## 文本
[文本開始]
{context}
[文本結束]

## 用戶問答
{chat_history}
User: {question}
Assistant:
"""
PROMPT = PromptTemplate(template=template, input_variables=["context", "question", "chat_history"])
# NOTE(review): the recorded cell output echoes this statement next to links to
# LangChain's chain-migration guides, which suggests load_qa_chain is
# deprecated — confirm against the installed LangChain version.
qa_chain = load_qa_chain(llm=llm, chain_type="stuff", prompt=PROMPT)

# Log of answered queries (dicts with "time", "query", "answer"); appended to
# by gradio_chat() and read by the Analysis functions.
history_log = []

def format_chat_history(history):
    """Flatten a messages-style chat history into prompt text.

    The history is read as consecutive (user, assistant) message pairs, each
    message being a dict with a ``"content"`` entry. A trailing message with
    no partner is ignored instead of raising ``IndexError``.

    Args:
        history: List of message dicts, or ``None``/empty for no history.

    Returns:
        str: One ``User: ...\\nAssistant: ...\\n\\n`` section per pair, or an
        empty string when there are no complete pairs.
    """
    history = history or []
    # zip() stops at the shorter slice, which drops an unpaired last message.
    turns = zip(history[0::2], history[1::2])
    return "".join(
        f"User: {user['content']}\nAssistant: {assistant['content']}\n\n"
        for user, assistant in turns
    )


def gradio_chat(user_input, history):
    """Answer one question with the retrieval QA chain.

    Args:
        user_input: Text typed by the user.
        history: Messages-style chat history kept in the session state
            (list of ``{"role", "content"}`` dicts), or ``None``.

    Returns:
        tuple: ``(chatbot value, new state, textbox value)``. The textbox
        value is always ``""`` so the input box is cleared. Warnings and
        errors are shown in the chatbot but are not added to the state, so
        they are not fed back to the model as conversation history.
    """
    history = history or []

    # A blank submission would still cost an embedding lookup and an LLM
    # call; ignore it and leave the chatbot as it is.
    if not user_input or not user_input.strip():
        return gr.update(), history, ""

    user_msg = {"role": "user", "content": user_input}

    if vector_db is None:
        warning = {"role": "assistant", "content": "⚠️ 尚未上傳或選擇文件！"}
        return history + [user_msg, warning], history, ""

    try:
        docs = vector_db.similarity_search(user_input, k=3)
        history_text = format_chat_history(history)
        result = qa_chain.run({
            "input_documents": docs,
            "question": user_input,
            "chat_history": history_text
        })

        tz = pytz.timezone('Asia/Taipei')
        timestamp = datetime.datetime.now(tz).strftime("%Y-%m-%d %H:%M:%S")
        history_log.append({
            "time": timestamp,
            "query": user_input,
            "answer": result
        })

        # Build a new list rather than mutating the state object in place.
        updated_history = history + [user_msg, {"role": "assistant", "content": result}]
        return updated_history, updated_history, ""

    except Exception as e:
        # Best effort: surface the failure in the chat instead of crashing the UI.
        error_msg = {"role": "assistant", "content": f"發生錯誤: {str(e)}"}
        return history + [user_msg, error_msg], history, ""

stuff: https://python.langchain.com/docs/versions/migrating_chains/stuff_docs_chain
map_reduce: https://python.langchain.com/docs/versions/migrating_chains/map_reduce_chain
refine: https://python.langchain.com/docs/versions/migrating_chains/refine_chain
map_rerank: https://python.langchain.com/docs/versions/migrating_chains/map_rerank_docs_chain

See also guides on retrieval and question-answering here: https://python.langchain.com/docs/how_to/#qa-with-rag
  qa_chain = load_qa_chain(llm=llm, chain_type="stuff", prompt=PROMPT)


# Analysis

In [None]:
from wordcloud import WordCloud
import jieba.posseg as pseg
import matplotlib.pyplot as plt

def export_history():
    """Write the query log to ``query_history.txt``.

    Each entry of ``history_log`` becomes a Time / Query / Answer section
    followed by a 50-dash separator line.

    Returns:
        tuple: ``(file name, status message)``; the file name is ``None`` when
        the log is empty.
    """
    if not history_log:
        return None, "⚠️ 目前尚無查詢紀錄！"

    filename = "query_history.txt"
    separator = "-" * 50 + "\n"
    with open(filename, "w", encoding="utf-8") as out:
        for record in history_log:
            out.writelines([
                f"Time: {record['time']}\n",
                f"Query: {record['query']}\n",
                f"Answer: {record['answer']}\n",
                separator,
            ])

    return filename, "✅ 查詢紀錄已生成！"

# === Generate a keyword word cloud (nouns only) ===
def generate_keyword_cloud():
    """Build a word cloud from the nouns found in the logged answers.

    All answers in ``history_log`` are joined, segmented with
    ``jieba.posseg``, and filtered to words whose part-of-speech flag starts
    with ``'n'`` and that are longer than one character. The cloud is saved as
    ``keyword_cloud.png`` and a matplotlib rendering of it as
    ``keyword_cloud_display.png``.

    Returns:
        tuple: ``(cloud file name, preview file name, status message)``; both
        file names are ``None`` when there is no history or no usable noun.
    """
    if not history_log:
        return None, None, "⚠️ 目前尚無查詢紀錄，無法生成詞雲！"

    all_answers = " ".join(entry['answer'] for entry in history_log)

    # Segment the text and keep only nouns longer than one character.
    noun_words = [
        word
        for word, flag in pseg.cut(all_answers)
        if flag.startswith('n') and len(word) > 1
    ]

    if not noun_words:
        return None, None, "⚠️ 沒有足夠的有效名詞生成詞雲！"

    wc = WordCloud(font_path=font_path, width=800, height=400, background_color='white')
    wc.generate(" ".join(noun_words))
    filename = "keyword_cloud.png"
    wc.to_file(filename)

    # Render the preview on an explicit figure instead of pyplot's global
    # "current figure", and always close it: figures are otherwise kept alive
    # by pyplot and accumulate with every click.
    preview_filename = "keyword_cloud_display.png"
    fig = plt.figure(figsize=(10, 5))
    try:
        ax = fig.add_subplot()
        ax.imshow(wc, interpolation='bilinear')
        ax.axis('off')
        fig.tight_layout()
        fig.savefig(preview_filename)
    finally:
        plt.close(fig)

    return filename, preview_filename, "✅ 名詞詞雲已生成！"

# Gradio

In [None]:
def build_interface():
    """Build the five-tab Gradio UI (Audio, Text, Summary, Chat, Analysis) and launch it.

    Components are created inside ``gr.Blocks`` and wired to the handler
    functions defined earlier in the notebook. The function ends by calling
    ``demo.launch(debug=True)``.
    """
    with gr.Blocks() as demo:

        # ========== Tab 1: Audio ==========
        with gr.Tab("Audio"):
            gr.Markdown("## 語音處理")

            # Initial lists shown when the page is first rendered.
            init_processed, init_unprocessed = get_audio_lists()
            init_processed_html = generate_html_list(init_processed)

            with gr.Row():
                with gr.Column():

                    # Processed audio files (HTML)
                    processed_html = gr.HTML(
                        value=init_processed_html,
                        label="已處理完成的音檔"
                    )

                    unprocessed_checkbox = gr.CheckboxGroup(
                        choices=init_unprocessed,
                        label="尚未處理的音檔"
                    )

                    convert_msg = gr.Label(label="轉換結果")
                    convert_btn = gr.Button("轉換文字")
                    refresh_btn = gr.Button("重新整理")

                with gr.Column():
                    audio_uploader = gr.File(
                        label="上傳新的音檔",
                        file_types=["audio"],
                        file_count="single"
                    )
                    upload_msg = gr.Label(label="上傳結果")
                    upload_btn = gr.Button("上傳")

            # Event bindings
            upload_btn.click(
                fn=upload_audio,
                inputs=[audio_uploader],
                outputs=[upload_msg]
            )
            convert_btn.click(
                fn=convert_selected_audio,
                inputs=[unprocessed_checkbox],
                outputs=[convert_msg, processed_html, unprocessed_checkbox]
            )

            refresh_btn.click(
                fn=refresh_audio,
                inputs=[],
                outputs=[processed_html, unprocessed_checkbox, audio_uploader, upload_msg, convert_msg]
            )

        # ========== Tab 2: Text ==========
        with gr.Tab("Text"):
            gr.Markdown("## 文件處理")

            init_txt_pdf_files = get_text_files()

            with gr.Row():
                with gr.Column():
                    db_info_html_text = gr.HTML(label="資料庫資訊 (Text)")  # rendered as HTML
                    # NOTE(review): this hidden component's name is rebound in the
                    # Chat tab below, and no event in this function targets this
                    # instance — it looks unused.
                    db_info_html_chat = gr.HTML(label="資料庫資訊 (Chat)", visible=False)

                    text_checkbox = gr.CheckboxGroup(
                        choices=init_txt_pdf_files,
                        label="選擇已有檔案",
                        value=[]
                    )

                    process_msg = gr.Label(label="處理結果")
                    process_btn = gr.Button("建立資料庫")
                    refresh_text_btn = gr.Button("重新整理")

                with gr.Column():
                    text_file_uploader = gr.File(
                        label="上傳文件 (.txt 或 .pdf)",
                        file_types=[".txt", ".pdf"],
                        file_count="multiple"
                    )
                    text_upload_msg = gr.Label(label="上傳結果")
                    upload_files_btn = gr.Button("上傳文件")

                with gr.Column():
                    save_text_msg = gr.Label(label="保存結果")  # shows the save-result message
                    save_text_btn = gr.Button("保存文本")

            # Upload
            upload_files_btn.click(
                fn=upload_text_file,
                inputs=[text_file_uploader],
                outputs=[text_upload_msg, text_checkbox]
            )

            # Refresh
            refresh_text_btn.click(
                fn=refresh_text_tab,
                inputs=[],
                outputs=[text_checkbox, text_file_uploader, text_upload_msg, process_msg, save_text_msg]
            )

            save_text_btn.click(
                fn=save_text_files,
                inputs=[],   # no inputs: save_text_files takes no arguments
                outputs=[save_text_msg]       # show the save result in this label
            )

        # ========== Tab 3: Summary ==========
        with gr.Tab("Summary"):
            gr.Markdown("## 生成摘要")

            # Single-choice Radio listing the .txt files
            text_files = [f for f in sorted(os.listdir(text_folder)) if f.endswith('.txt')]

            with gr.Row():
                with gr.Column():
                    text_radio = gr.Radio(
                        choices=text_files,
                        label="選擇要摘要的檔案"
                    )

                    summary_btn = gr.Button("生成摘要")
                    summary_result = gr.Markdown(label="摘要內容")
                with gr.Column():
                    summary_status = gr.Label(label="摘要狀態")
                    refresh_sum_btn = gr.Button("重新整理")

            summary_btn.click(
                fn=summarize_file,
                inputs=[text_radio],
                outputs=[summary_status, summary_result]
            )
            refresh_sum_btn.click(
                fn=refresh_summary_tab,
                inputs=[],
                outputs=[text_radio, summary_status, summary_result]
            )

        # ========== Tab 4: Chat ==========
        with gr.Tab("Chat"):
            gr.Markdown("## 問答對話")

            # HTML component showing which files the current database was built from
            db_info_html_chat = gr.HTML(
                value="尚未建立任何資料庫",
                label="資料庫資訊 (Chat)"
            )

            chatbot = gr.Chatbot(label="對話紀錄", type="messages")
            user_input = gr.Textbox(placeholder="請輸入您的問題...", label="輸入")
            clear = gr.Button("清除對話")

            # Per-session chat history passed to and returned from gradio_chat.
            state = gr.State([])

            user_input.submit(
                fn=gradio_chat,
                inputs=[user_input, state],
                outputs=[chatbot, state, user_input]
            )
            clear.click(lambda: ([], []), None, outputs=[chatbot, state])

        # Wired here, after the Chat tab, so that db_info_html_chat refers to the
        # Chat-tab component created above.
        process_btn.click(
            fn=process_files,
            inputs=[text_file_uploader, text_checkbox],
            outputs=[process_msg, db_info_html_text, db_info_html_chat],
            queue=False
        )

        # ========== Tab 5: Analysis ==========
        with gr.Tab("Analysis"):
            gr.Markdown("## 分析工具")

            with gr.Row():
                with gr.Column():
                    keyword_cloud_file = gr.File(label="下載詞雲")
                    keyword_msg = gr.Label(label="詞雲結果")
                    keyword_btn = gr.Button("生成關鍵詞詞雲")
                    keyword_cloud_display = gr.Image(label="詞雲預覽")

                with gr.Column():
                    download_file = gr.File(label="下載紀錄")
                    export_msg = gr.Label(label="匯出結果")
                    export_btn = gr.Button("匯出查詢紀錄")

            export_btn.click(export_history, outputs=[download_file, export_msg])
            keyword_btn.click(generate_keyword_cloud, outputs=[keyword_cloud_file, keyword_cloud_display, keyword_msg])

        demo.launch(debug=True)

# Create the required folders, restore saved texts, then launch the UI.
os.makedirs(audio_folder, exist_ok=True)
os.makedirs(save_text_folder, exist_ok=True)
os.makedirs(caption_folder, exist_ok=True)
os.makedirs(text_folder, exist_ok=True)
os.makedirs(summary_folder, exist_ok=True)

# Copy previously saved text files from save_text_folder into text_folder.
if os.path.exists(save_text_folder):
    for filename in os.listdir(save_text_folder):
        src_file = os.path.join(save_text_folder, filename)
        dst_file = os.path.join(text_folder, filename)

        # Copy regular files only; sub-folders are not processed.
        if os.path.isfile(src_file):
            # Skip files that already exist at the destination.
            if not os.path.exists(dst_file):
                shutil.copyfile(src_file, dst_file)

build_interface()



Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://e0145b8fff80b6a843.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
