### Install Flask package using pip
A leading exclamation mark (`!`) makes the notebook run the rest of the line as a shell command.
If a package is already installed, you can skip its install cell below (the commands are commented out by default).

In [1]:
#!pip3 install flask

In [2]:
#!pip install --upgrade flask


In [3]:
#!pip install watchdog

In [4]:
#!pip install ailabs_asr

In [5]:
#!pip install pydub

In [6]:
#  環境依賴
import gc
import os
import shutil
import sqlite3
import subprocess
import uuid
from datetime import datetime

from ailabs_asr.streaming import StreamingClient
from flask import Flask, render_template, request, jsonify
from pydub import AudioSegment
from werkzeug.utils import secure_filename

from classification import GPT_classification



In [7]:
# Create the Flask application object; every route below is registered on it.
app = Flask(__name__)

In [8]:
# Data path configuration.
# Both folders are located directly under the Flask application's root path.
DATABASE_FOLDER = os.path.join(app.root_path, 'instance')
UPLOAD_FOLDER = os.path.join(app.root_path, 'uploads')

# SQLite database file opened by the request handlers.
DATABASE = os.path.join(DATABASE_FOLDER, 'database.db')

In [9]:
# Audio file extensions accepted for upload (matched case-insensitively).
ALLOWED_EXTENSIONS = {'mp3', 'aac', 'flac', 'ogg', 'wav', 'm4a'}

def allowed_file(filename):
    """Return True when ``filename`` ends in one of ``ALLOWED_EXTENSIONS``.

    The extension is whatever follows the last ``.`` in the name; a name
    that contains no dot at all is rejected.
    """
    _stem, dot, extension = filename.rpartition('.')
    return dot == '.' and extension.lower() in ALLOWED_EXTENSIONS

In [10]:
# Make sure every folder the application needs exists.
# exist_ok=True replaces the separate "exists?" check, so there is no window
# between checking and creating in which another process could create the
# folder and make makedirs fail. A path that exists but is not a directory
# now raises immediately instead of being silently accepted.
for folder in [DATABASE_FOLDER, UPLOAD_FOLDER]:
    os.makedirs(folder, exist_ok=True)

In [11]:
# Root page
@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    landing_template = 'index.html'
    return render_template(landing_template)

In [12]:
# Entry page for the classification section
@app.route('/classify')
def classify():
    """Render the ``class.html`` template for the ``/classify`` route."""
    classification_template = 'class.html'
    return render_template(classification_template)

In [13]:
# Body category page
@app.route('/body')
def bodyPage():
    """Render the shared category template with its title set to ``BODY``."""
    page_context = {'title': "BODY"}
    return render_template('classTemplate.html', **page_context)

In [14]:
# Mind (psychological) category page
@app.route('/psycho')
def psychoPage():
    """Render the shared category template with its title set to ``PSYCHO``."""
    page_context = {'title': "PSYCHO"}
    return render_template('classTemplate.html', **page_context)

In [15]:
# Social category page
@app.route('/social')
def socialPage():
    """Render the shared category template with its title set to ``SOCIAL``."""
    page_context = {'title': "SOCIAL"}
    return render_template('classTemplate.html', **page_context)

In [16]:
# Special category page
@app.route('/special')
def specialPage():
    """Render the shared category template with its title set to ``SPECIAL``."""
    page_context = {'title': "SPECIAL"}
    return render_template('classTemplate.html', **page_context)

In [17]:
# "Other" category page
@app.route('/extra')
def extraPage():
    """Render the shared category template with its title set to ``EXTRA``."""
    page_context = {'title': "EXTRA"}
    return render_template('classTemplate.html', **page_context)

In [18]:
# Fetch the list of names
@app.route('/fetchNameList', methods=['GET'])
def fetchNameList():
    """Return the ``name`` column of the ``nameList`` table as a JSON array.

    Returns:
        A JSON list of names on success, or ``{'error': <message>}`` with
        HTTP status 500 when the query raises.
    """
    try:
        conn = sqlite3.connect(DATABASE)
        try:
            cursor = conn.cursor()
            # Assumes the table is called 'nameList' and has a 'name' column.
            cursor.execute("SELECT name FROM nameList")
            names = [row[0] for row in cursor.fetchall()]
        finally:
            # Using the sqlite3 connection as a context manager only commits or
            # rolls back; it never closes the connection, so close explicitly.
            conn.close()
        return jsonify(names)

    except Exception as e:
        app.logger.error(f'Error fetching name list: {e}')
        return jsonify({'error': str(e)}), 500


In [19]:
# Add a name to the database
@app.route('/addName', methods=['POST'])
def addName():
    """Insert the ``name`` field of the JSON request body into ``nameList``.

    Returns:
        ``{'success': True}`` with 200 on success, ``{'error': ...}`` with 400
        when no name is supplied (including a missing or non-object JSON
        body), or ``{'error': ...}`` with 500 when the insert raises.
    """
    try:
        # silent=True yields None instead of raising on a missing/invalid JSON
        # body, so a malformed request is answered with 400 rather than 500.
        payload = request.get_json(silent=True)
        data = payload if isinstance(payload, dict) else {}
        new_name = data.get('name')
        if not new_name:
            return jsonify({'error': 'No name provided'}), 400

        conn = sqlite3.connect(DATABASE)
        try:
            cursor = conn.cursor()
            cursor.execute("INSERT INTO nameList (name) VALUES (?)", (new_name,))
            conn.commit()
        finally:
            # Close the connection even when the insert or commit fails.
            conn.close()
        return jsonify({'success': True}), 200
    except Exception as e:
        return jsonify({'error': str(e)}), 500

In [20]:
# Fetch the transcripts of a given person
@app.route('/fetchTranscripts', methods=['GET'])
def fetchTranscripts():
    """Return the transcripts stored for the ``person`` query parameter.

    Returns:
        A JSON list of ``{'content', 'timestamp'}`` objects ordered by
        timestamp descending, ``{'error': ...}`` with 400 when ``person`` is
        missing, or ``{'error': ...}`` with 500 when the query raises.
    """
    person = request.args.get('person')  # person name taken from the query string
    if not person:
        return jsonify({'error': 'No person provided'}), 400

    try:
        conn = sqlite3.connect(DATABASE)
        try:
            cursor = conn.cursor()
            # Look the transcripts up by name
            cursor.execute("SELECT content, timestamp FROM transcripts WHERE name = ? ORDER BY timestamp DESC", (person,))
            rows = cursor.fetchall()
        finally:
            # Close the connection even when the query fails.
            conn.close()
        transcripts = [{'content': row[0], 'timestamp': row[1]} for row in rows]
        # Log only the number of records: writing the transcript text itself
        # into the log would copy personal content there and bloat it.
        app.logger.info(f'Transcripts fetched for {person}: {len(transcripts)} record(s)')

        return jsonify(transcripts)

    except Exception as e:
        return jsonify({'error': str(e)}), 500

In [21]:
# Add a transcript for a given person
@app.route('/uploadTranscript', methods=['POST'])
def uploadTranscript():
    """Store the ``content`` of the JSON request body as a transcript for ``name``.

    Returns:
        ``{'message': ...}`` with 200 on success; otherwise the error response
        produced by ``save_transcript_to_db``.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body;
    # an absent or non-object body is then treated as "no fields supplied".
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    name = data.get('name')
    content = data.get('content')

    # save_transcript_to_db returns None on success and an error response on
    # failure; forward that response instead of always reporting success.
    failure = save_transcript_to_db(name, content)
    if failure is not None:
        return failure
    return jsonify({'message': 'Transcript uploaded successfully'}), 200

In [22]:
# Edit a transcript
@app.route('/editTranscript', methods=['POST'])
def editTranscript():
    """Replace the content of the transcript rows whose timestamp matches.

    Reads ``timestamp`` and ``newContent`` from the JSON request body.

    Returns:
        ``{'message': ...}`` with 200 after the update is committed,
        ``{'error': ...}`` with 400 when either field is missing (including a
        missing or non-object JSON body), or ``{'error': ...}`` with 500 when
        the update raises.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body,
    # so such requests fall through to the 400 branch below.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    timestamp = data.get('timestamp')
    new_content = data.get('newContent')

    if not timestamp or not new_content:
        app.logger.error('Timestamp or new content is missing in the request')
        return jsonify({'error': 'Timestamp or new content is missing'}), 400

    try:
        conn = sqlite3.connect(DATABASE)
        try:
            cursor = conn.cursor()
            cursor.execute("UPDATE transcripts SET content = ? WHERE timestamp = ?", (new_content, timestamp))
            conn.commit()
        finally:
            # Close the connection even when the update or commit fails.
            conn.close()
        app.logger.info(f'Transcript edited at {timestamp}')
        return jsonify({'message': 'Transcript edited successfully'}), 200
    except Exception as e:
        app.logger.error(f'Error editing transcript at {timestamp}: {e}')
        return jsonify({'error': str(e)}), 500

In [23]:
# Delete a transcript
@app.route('/deleteTranscript', methods=['POST'])
def deleteTranscript():
    """Delete the transcript rows whose timestamp matches the request.

    Reads ``timestamp`` from the JSON request body.

    Returns:
        ``{'message': ...}`` with 200 after the delete is committed,
        ``{'error': ...}`` with 400 when the timestamp is missing (including a
        missing or non-object JSON body), or ``{'error': ...}`` with 500 when
        the delete raises.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON body,
    # so such requests fall through to the 400 branch below.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}
    timestamp = data.get('timestamp')

    if not timestamp:
        app.logger.error('Timestamp is missing in the request')
        return jsonify({'error': 'Timestamp is missing'}), 400

    try:
        conn = sqlite3.connect(DATABASE)
        try:
            cursor = conn.cursor()
            cursor.execute("DELETE FROM transcripts WHERE timestamp = ?", (timestamp,))
            conn.commit()
        finally:
            # Close the connection even when the delete or commit fails.
            conn.close()
        app.logger.info(f'Transcript deleted at {timestamp}')
        return jsonify({'message': 'Transcript deleted successfully'}), 200
    except Exception as e:
        app.logger.error(f'Error deleting transcript at {timestamp}: {e}')
        return jsonify({'error': str(e)}), 500

In [24]:
# Make sure the audio is in the expected format
def ensure_audio_format(input_file, output_file=None):
    """
    Validate an audio file and convert it with ffmpeg to a WAV file that is
    16 kHz, mono, 16-bit.

    Args:
        input_file: Path of the audio file to convert. Its extension must be
            one of ``ALLOWED_EXTENSIONS``.
        output_file: Destination path. When omitted, ``<input stem>_processed.wav``
            next to the input file is used. An existing file is overwritten.

    Returns:
        The path of the converted file.

    Raises:
        Exception: For every failure (missing input, unsupported extension,
            ffmpeg not found, ffmpeg exiting non-zero). The message starts
            with "處理失敗: " and the original exception is chained as the cause.
    """

    try:
        print("開始處理音訊格式轉換")

        # Make sure the input file exists
        if not os.path.isfile(input_file):
            raise FileNotFoundError(f"找不到檔案: {input_file}")
        print("輸入檔存在")

        # Check that the file extension is supported
        file_extension = os.path.splitext(input_file)[1][1:].lower()
        print(f"檔案副檔名: {file_extension}")
        if file_extension not in ALLOWED_EXTENSIONS:
            raise ValueError(f"不支援的音訊格式: {file_extension}.")
        print("音訊格式受支持")

        # Derive the output file name automatically
        if output_file is None:
            output_file = os.path.splitext(input_file)[0] + '_processed.wav'
        print(f"輸出檔案路徑: {output_file}")

        # Prefer the ffmpeg binary bundled with the application
        ffmpeg_path = os.path.join(app.root_path, 'extension', 'ffmpeg', 'bin', 'ffmpeg.exe')

        # The bundled binary is a Windows executable; when it is absent, fall
        # back to whatever ffmpeg is available on PATH before giving up.
        if not os.path.isfile(ffmpeg_path):
            system_ffmpeg = shutil.which('ffmpeg')
            if system_ffmpeg is None:
                raise RuntimeError(f"ffmpeg 無法找到，檢查路徑是否正確：{ffmpeg_path}")
            ffmpeg_path = system_ffmpeg
        print(f"ffmpeg 路徑: {ffmpeg_path}")

        # Build the conversion command
        command = [
            ffmpeg_path,
            '-y',                  # overwrite instead of prompting on stdin when the output exists
            '-i', input_file,
            '-ar', '16000',        # sample rate 16 kHz
            '-ac', '1',            # mono
            '-sample_fmt', 's16',  # 16-bit samples
            output_file
        ]
        print(f"執行命令：{' '.join(command)}")

        # Run the conversion; check=True turns a non-zero exit into CalledProcessError
        result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        print("ffmpeg 執行成功")
        # errors='replace' keeps undecodable bytes in ffmpeg output from raising here
        print(result.stdout.decode(errors='replace'))

        return output_file

    except subprocess.CalledProcessError as e:
        stderr_text = e.stderr.decode(errors='replace') if e.stderr else ''
        print(f"ffmpeg 執行失敗：{e}")
        print(stderr_text)
        raise Exception(f"處理失敗: {stderr_text}") from e
    except Exception as e:
        print(f"音訊格式處理失敗：{e}")
        raise Exception(f"處理失敗: {e}") from e
    finally:
        # Explicitly trigger a garbage-collection pass
        gc.collect()

In [25]:
# Speech-to-text module
def transcribe_audio(file_path):
    """
    Transcribe an audio file with the streaming speech-to-text client.

    The API key is read from ``key.txt`` in the current working directory.

    Args:
        file_path: Path of the WAV file handed to the streaming client.

    Returns:
        A list with the ``asr_sentence`` of every final-sentence message
        received, in arrival order.

    Raises:
        FileNotFoundError: When ``key.txt`` does not exist, or when it
            contains only whitespace.
    """

    # Locate the key file
    current_dir = os.getcwd()
    key_file_path = os.path.join(current_dir, 'key.txt')

    if not os.path.exists(key_file_path):
        raise FileNotFoundError(f"檔案 '{key_file_path}' 不存在，請檢查檔案路徑或內容！")

    # Read the key
    with open(key_file_path, 'r') as file:
        api_key = file.read().strip()
        # The former `api_key == "" or None` only ever tested the left operand;
        # a plain emptiness check expresses the same condition without the dead branch.
        if not api_key:
            raise FileNotFoundError("金鑰為空！")

    transcript = []

    def on_final_sentence(message):
        # Collect each finalized sentence
        transcript.append(message["asr_sentence"])

    asr_client = StreamingClient(key=api_key)

    # Start the speech-to-text processing
    asr_client.start_streaming_wav(
        pipeline='asr-zh-tw-std',
        file=file_path,
        on_final_sentence=on_final_sentence
    )

    return transcript

In [26]:
def classify_contents(name, timestamp):
    """Classify the newest transcript of ``name`` and store the results.

    Calls ``GPT_classification`` with a query selecting the most recent
    transcript for ``name``, then inserts one row into
    ``GPT_ClassificationResults`` per speaker found in the first entry of each
    non-empty category.

    Args:
        name: Person whose latest transcript is classified.
        timestamp: Value stored with every inserted result row.

    Database errors while storing the results are logged, not raised.
    """
    # GPT_classification takes a finished SQL string, so the name cannot be
    # bound as a parameter here; doubling single quotes keeps the value inside
    # the string literal instead of letting it terminate it.
    quoted_name = str(name).replace("'", "''")
    command = "SELECT name, content FROM transcripts WHERE name = '{}' ORDER BY timestamp DESC LIMIT 1".format(quoted_name)
    name, result = GPT_classification(DATABASE_PATH=DATABASE, sql_command=command)
    try:
        conn = sqlite3.connect(DATABASE)
        try:
            cursor = conn.cursor()
            for category in result:
                if len(result[category]) != 0:
                    for speaker in result[category][0]:
                        # Bound parameters: the classified text may contain quotes,
                        # which would break (or inject into) a formatted statement.
                        cursor.execute(
                            "INSERT INTO GPT_ClassificationResults VALUES (?, ?, ?, ?)",
                            (
                                str(name),
                                str(category),
                                '{}：{}'.format(speaker, result[category][0][speaker]),
                                str(timestamp),
                            ))
            # Commit once so the result rows of one transcript are stored together.
            conn.commit()
        finally:
            # Close the connection even when an insert fails.
            conn.close()
    except Exception as e: app.logger.error(f'Error: {e}')

In [27]:
# Save a transcript to the database
def save_transcript_to_db(user, content):
    """
    Store a transcript in the database and trigger its classification.

    Args:
        user: Name the transcript is stored under.
        content: Transcript text.

    Returns:
        None on success. On failure, a ``(response, status)`` tuple: status
        400 when ``user`` or ``content`` is empty, status 500 when storing or
        classifying raises.
    """
    # Validate before opening a connection so the early return cannot leak one.
    if not user or not content:
        app.logger.error('Name or content is missing in the request')
        return jsonify({'error': 'Name or content is missing'}), 400

    try:
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        conn = sqlite3.connect(DATABASE)
        try:
            cursor = conn.cursor()
            cursor.execute("INSERT INTO transcripts (name, content, timestamp) VALUES (?, ?, ?)", (user, content, timestamp))
            conn.commit()
        finally:
            # Close the connection even when the insert or commit fails.
            conn.close()
        print(f"逐字稿已成功儲存到資料庫，使用者：{user}")
        app.logger.info(f'Transcript uploaded for {user} at {timestamp}')
        # Classification runs after the transcript row has been committed.
        classify_contents(name=user, timestamp=timestamp)
    except Exception as e:
        app.logger.error(f'Error uploading transcript for {user}: {e}')
        return jsonify({'error': str(e)}), 500

In [28]:
# Upload a recording
@app.route('/uploadRecord', methods=['POST'])
def upload_record():
    """Receive an audio upload, convert it, transcribe it and store the transcript.

    Expects a multipart form with a ``file`` part and a ``user`` field.

    Returns:
        ``{'message', 'converted_file_path'}`` with 200 on success; 400 when
        the file part is missing, unnamed or of a disallowed type; the error
        response from ``save_transcript_to_db`` when storing fails; 500 when
        conversion or transcription raises. Both temporary files are removed
        before the response is returned.
    """
    print("接收到上傳請求")
    converted_file_path = None  # stays None until conversion succeeds

    if 'file' not in request.files:
        print("沒有檔案被上傳")
        return jsonify({'success': False, 'message': '沒有檔案被上傳'}), 400

    file = request.files['file']
    print(f"接收到的文件名稱：{file.filename}")

    if file.filename == '':
        print("文件名稱為空")
        return jsonify({'error': 'No selected file'}), 400

    # Check that the file type is allowed
    if not allowed_file(file.filename):
        print(f"不允許的文件類型：{file.filename}")
        return jsonify({'error': 'File type not allowed'}), 400

    # Build the on-disk name from a random id plus the already validated
    # extension only. The client-supplied name is never used in the path, so
    # path separators or ".." in it cannot escape UPLOAD_FOLDER.
    extension = file.filename.rsplit('.', 1)[1].lower()
    temp_file_path = os.path.join(UPLOAD_FOLDER, f"temp_{uuid.uuid4().hex}.{extension}")

    try:
        # Write the uploaded original to disk first
        with open(temp_file_path, 'wb') as f:
            file.save(f)
        print(f"臨時文件已成功保存至：{temp_file_path}")

        # Convert the audio format and store the converted copy
        converted_file_path = ensure_audio_format(temp_file_path)
        print(f"轉換文件已成功保存至：{converted_file_path}")

        print("正在轉換逐字稿......")
        contents = transcribe_audio(converted_file_path)
        print("轉換完成")
        print(contents)

        user = request.form.get('user')

        # Merge the sentences into one transcript
        transcript = "\n".join(contents)

        print(transcript)
        # save_transcript_to_db returns None on success and an error response
        # on failure; forward the failure instead of reporting success.
        save_error = save_transcript_to_db(user, transcript)
        if save_error is not None:
            return save_error

    except Exception as e:
        print(f"音頻轉換失敗：{e}")
        return jsonify({'error': 'Audio conversion failed'}), 500

    finally:
        # Remove the original upload
        if os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        print(f"原始臨時文件已刪除：{temp_file_path}")

        # Remove the converted file (if there is one)
        if converted_file_path and os.path.exists(converted_file_path):
            print(f"轉換文件已刪除：{converted_file_path}")
            os.remove(converted_file_path)

    return jsonify({'message': 'File uploaded and converted successfully', 'converted_file_path': converted_file_path}), 200

In [29]:
# Start Flask's built-in server only when this code runs as the main program.
if __name__ == '__main__':
    # host='0.0.0.0' listens on all addresses; the server is reachable on port 5000.
    app.run(host='0.0.0.0', port=5000)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.17.0.2:5000
Press CTRL+C to quit
172.17.0.1 - - [29/Nov/2024 09:53:47] "GET / HTTP/1.1" 200 -
172.17.0.1 - - [29/Nov/2024 09:53:47] "GET /static/css/style.css HTTP/1.1" 304 -
172.17.0.1 - - [29/Nov/2024 09:53:47] "GET /static/js/index.js HTTP/1.1" 304 -
172.17.0.1 - - [29/Nov/2024 09:53:47] "GET /static/images/profile/01.png HTTP/1.1" 304 -
172.17.0.1 - - [29/Nov/2024 09:53:47] "GET /fetchTranscripts?person=張偉 HTTP/1.1" 200 -
172.17.0.1 - - [29/Nov/2024 09:53:48] "GET /fetchNameList HTTP/1.1" 200 -
172.17.0.1 - - [29/Nov/2024 09:53:49] "GET /fetchTranscripts?person=測試機器人 HTTP/1.1" 200 -
172.17.0.1 - - [29/Nov/2024 09:53:52] "POST /deleteTranscript HTTP/1.1" 200 -
172.17.0.1 - - [29/Nov/2024 09:53:53] "GET /fetchTranscripts?person=測試機器人 HTTP/1.1" 200 -


逐字稿已成功儲存到資料庫，使用者：測試機器人


172.17.0.1 - - [29/Nov/2024 09:54:19] "POST /uploadTranscript?name=測試機器人 HTTP/1.1" 200 -


GPT classification successful


172.17.0.1 - - [29/Nov/2024 09:54:21] "GET /fetchTranscripts?person=測試機器人 HTTP/1.1" 200 -
