In [27]:
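#!pip install flask openai-whisper  # note: the PyPI package named `whisper` is unrelated to OpenAI Whisper
#! pip install git+https://github.com/openai/whisper.git -q
#!pip install ffmpeg  # note: this does not provide the ffmpeg executable Whisper calls; install ffmpeg with the OS package manager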
#!pip install openai
#!pip install transformers
#!pip install librosa pyaudio
#!pip install --force-reinstall scipy

In [28]:
import logging
import os
import shutil
import tempfile
import time

import librosa
import numpy as np
import pyaudio
import whisper
from flask import Flask, request, jsonify
# Create the Flask application object that the route decorators below attach to.
app = Flask(__name__)
# NOTE(review): no cross-origin (CORS) handling is configured in this cell; confirm whether it is needed.

# Configure root logging: INFO level, each record prefixed with timestamp and level name.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Load the Whisper "base" model once at import time so request handlers can reuse it.
model = whisper.load_model("base")

@app.route('/')
def index():
    """Serve the single-page call UI.

    The returned HTML page records microphone audio in the browser, posts each
    recorded segment to ``/transcribe`` after roughly two seconds of silence,
    and shows the ``transcription`` field of the JSON response.

    Returns:
        str: the complete HTML document (inline CSS and JavaScript included).
    """
    return """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Real-Time Speech to Text</title>
    <style>
        /* Add Skype-like styling */
        body {
            font-family: Arial, sans-serif;
            background-color: #fff;
        }
        .container {
            max-width: 400px;
            margin: 40px auto;
            background-color: #f7f7f7;
            padding: 20px;
            border-radius: 10px;
            box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
        }
        .header {
            background-color: #333;
            color: #fff;
            padding: 10px;
            border-bottom: 1px solid #333;
            border-radius: 10px 10px 0 0;
        }
        .header span {
            font-weight: bold;
            font-size: 18px;
        }
        .call-info {
            margin-top: 20px;
            display: flex;
            flex-wrap: wrap;
            justify-content: space-between;
        }
        .call-info span {
            font-weight: bold;
            margin-right: 10px;
            font-size: 16px;
        }
        .transcription {
            margin-top: 20px;
            padding: 20px;
            border: 1px solid #ddd;
            border-radius: 10px;
            background-color: #f9f9f9;
        }
        .button-container {
            margin-top: 20px;
            text-align: center;
        }
        .button-container button {
            padding: 10px 20px;
            border: none;
            border-radius: 10px;
            background-color: #333;
            color: #fff;
            cursor: pointer;
        }
        .button-container button:hover {
            background-color: #444;
        }
        /* Add Skype-like video call elements */
        .video-call {
            display: flex;
            flex-wrap: wrap;
            justify-content: center;
            margin-top: 20px;
        }
        .video-call div {
            width: 50%;
            height: 200px;
            background-color: #333;
            border-radius: 10px;
            margin: 10px;
        }
        .video-call div:first-child {
            background-image: url('https://images.unsplash.com/photo-1500648767791-00dcc994a43e?q=80&w=1000&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxzZWFyY2h8Mnx8cmFuZG9tJTIwcGVvcGxlfGVufDB8fDB8fHww');
            background-size: cover;
            background-position: center;
        }
        .video-call div:last-child {
            background-image: url('https://preview.redd.it/created-random-people-using-chatgpt-midjourney-do-you-know-v0-q1aa450i5dqb1.png?width=1024&format=png&auto=webp&s=c4e9abc47d193474a2fa1a7e337d9d9340dce947');
            background-size: cover;
            background-position: center;
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <span>Skype Call</span>
        </div>
        <div class="call-info">
            <span>Call ID:</span> <span>1234567890</span>
            <span>Caller:</span> <span>John Doe</span>
            <span>Duration:</span> <span>00:00:00</span>
        </div>
        <div class="video-call">
            <div></div>
            <div></div>
        </div>
        <div class="transcription">
            <p id="transcription"></p>
        </div>
        <div class="button-container">
            <button id="endCall">End Call</button>
        </div>
    </div>

    <script>
    let audioContext, microphone, mediaRecorder, audioChunks = [], silenceDetector;
    let lastSoundTimestamp = Date.now();
    let callEnded = false; // Set by "End Call" so the recorder is not restarted afterwards
    const silenceThreshold = 2000; // Increased silence threshold to 2 seconds

    navigator.mediaDevices.getUserMedia({ audio: true })
        .then(stream => {
            audioContext = new AudioContext();
            microphone = audioContext.createMediaStreamSource(stream);
            mediaRecorder = new MediaRecorder(stream);
            setupMediaRecorder();
        })
        .catch(error => console.error('Error accessing media devices.', error));

    function setupMediaRecorder() {
        mediaRecorder.ondataavailable = event => {
            if (event.data.size > 0) {
                audioChunks.push(event.data);
            }
        };

        mediaRecorder.start(1000); // Collect data in chunks of 1 second

        silenceDetector = setInterval(() => {
            if ((Date.now() - lastSoundTimestamp) > silenceThreshold && audioChunks.length) {
                mediaRecorder.stop();
            }
        }, 1000); // Check every 1 second for extended silence

        const levelMeter = audioContext.createScriptProcessor(4096, 1, 1);
        levelMeter.onaudioprocess = function(event) {
            var input = event.inputBuffer.getChannelData(0);
            var sum = 0, i = 0;
            for (; i < input.length; ++i) sum += input[i] * input[i];
            if (Math.sqrt(sum / input.length) > 0.001) { // Detect sound
                lastSoundTimestamp = Date.now();
            }
        };
        microphone.connect(levelMeter);
        // Some browsers only fire onaudioprocess while the node is connected to a
        // destination. The handler never writes to the output buffer, so this is
        // not intended to play the microphone back.
        levelMeter.connect(audioContext.destination);

        mediaRecorder.onstop = () => {
            sendAudioToServer();
            audioChunks = []; // Clear the buffer after sending
            if (callEnded) {
                // Call is over: release the microphone instead of recording again.
                mediaRecorder.stream.getTracks().forEach(track => track.stop());
                return;
            }
            mediaRecorder.start(1000); // Restart recording after sending data
        };
    }

    function sendAudioToServer() {
        if (audioChunks.length > 0) {
            const audioBlob = new Blob(audioChunks, { type: 'audio/wav' });
            const formData = new FormData();
            formData.append('audio', audioBlob, 'file.wav');

            fetch('/transcribe', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                document.getElementById('transcription').textContent = data.transcription;
            })
            .catch(console.error);
        }
    }

    document.getElementById('endCall').addEventListener('click', () => {
        callEnded = true;
        clearInterval(silenceDetector);
        // mediaRecorder is undefined when microphone access was denied, and
        // stop() throws if the recorder is already inactive.
        if (mediaRecorder && mediaRecorder.state !== 'inactive') {
            mediaRecorder.stop(); // This will also trigger the last data send if there are any chunks left
        }
    });
</script>

</body>
</html>


            """





def format_list(lst:list):
    """Join the entries of ``lst`` into one display string.

    Every entry is followed by a single space. A newline is inserted after
    the sixth entry and then after every fifth entry that follows, so the
    first row holds six entries and later rows hold five.

    Args:
        lst: the strings to join, in order.

    Returns:
        str: the joined text; an empty string when ``lst`` is empty.
    """
    chunks = []
    for position, entry in enumerate(lst):
        chunks.append(entry + " ")
        # Breaks fall after positions 5, 10, 15, ... (zero-based).
        if position and position % 5 == 0:
            chunks.append("\n")
    return ''.join(chunks)

# Transcripts accumulated, in arrival order, across every request served by this process.
conversation_history = []

# Preferred parent folder for per-request scratch directories. It is used only
# when it exists; otherwise the platform's default temp location is used.
_PREFERRED_TMP_PARENT = "D:\\DEV\\WebdevFolder\\RealEstateAI"


@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Transcribe an uploaded audio clip and return the running transcript.

    Expects a multipart form upload with the file under the ``audio`` field.
    The clip is written to a private scratch directory, passed to the
    module-level Whisper ``model``, and the recognised text is appended to
    ``conversation_history``.

    Returns:
        A JSON response ``{"transcription": <all text so far>}`` on success,
        ``{"error": ...}`` with status 400 when no ``audio`` file is supplied,
        or ``{"error": ...}`` with status 500 when saving or transcription fails.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "No audio file provided"}), 400

    audio_file = request.files['audio']

    temp_dir = None
    try:
        parent_dir = _PREFERRED_TMP_PARENT if os.path.isdir(_PREFERRED_TMP_PARENT) else None
        temp_dir = tempfile.mkdtemp(dir=parent_dir)

        # The client-supplied filename is untrusted (it may contain "..", path
        # separators, or be empty), so only a purely alphanumeric extension is
        # kept and the file is stored under a fixed name.
        extension = os.path.splitext(audio_file.filename or "")[1]
        if not extension[1:].isalnum():
            extension = ""
        temp_audio_path = os.path.join(temp_dir, "upload" + extension)
        audio_file.save(temp_audio_path)

        result = model.transcribe(temp_audio_path)
        conversation_history.append(result['text'])
        conversation_history_str = format_list(conversation_history)
        return jsonify({"transcription": conversation_history_str})
    except Exception as e:
        logging.exception("An error occurred during transcription")
        return jsonify({"error": str(e)}), 500
    finally:
        # Best-effort cleanup: never let a cleanup failure replace the response
        # chosen above, and tolerate a directory that was never populated.
        if temp_dir is not None:
            shutil.rmtree(temp_dir, ignore_errors=True)
    

# Start Flask's built-in server on port 5000 with debug mode off when this
# module is run directly; the call blocks while the server is running.
if __name__ == "__main__":
    app.run(port=5000, debug=False)


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
2024-05-03 12:24:43,554 - INFO - Press CTRL+C to quit
2024-05-03 12:24:46,960 - INFO - 127.0.0.1 - - [03/May/2024 12:24:46] "GET / HTTP/1.1" 200 -
2024-05-03 12:25:02,357 - INFO - 127.0.0.1 - - [03/May/2024 12:25:02] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:25:04,359 - INFO - 127.0.0.1 - - [03/May/2024 12:25:04] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:25:06,379 - INFO - 127.0.0.1 - - [03/May/2024 12:25:06] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:25:08,553 - INFO - 127.0.0.1 - - [03/May/2024 12:25:08] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:25:10,565 - INFO - 127.0.0.1 - - [03/May/2024 12:25:10] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:25:12,644 - INFO - 127.0.0.1 - - [03/May/2024 12:25:12] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:25:14,692 - INFO - 127.0.0.1 - - [03/May/2024 12:25:14] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:25:16,740 - INFO - 127.0.0.1 - - [03/May/2024 12:25:16] "POST /tra

2024-05-03 12:27:13,736 - INFO - 127.0.0.1 - - [03/May/2024 12:27:13] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:27:18,343 - INFO - 127.0.0.1 - - [03/May/2024 12:27:18] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:27:20,402 - INFO - 127.0.0.1 - - [03/May/2024 12:27:20] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:27:21,720 - INFO - 127.0.0.1 - - [03/May/2024 12:27:21] "POST /transcribe HTTP/1.1" 200 -
2024-05-03 12:27:23,085 - INFO - 127.0.0.1 - - [03/May/2024 12:27:23] "POST /transcribe HTTP/1.1" 200 -
