In [None]:
!pip install flask flask-cors openai-whisper pydub ffmpeg-python


Collecting flask-cors
  Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl.metadata (5.5 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Downloading Flask_Cors-5.0.0-py2.py3-none-any.whl (14 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Installing collected packages: pydub, ffmpeg-python, flask-cors
Successfully installed ffmpeg-python-0.2.0 flask-cors-5.0.0 pydub-0.25.1


In [10]:
import os

# Define project structure
folders = ["templates", "static", "uploads"]

for folder in folders:
    os.makedirs(folder, exist_ok=True)

print("Project structure created!")


Project structure created!


In [11]:
%%writefile app.py
from flask import Flask, render_template, request, jsonify
import whisper
import os
from datetime import datetime
from pydub import AudioSegment

app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = "uploads"

# Ensure the upload folder exists
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# Load Whisper model
model = whisper.load_model("base")

@app.route("/")
def index():
    return render_template("index.html")

@app.route("/upload", methods=["POST"])
def upload_audio():
    if "audio" not in request.files:
        return jsonify({"error": "No audio file uploaded"}), 400

    audio_file = request.files["audio"]
    filename = datetime.now().strftime("%Y%m%d%H%M%S") + ".wav"
    file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)

    audio_file.save(file_path)

    # Transcribe using Whisper
    result = model.transcribe(file_path)
    text = result["text"]

    return jsonify({"transcription": text})

if __name__ == "__main__":
    app.run(debug=True)


Overwriting app.py


In [12]:
%%writefile templates/index.html
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Speech to Text</title>
    <link rel="stylesheet" href="{{ url_for('static', filename='style.css') }}">
</head>
<body>
    <div class="container">
        <h1>Speech to Text Converter</h1>
        <button id="recordBtn">🎤 Start Recording</button>
        <p id="status">Press the button to start recording.</p>
        <p><strong>Transcription:</strong></p>
        <div id="transcription"></div>
    </div>
    <script src="{{ url_for('static', filename='script.js') }}"></script>
</body>
</html>


Writing templates/index.html


In [13]:
%%writefile static/style.css
body {
    font-family: Arial, sans-serif;
    text-align: center;
    padding: 20px;
    background-color: #f4f4f4;
}

.container {
    background: white;
    padding: 20px;
    border-radius: 8px;
    box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
    display: inline-block;
}

button {
    background-color: #007bff;
    color: white;
    border: none;
    padding: 10px 20px;
    cursor: pointer;
    font-size: 16px;
    border-radius: 5px;
}

button:active {
    background-color: blue;
}


Writing static/style.css


In [14]:
%%writefile static/script.js
let mediaRecorder;
let audioChunks = [];

document.getElementById("recordBtn").addEventListener("click", async function () {
    const status = document.getElementById("status");
    const recordBtn = this;

    if (!mediaRecorder || mediaRecorder.state === "inactive") {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
        mediaRecorder = new MediaRecorder(stream);

        mediaRecorder.ondataavailable = event => {
            audioChunks.push(event.data);
        };

        mediaRecorder.onstop = async () => {
            const audioBlob = new Blob(audioChunks, { type: "audio/wav" });
            audioChunks = [];
            status.innerText = "Uploading...";

            const formData = new FormData();
            formData.append("audio", audioBlob, "recording.wav");

            // Send to Flask backend
            const response = await fetch("/upload", {
                method: "POST",
                body: formData,
            });
            const data = await response.json();

            document.getElementById("transcription").innerText = data.transcription || "Error transcribing.";
            status.innerText = "Recording stopped.";
        };

        mediaRecorder.start();
        recordBtn.innerText = "⏹ Stop Recording";
        status.innerText = "Recording...";
    } else {
        mediaRecorder.stop();
        recordBtn.innerText = "🎤 Start Recording";
    }
});


Writing static/script.js


In [None]:
!pip install flask flask-cors openai-whisper pydub ffmpeg-python openai flask-ngrok pyngrok





In [15]:
%%writefile app.py
from flask import Flask, render_template, request, jsonify
import whisper
import os
from datetime import datetime
from pydub import AudioSegment
from flask_ngrok import run_with_ngrok

app = Flask(__name__)
run_with_ngrok(app)  # Enables ngrok

app.config["UPLOAD_FOLDER"] = "uploads"

# Ensure upload folder exists
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# Load Whisper model
model = whisper.load_model("large")

@app.route("/")
def index():
    return render_template("index.html")

@app.route("/upload", methods=["POST"])
def upload_audio():
    if "audio" not in request.files:
        return jsonify({"error": "No audio file uploaded"}), 400

    audio_file = request.files["audio"]
    filename = datetime.now().strftime("%Y%m%d%H%M%S") + ".wav"
    file_path = os.path.join(app.config["UPLOAD_FOLDER"], filename)

    audio_file.save(file_path)

    # Transcribe using Whisper
    result = model.transcribe(file_path, task="translate",)
    text = result["text"]

    return jsonify({"transcription": text})

if __name__ == "__main__":
    app.run()


Overwriting app.py


In [26]:
from pyngrok import ngrok

# Start Flask on port 5000
public_url = ngrok.connect(5000)
print(f"Public URL: {public_url}")

!python app.py

Public URL: NgrokTunnel: "https://79d7-35-243-215-138.ngrok-free.app" -> "http://localhost:5000"
100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 100MiB/s]
  checkpoint = torch.load(fp, map_location=device)
 * Serving Flask app 'app'
 * Debug mode: off
 * Running on http://127.0.0.1:5000
[33mPress CTRL+C to quit[0m
Usage of ngrok requires a verified account and authtoken.

Sign up for an account: https://dashboard.ngrok.com/signup
Install your authtoken: https://dashboard.ngrok.com/get-started/your-authtoken

ERR_NGROK_4018

 * Running on http://79d7-35-243-215-138.ngrok-free.app
 * Traffic stats available on http://127.0.0.1:4040
127.0.0.1 - - [05/Feb/2025 13:55:26] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [05/Feb/2025 13:55:27] "GET /static/script.js HTTP/1.1" 200 -
127.0.0.1 - - [05/Feb/2025 13:55:27] "GET /static/style.css HTTP/1.1" 200 -
127.0.0.1 - - [05/Feb/2025 13:55:28] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
127.0.0.1 - - [05/Feb/2025 13:55:59] "POST /u

In [17]:
!ngrok authtoken 

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [24]:
!pip install flask-ngrok


Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Installing collected packages: flask-ngrok
Successfully installed flask-ngrok-0.0.25


In [22]:
!apt-get install ffmpeg


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 18 not upgraded.
