<a href="https://colab.research.google.com/github/Shriram-26/Computer-Vision/blob/main/shree_med.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install flask pyngrok pydub ffmpeg-python openai-whisper google-generativeai --quiet

import os, tempfile, re, traceback
import json
from datetime import timedelta
from flask import Flask, request, jsonify, render_template_string
from pydub import AudioSegment
import whisper
from google import genai
from google.genai import types
from pyngrok import ngrok, conf

# ---------------- API KEYS ----------------
# The Gemini key is read from the environment so that no credential is stored
# in the notebook. Set GEMINI_API_KEY (or GOOGLE_API_KEY) before running.
# NOTE(review): a key that was ever committed in plain text should be revoked.
GEN_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not GEN_API_KEY:
    raise RuntimeError(
        "Set the GEMINI_API_KEY environment variable before running this cell."
    )

client = genai.Client(api_key=GEN_API_KEY)

# ---------------- Load Whisper ----------------
# The model is loaded once at module level and reused by transcribe_audio().
print("Loading Whisper model...")
model = whisper.load_model("large")
print("Whisper model loaded successfully!")

# ---------------- Supported Languages ----------------
# Language code -> display name. Not referenced elsewhere in the visible code.
LANGUAGES = {
    "en": "English", "hi": "Hindi", "mr": "Marathi",
    "ta": "Tamil", "te": "Telugu", "gu": "Gujarati",
    "kn": "Kannada", "bn": "Bengali", "ur": "Urdu"
}

# Flask application; uploads are written below UPLOAD_FOLDER.
app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = "/content/uploads"
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# ----------- Helper Functions -----------
def preprocess_audio(file_path):
    """Convert an audio file to mono 16 kHz WAV stored in a new temporary file.

    Args:
        file_path: Path of the audio file to load with pydub.

    Returns:
        Path of the converted WAV file. The file is not deleted automatically;
        the caller must remove it when done.

    Raises:
        Exception: Any loading/export error is printed and re-raised. A
            partially written output file is removed first.
    """
    processed_path = None
    try:
        print("Preprocessing audio...")
        audio = AudioSegment.from_file(file_path)
        audio = audio.set_channels(1).set_frame_rate(16000)
        # Only a unique path is needed; close the handle immediately so it is
        # not leaked.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            processed_path = tmp.name
        # export() returns the open output handle; close it explicitly.
        audio.export(processed_path, format="wav").close()
        return processed_path
    except Exception as e:
        print(f"Error in preprocess_audio: {e}")
        if processed_path is not None:
            try:
                os.remove(processed_path)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
        raise

def transcribe_audio(audio_path, chunk_length_sec=60, language=None):
    """Transcribe an audio file with the module-level Whisper model, chunk by chunk.

    The audio is cut into consecutive chunks of ``chunk_length_sec`` seconds;
    each chunk is exported to a temporary WAV file, transcribed and deleted.

    Args:
        audio_path: Path of the audio file to load with pydub.
        chunk_length_sec: Chunk length in seconds (must be positive).
        language: Language passed through to ``model.transcribe``.

    Returns:
        The chunk transcriptions joined by blank lines, each preceded by a
        ``[H:MM:SS]`` line giving the chunk's start offset.

    Raises:
        ValueError: If ``chunk_length_sec`` is not positive.
        Exception: Any other error is printed and re-raised.
    """
    try:
        print("Transcribing audio...")
        # Work in whole milliseconds so that range() also accepts float lengths.
        chunk_ms = int(chunk_length_sec * 1000)
        if chunk_ms <= 0:
            raise ValueError("chunk_length_sec must be a positive number of seconds")

        audio = AudioSegment.from_file(audio_path)
        duration = len(audio) / 1000
        print(f"Audio duration: {duration} seconds")

        sections = []
        for start_ms in range(0, len(audio), chunk_ms):
            chunk = audio[start_ms:start_ms + chunk_ms]
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                chunk_path = tmp.name
            try:
                # export() returns the open output handle; close it explicitly.
                chunk.export(chunk_path, format="wav").close()
                result = model.transcribe(chunk_path, language=language)
            finally:
                # Remove the chunk file even when export/transcription fails.
                os.remove(chunk_path)
            start_time = str(timedelta(milliseconds=start_ms))
            sections.append(f"[{start_time}]\n{result['text'].strip()}")

        return "\n\n".join(sections).strip()
    except Exception as e:
        print(f"Error in transcribe_audio: {e}")
        raise

def _strip_code_fence(payload):
    """Remove a surrounding Markdown code fence (``` or ```json) from payload."""
    payload = payload.strip()
    payload = re.sub(r"^```(?:json)?\s*", "", payload, flags=re.IGNORECASE)
    payload = re.sub(r"\s*```$", "", payload)
    return payload.strip()


def correct_text_with_gemini(text, audio_path=None):
    """Ask Gemini to clean up a transcription and produce a JSON version of it.

    Args:
        text: Raw transcription to correct.
        audio_path: Optional path of the audio file; when given, its bytes are
            attached to the request with MIME type ``audio/wav``.

    Returns:
        A ``(clean_text, json_obj)`` tuple:
          clean_text: corrected transcription with asterisks removed; the
              original ``text`` when the model returned nothing usable.
          json_obj: the parsed JSON section, ``{"rawJson": <str>}`` when that
              section is not valid JSON, or ``None`` when the reply has no
              ``<<<JSON>>>`` marker or the request failed.

    Any exception is printed and turned into the ``(text, None)`` fallback.
    """
    try:
        prompt = (
            "You are given an audio transcription along with the original audio file.\n"
            "Your tasks:\n"
            "1) Correct grammar and spelling.\n"
            "2) Improve formatting and readability.\n"
            "3) DO NOT change the order of information.\n"
            "4) The JSON MUST follow the EXACT SAME SEQUENCE of sections as the corrected transcription.\n"
            "5) Every heading and sub-heading in the transcription must appear in the JSON in the same order.\n"
            "6) Only include fields that actually exist in the transcription. Do NOT add new fields.\n"
            "7) Preserve meaning exactly. No invented data.\n\n"

            "OUTPUT FORMAT (VERY IMPORTANT):\n"
            "First output:\n"
            "<<<TEXT>>>\n"
            "[clean corrected transcription formatted normally (NO asterisks, NO markdown)]\n"
            "<<<JSON>>>\n"
            "[valid JSON only, no comments]\n\n"

            f"Transcription:\n{text}"
        )

        parts = [types.Part.from_text(text=prompt)]

        if audio_path:
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
            parts.append(types.Part.from_bytes(data=audio_bytes, mime_type="audio/wav"))

        resp = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=parts,
        )

        resp_text = (resp.text or "").strip()

        if "<<<JSON>>>" not in resp_text:
            # Model ignored the markers: strip Markdown asterisks, no JSON.
            # An empty reply falls back to the uncorrected transcription.
            return re.sub(r"\*+", "", resp_text).strip() or text, None

        text_part, json_part = resp_text.split("<<<JSON>>>", 1)

        # The <<<TEXT>>> marker is optional; everything before <<<JSON>>> is
        # treated as the corrected text when it is missing.
        if "<<<TEXT>>>" in text_part:
            text_part = text_part.split("<<<TEXT>>>", 1)[1]

        clean_text = re.sub(r"\*+", "", text_part).strip() or text
        json_str = json_part.strip()

        try:
            # Models often wrap JSON in ``` fences despite the instructions.
            json_obj = json.loads(_strip_code_fence(json_str))
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print("JSON parse error from Gemini:", e)
            # keep raw JSON for debugging
            json_obj = {"rawJson": json_str}

        return clean_text, json_obj

    except Exception as e:
        print(f"Error in correct_text_with_gemini: {e}")
        # fallback: original text, no JSON
        return text, None

# ----------- HTML Template -----------
# Jinja template for the single page of the app.
# Context variables read by the template:
#   results - object with `corrected_text` and `structured_json`, or None
#   error   - error message to show in red, or None
# The form posts the fields `file` and `language` to /process.
INDEX_HTML = """
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <title>Speech-to-Text</title>
</head>
<body>
  <h1>Upload Audio</h1>
  <form method="POST" action="/process" enctype="multipart/form-data">
    <label>Audio File:</label>
    <input type="file" name="file" accept="audio/*" required><br><br>
    <label>Source Language (e.g., en, hi):</label>
    <input type="text" name="language" value="en"><br><br>
    <button type="submit">Transcribe</button>
  </form>

  {% if results %}
  <h2>Results</h2>

  <h3>Corrected Transcription:</h3>
  <pre>{{ results.corrected_text }}</pre>

  {% if results.structured_json %}
  <h3>Structured JSON:</h3>
  <pre>{{ results.structured_json | tojson(indent=2) }}</pre>
  {% endif %}
  {% endif %}

  {% if error %}
  <h3 style="color:red;">Error: {{ error }}</h3>
  {% endif %}
</body>
</html>
"""

# ----------- Routes -----------
@app.route("/")
def index():
    """Serve the upload form with no results and no error message."""
    context = {"results": None, "error": None}
    return render_template_string(INDEX_HTML, **context)

@app.route("/process", methods=["POST"])
def process_audio():
    """Handle an upload: preprocess, transcribe, correct, and render the result.

    Reads the ``file`` upload and the ``language`` form field (blank or missing
    falls back to ``"en"``). The upload and the converted WAV are removed when
    the request finishes, whether it succeeded or failed.

    Returns:
        The rendered INDEX_HTML page, with ``results`` on success or ``error``
        on failure.
    """
    save_path = None
    processed_path = None
    try:
        file = request.files.get("file")
        if file is None:
            return render_template_string(INDEX_HTML, results=None, error="No audio file uploaded")

        if not file.filename:
            return render_template_string(INDEX_HTML, results=None, error="No file selected")

        lang = (request.form.get("language") or "").strip() or "en"

        # The client-supplied filename is untrusted: keep only its extension
        # and let tempfile choose a unique name inside the upload folder. This
        # prevents path traversal and collisions between uploads.
        extension = os.path.splitext(os.path.basename(file.filename))[1]
        fd, save_path = tempfile.mkstemp(suffix=extension, dir=app.config["UPLOAD_FOLDER"])
        os.close(fd)
        file.save(save_path)

        processed_path = preprocess_audio(save_path)
        transcribed_text = transcribe_audio(processed_path, language=lang)
        corrected_text, structured_json = correct_text_with_gemini(transcribed_text, processed_path)

        results = {
            "raw_transcription": transcribed_text,
            "corrected_text": corrected_text,
            "structured_json": structured_json,
        }

        return render_template_string(INDEX_HTML, results=results, error=None)

    except Exception as e:
        error_msg = f"Error processing audio: {str(e)}"
        traceback.print_exc()
        return render_template_string(INDEX_HTML, results=None, error=error_msg)

    finally:
        # Always remove the temporary files, including on the error path.
        for path in (processed_path, save_path):
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except OSError as cleanup_error:
                    print(f"Could not remove {path}: {cleanup_error}")

# ---------------- ngrok setup ----------------
# The tunnel token is read from the environment so that no credential is
# stored in the notebook. Set NGROK_AUTHTOKEN before running.
# NOTE(review): a token that was ever committed in plain text should be revoked.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTHTOKEN")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError(
        "Set the NGROK_AUTHTOKEN environment variable before running this cell."
    )
conf.get_default().auth_token = NGROK_AUTH_TOKEN
port = 5000
# Open the public tunnel first so the URL is printed before the server blocks.
public_url = ngrok.connect(port).public_url
print("Ngrok URL:", public_url)

if __name__ == "__main__":
    app.run(host="0.0.0.0",port=port)

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/803.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m798.7/803.2 kB[0m [31m33.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m803.2/803.2 kB[0m [31m16.9 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for openai-whisper (pyproject.toml) ... [?25l[?25hdone


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


Loading Whisper model...


100%|██████████████████████████████████████| 2.88G/2.88G [00:17<00:00, 175MiB/s]


Whisper model loaded successfully!
Ngrok URL: https://18ddf377c6e7.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 07:45:58] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 07:45:58] "GET /favicon.ico HTTP/1.1" 404 -


Preprocessing audio...
Transcribing audio...
Audio duration: 247.512 seconds


INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 07:48:31] "POST /process HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 07:49:38] "GET / HTTP/1.1" 200 -


Preprocessing audio...
Transcribing audio...
Audio duration: 300.33 seconds


INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 07:51:12] "POST /process HTTP/1.1" 200 -


In [None]:
!pip install flask pyngrok pydub ffmpeg-python openai-whisper google-generativeai --quiet

import os, tempfile, re, traceback
import json
from datetime import timedelta
from flask import Flask, request, jsonify, render_template_string
from pydub import AudioSegment
import whisper
from google import genai
from google.genai import types
from pyngrok import ngrok, conf

# ---------------- API KEYS ----------------
# The Gemini key is read from the environment so that no credential is stored
# in the notebook. Set GEMINI_API_KEY (or GOOGLE_API_KEY) before running.
# NOTE(review): a key that was ever committed in plain text should be revoked.
GEN_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not GEN_API_KEY:
    raise RuntimeError(
        "Set the GEMINI_API_KEY environment variable before running this cell."
    )

client = genai.Client(api_key=GEN_API_KEY)

# ---------------- Load Whisper ----------------
# The model is loaded once at module level and reused by transcribe_audio().
print("Loading Whisper model...")
model = whisper.load_model("large")
print("Whisper model loaded successfully!")

# ---------------- Supported Languages ----------------
# Language code -> display name. Not referenced elsewhere in the visible code.
LANGUAGES = {
    "en": "English", "hi": "Hindi", "mr": "Marathi",
    "ta": "Tamil", "te": "Telugu", "gu": "Gujarati",
    "kn": "Kannada", "bn": "Bengali", "ur": "Urdu"
}

# Flask application; uploads are written below UPLOAD_FOLDER.
app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = "/content/uploads"
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# ----------- Helper Functions -----------
def preprocess_audio(file_path):
    """Convert an audio file to mono 16 kHz WAV stored in a new temporary file.

    Args:
        file_path: Path of the audio file to load with pydub.

    Returns:
        Path of the converted WAV file. The file is not deleted automatically;
        the caller must remove it when done.

    Raises:
        Exception: Any loading/export error is printed and re-raised. A
            partially written output file is removed first.
    """
    processed_path = None
    try:
        print("Preprocessing audio...")
        audio = AudioSegment.from_file(file_path)
        audio = audio.set_channels(1).set_frame_rate(16000)
        # Only a unique path is needed; close the handle immediately so it is
        # not leaked.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            processed_path = tmp.name
        # export() returns the open output handle; close it explicitly.
        audio.export(processed_path, format="wav").close()
        return processed_path
    except Exception as e:
        print(f"Error in preprocess_audio: {e}")
        if processed_path is not None:
            try:
                os.remove(processed_path)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
        raise

def transcribe_audio(audio_path, chunk_length_sec=60, language=None):
    """Transcribe an audio file with the module-level Whisper model, chunk by chunk.

    The audio is cut into consecutive chunks of ``chunk_length_sec`` seconds;
    each chunk is exported to a temporary WAV file, transcribed and deleted.

    Args:
        audio_path: Path of the audio file to load with pydub.
        chunk_length_sec: Chunk length in seconds (must be positive).
        language: Language passed through to ``model.transcribe``.

    Returns:
        The chunk transcriptions joined by blank lines, each preceded by a
        ``[H:MM:SS]`` line giving the chunk's start offset.

    Raises:
        ValueError: If ``chunk_length_sec`` is not positive.
        Exception: Any other error is printed and re-raised.
    """
    try:
        print("Transcribing audio...")
        # Work in whole milliseconds so that range() also accepts float lengths.
        chunk_ms = int(chunk_length_sec * 1000)
        if chunk_ms <= 0:
            raise ValueError("chunk_length_sec must be a positive number of seconds")

        audio = AudioSegment.from_file(audio_path)
        duration = len(audio) / 1000
        print(f"Audio duration: {duration} seconds")

        sections = []
        for start_ms in range(0, len(audio), chunk_ms):
            chunk = audio[start_ms:start_ms + chunk_ms]
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                chunk_path = tmp.name
            try:
                # export() returns the open output handle; close it explicitly.
                chunk.export(chunk_path, format="wav").close()
                result = model.transcribe(chunk_path, language=language)
            finally:
                # Remove the chunk file even when export/transcription fails.
                os.remove(chunk_path)
            start_time = str(timedelta(milliseconds=start_ms))
            sections.append(f"[{start_time}]\n{result['text'].strip()}")

        return "\n\n".join(sections).strip()
    except Exception as e:
        print(f"Error in transcribe_audio: {e}")
        raise

def _strip_code_fence(payload):
    """Remove a surrounding Markdown code fence (``` or ```json) from payload."""
    payload = payload.strip()
    payload = re.sub(r"^```(?:json)?\s*", "", payload, flags=re.IGNORECASE)
    payload = re.sub(r"\s*```$", "", payload)
    return payload.strip()


def correct_text_with_gemini(text, audio_path=None):
    """Ask Gemini to clean up a transcription and produce a JSON version of it.

    Args:
        text: Raw transcription to correct.
        audio_path: Optional path of the audio file; when given, its bytes are
            attached to the request with MIME type ``audio/wav``.

    Returns:
        A ``(clean_text, json_obj)`` tuple:
          clean_text: corrected transcription with asterisks removed; the
              original ``text`` when the model returned nothing usable.
          json_obj: the parsed JSON section, ``{"rawJson": <str>}`` when that
              section is not valid JSON, or ``None`` when the reply has no
              ``<<<JSON>>>`` marker or the request failed.

    Any exception is printed and turned into the ``(text, None)`` fallback.
    """
    try:
        prompt = (
            "You are given an audio transcription along with the original audio file.\n"
            "Your tasks:\n"
            "1) Correct grammar and spelling.\n"
            "2) Improve formatting and readability.\n"
            "3) DO NOT change the order of information.\n"
            "4) The JSON MUST follow the EXACT SAME SEQUENCE of sections as the corrected transcription.\n"
            "5) Every heading and sub-heading in the transcription must appear in the JSON in the same order.\n"
            "6) Only include fields that actually exist in the transcription. Do NOT add new fields.\n"
            "7) Preserve meaning exactly. No invented data.\n\n"

            "OUTPUT FORMAT (VERY IMPORTANT):\n"
            "First output:\n"
            "<<<TEXT>>>\n"
            "[clean corrected transcription formatted normally (NO asterisks, NO markdown)]\n"
            "<<<JSON>>>\n"
            "[valid JSON only, no comments]\n\n"

            f"Transcription:\n{text}"
        )

        parts = [types.Part.from_text(text=prompt)]

        if audio_path:
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
            parts.append(types.Part.from_bytes(data=audio_bytes, mime_type="audio/wav"))

        resp = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=parts,
        )

        resp_text = (resp.text or "").strip()

        if "<<<JSON>>>" not in resp_text:
            # Model ignored the markers: strip Markdown asterisks, no JSON.
            # An empty reply falls back to the uncorrected transcription.
            return re.sub(r"\*+", "", resp_text).strip() or text, None

        text_part, json_part = resp_text.split("<<<JSON>>>", 1)

        # The <<<TEXT>>> marker is optional; everything before <<<JSON>>> is
        # treated as the corrected text when it is missing.
        if "<<<TEXT>>>" in text_part:
            text_part = text_part.split("<<<TEXT>>>", 1)[1]

        clean_text = re.sub(r"\*+", "", text_part).strip() or text
        json_str = json_part.strip()

        try:
            # Models often wrap JSON in ``` fences despite the instructions.
            json_obj = json.loads(_strip_code_fence(json_str))
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print("JSON parse error from Gemini:", e)
            # keep raw JSON for debugging
            json_obj = {"rawJson": json_str}

        return clean_text, json_obj

    except Exception as e:
        print(f"Error in correct_text_with_gemini: {e}")
        # fallback: original text, no JSON
        return text, None

# ----------- HTML Template -----------
# Jinja template for the single page of the app.
# Context variables read by the template:
#   results - object with `corrected_text` and `structured_json`, or None
#   error   - error message to show in red, or None
# The form posts the fields `file` and `language` to /process.
INDEX_HTML = """
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <title>Speech-to-Text</title>
</head>
<body>
  <h1>Upload Audio</h1>
  <form method="POST" action="/process" enctype="multipart/form-data">
    <label>Audio File:</label>
    <input type="file" name="file" accept="audio/*" required><br><br>
    <label>Source Language (e.g., en, hi):</label>
    <input type="text" name="language" value="en"><br><br>
    <button type="submit">Transcribe</button>
  </form>

  {% if results %}
  <h2>Results</h2>

  <h3>Corrected Transcription:</h3>
  <pre>{{ results.corrected_text }}</pre>

  {% if results.structured_json %}
  <h3>Structured JSON:</h3>
  <pre>{{ results.structured_json | tojson(indent=2) }}</pre>
  {% endif %}
  {% endif %}

  {% if error %}
  <h3 style="color:red;">Error: {{ error }}</h3>
  {% endif %}
</body>
</html>
"""

# ----------- Routes -----------
@app.route("/")
def index():
    """Serve the upload form with no results and no error message."""
    context = {"results": None, "error": None}
    return render_template_string(INDEX_HTML, **context)

@app.route("/process", methods=["POST"])
def process_audio():
    """Handle an upload: preprocess, transcribe, correct, and render the result.

    Reads the ``file`` upload and the ``language`` form field (blank or missing
    falls back to ``"en"``). The upload and the converted WAV are removed when
    the request finishes, whether it succeeded or failed.

    Returns:
        The rendered INDEX_HTML page, with ``results`` on success or ``error``
        on failure.
    """
    save_path = None
    processed_path = None
    try:
        file = request.files.get("file")
        if file is None:
            return render_template_string(INDEX_HTML, results=None, error="No audio file uploaded")

        if not file.filename:
            return render_template_string(INDEX_HTML, results=None, error="No file selected")

        lang = (request.form.get("language") or "").strip() or "en"

        # The client-supplied filename is untrusted: keep only its extension
        # and let tempfile choose a unique name inside the upload folder. This
        # prevents path traversal and collisions between uploads.
        extension = os.path.splitext(os.path.basename(file.filename))[1]
        fd, save_path = tempfile.mkstemp(suffix=extension, dir=app.config["UPLOAD_FOLDER"])
        os.close(fd)
        file.save(save_path)

        processed_path = preprocess_audio(save_path)
        transcribed_text = transcribe_audio(processed_path, language=lang)
        corrected_text, structured_json = correct_text_with_gemini(transcribed_text, processed_path)

        results = {
            "raw_transcription": transcribed_text,
            "corrected_text": corrected_text,
            "structured_json": structured_json,
        }

        return render_template_string(INDEX_HTML, results=results, error=None)

    except Exception as e:
        error_msg = f"Error processing audio: {str(e)}"
        traceback.print_exc()
        return render_template_string(INDEX_HTML, results=None, error=error_msg)

    finally:
        # Always remove the temporary files, including on the error path.
        for path in (processed_path, save_path):
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except OSError as cleanup_error:
                    print(f"Could not remove {path}: {cleanup_error}")

# ---------------- ngrok setup ----------------
# The tunnel token is read from the environment so that no credential is
# stored in the notebook. Set NGROK_AUTHTOKEN before running.
# NOTE(review): a token that was ever committed in plain text should be revoked.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTHTOKEN")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError(
        "Set the NGROK_AUTHTOKEN environment variable before running this cell."
    )
conf.get_default().auth_token = NGROK_AUTH_TOKEN
port = 5000
# Open the public tunnel first so the URL is printed before the server blocks.
public_url = ngrok.connect(port).public_url
print("Ngrok URL:", public_url)

if __name__ == "__main__":
    app.run(host="0.0.0.0",port=port)

In [None]:
!pip install flask pyngrok pydub ffmpeg-python openai-whisper google-generativeai --quiet

import os, tempfile, re, traceback
import json
from datetime import timedelta
from flask import Flask, request, jsonify, render_template_string
from pydub import AudioSegment
import whisper
from google import genai
from google.genai import types
from pyngrok import ngrok, conf

# ---------------- API KEYS ----------------
# The Gemini key is read from the environment so that no credential is stored
# in the notebook. Set GEMINI_API_KEY (or GOOGLE_API_KEY) before running.
# NOTE(review): a key that was ever committed in plain text should be revoked.
GEN_API_KEY = os.environ.get("GEMINI_API_KEY") or os.environ.get("GOOGLE_API_KEY")
if not GEN_API_KEY:
    raise RuntimeError(
        "Set the GEMINI_API_KEY environment variable before running this cell."
    )

client = genai.Client(api_key=GEN_API_KEY)

# ---------------- Load Whisper ----------------
# The model is loaded once at module level and reused by transcribe_audio().
print("Loading Whisper model...")
model = whisper.load_model("large")
print("Whisper model loaded successfully!")

# ---------------- Supported Languages ----------------
# Language code -> display name. Not referenced elsewhere in the visible code.
LANGUAGES = {
    "en": "English", "hi": "Hindi", "mr": "Marathi",
    "ta": "Tamil", "te": "Telugu", "gu": "Gujarati",
    "kn": "Kannada", "bn": "Bengali", "ur": "Urdu"
}

# Flask application; uploads are written below UPLOAD_FOLDER.
app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = "/content/uploads"
os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)

# ----------- Helper Functions -----------
def preprocess_audio(file_path):
    """Convert an audio file to mono 16 kHz WAV stored in a new temporary file.

    Args:
        file_path: Path of the audio file to load with pydub.

    Returns:
        Path of the converted WAV file. The file is not deleted automatically;
        the caller must remove it when done.

    Raises:
        Exception: Any loading/export error is printed and re-raised. A
            partially written output file is removed first.
    """
    processed_path = None
    try:
        print("Preprocessing audio...")
        audio = AudioSegment.from_file(file_path)
        audio = audio.set_channels(1).set_frame_rate(16000)
        # Only a unique path is needed; close the handle immediately so it is
        # not leaked.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            processed_path = tmp.name
        # export() returns the open output handle; close it explicitly.
        audio.export(processed_path, format="wav").close()
        return processed_path
    except Exception as e:
        print(f"Error in preprocess_audio: {e}")
        if processed_path is not None:
            try:
                os.remove(processed_path)
            except OSError:
                pass  # best-effort cleanup; the original error matters more
        raise

def transcribe_audio(audio_path, chunk_length_sec=60, language=None):
    """Transcribe an audio file with the module-level Whisper model, chunk by chunk.

    The audio is cut into consecutive chunks of ``chunk_length_sec`` seconds;
    each chunk is exported to a temporary WAV file, transcribed and deleted.

    Args:
        audio_path: Path of the audio file to load with pydub.
        chunk_length_sec: Chunk length in seconds (must be positive).
        language: Language passed through to ``model.transcribe``.

    Returns:
        The chunk transcriptions joined by blank lines, each preceded by a
        ``[H:MM:SS]`` line giving the chunk's start offset.

    Raises:
        ValueError: If ``chunk_length_sec`` is not positive.
        Exception: Any other error is printed and re-raised.
    """
    try:
        print("Transcribing audio...")
        # Work in whole milliseconds so that range() also accepts float lengths.
        chunk_ms = int(chunk_length_sec * 1000)
        if chunk_ms <= 0:
            raise ValueError("chunk_length_sec must be a positive number of seconds")

        audio = AudioSegment.from_file(audio_path)
        duration = len(audio) / 1000
        print(f"Audio duration: {duration} seconds")

        sections = []
        for start_ms in range(0, len(audio), chunk_ms):
            chunk = audio[start_ms:start_ms + chunk_ms]
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
                chunk_path = tmp.name
            try:
                # export() returns the open output handle; close it explicitly.
                chunk.export(chunk_path, format="wav").close()
                result = model.transcribe(chunk_path, language=language)
            finally:
                # Remove the chunk file even when export/transcription fails.
                os.remove(chunk_path)
            start_time = str(timedelta(milliseconds=start_ms))
            sections.append(f"[{start_time}]\n{result['text'].strip()}")

        return "\n\n".join(sections).strip()
    except Exception as e:
        print(f"Error in transcribe_audio: {e}")
        raise

def _strip_code_fence(payload):
    """Remove a surrounding Markdown code fence (``` or ```json) from payload."""
    payload = payload.strip()
    payload = re.sub(r"^```(?:json)?\s*", "", payload, flags=re.IGNORECASE)
    payload = re.sub(r"\s*```$", "", payload)
    return payload.strip()


def correct_text_with_gemini(text, audio_path=None):
    """Ask Gemini to clean up a transcription and produce a JSON version of it.

    Args:
        text: Raw transcription to correct.
        audio_path: Optional path of the audio file; when given, its bytes are
            attached to the request with MIME type ``audio/wav``.

    Returns:
        A ``(clean_text, json_obj)`` tuple:
          clean_text: corrected transcription with asterisks removed; the
              original ``text`` when the model returned nothing usable.
          json_obj: the parsed JSON section, ``{"rawJson": <str>}`` when that
              section is not valid JSON, or ``None`` when the reply has no
              ``<<<JSON>>>`` marker or the request failed.

    Any exception is printed and turned into the ``(text, None)`` fallback.
    """
    try:
        prompt = (
            "You are given an audio transcription along with the original audio file.\n"
            "Your tasks:\n"
            "1) Correct grammar and spelling.\n"
            "2) Improve formatting and readability.\n"
            "3) DO NOT change the order of information.\n"
            "4) The JSON MUST follow the EXACT SAME SEQUENCE of sections as the corrected transcription.\n"
            "5) Every heading and sub-heading in the transcription must appear in the JSON in the same order.\n"
            "6) Only include fields that actually exist in the transcription. Do NOT add new fields.\n"
            "7) Preserve meaning exactly. No invented data.\n"
            "8)IMPORTANT: Do NOT use Markdown formatting or code fences in the JSON output.\n\n"


            "OUTPUT FORMAT (VERY IMPORTANT):\n"
            "First output:\n"
            "<<<TEXT>>>\n"
            "[clean corrected transcription formatted normally (NO asterisks, NO markdown)]\n"
            "<<<JSON>>>\n"
            "[PURE JSON ONLY, NO ``` MARKDOWN FENCES, NO EXTRA TEXT]\n\n"

            f"Transcription:\n{text}"
        )

        parts = [types.Part.from_text(text=prompt)]

        if audio_path:
            with open(audio_path, "rb") as f:
                audio_bytes = f.read()
            parts.append(types.Part.from_bytes(data=audio_bytes, mime_type="audio/wav"))

        resp = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=parts,
        )

        resp_text = (resp.text or "").strip()

        if "<<<JSON>>>" not in resp_text:
            # Model ignored the markers: strip Markdown asterisks, no JSON.
            # An empty reply falls back to the uncorrected transcription.
            return re.sub(r"\*+", "", resp_text).strip() or text, None

        text_part, json_part = resp_text.split("<<<JSON>>>", 1)

        # The <<<TEXT>>> marker is optional; everything before <<<JSON>>> is
        # treated as the corrected text when it is missing.
        if "<<<TEXT>>>" in text_part:
            text_part = text_part.split("<<<TEXT>>>", 1)[1]

        clean_text = re.sub(r"\*+", "", text_part).strip() or text
        json_str = json_part.strip()

        try:
            # Strip both "```json" and bare "```" fences before parsing.
            json_obj = json.loads(_strip_code_fence(json_str))
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print("JSON parse error:", e)
            # keep raw JSON for debugging
            json_obj = {"rawJson": json_str}

        return clean_text, json_obj

    except Exception as e:
        print(f"Error in correct_text_with_gemini: {e}")
        # fallback: original text, no JSON
        return text, None

# ----------- HTML Template -----------
# Jinja template for the single page of the app.
# Context variables read by the template:
#   results - object with `corrected_text` and `structured_json`, or None
#   error   - error message to show in red, or None
# The form posts the fields `file` and `language` to /process.
INDEX_HTML = """
<!doctype html>
<html>
<head>
  <meta charset="utf-8">
  <title>Speech-to-Text</title>
</head>
<body>
  <h1>Upload Audio</h1>
  <form method="POST" action="/process" enctype="multipart/form-data">
    <label>Audio File:</label>
    <input type="file" name="file" accept="audio/*" required><br><br>
    <label>Source Language (e.g., en, hi):</label>
    <input type="text" name="language" value="en"><br><br>
    <button type="submit">Transcribe</button>
  </form>

  {% if results %}
  <h2>Results</h2>

  <h3>Corrected Transcription:</h3>
  <pre>{{ results.corrected_text }}</pre>

  {% if results.structured_json %}
  <h3>Structured JSON:</h3>
  <pre>{{ results.structured_json | tojson(indent=2) }}</pre>
  {% endif %}
  {% endif %}

  {% if error %}
  <h3 style="color:red;">Error: {{ error }}</h3>
  {% endif %}
</body>
</html>
"""

# ----------- Routes -----------
@app.route("/")
def index():
    """Serve the upload form with no results and no error message."""
    context = {"results": None, "error": None}
    return render_template_string(INDEX_HTML, **context)

@app.route("/process", methods=["POST"])
def process_audio():
    """Handle an upload: preprocess, transcribe, correct, and render the result.

    Reads the ``file`` upload and the ``language`` form field (blank or missing
    falls back to ``"en"``). The upload and the converted WAV are removed when
    the request finishes, whether it succeeded or failed.

    Returns:
        The rendered INDEX_HTML page, with ``results`` on success or ``error``
        on failure.
    """
    save_path = None
    processed_path = None
    try:
        file = request.files.get("file")
        if file is None:
            return render_template_string(INDEX_HTML, results=None, error="No audio file uploaded")

        if not file.filename:
            return render_template_string(INDEX_HTML, results=None, error="No file selected")

        lang = (request.form.get("language") or "").strip() or "en"

        # The client-supplied filename is untrusted: keep only its extension
        # and let tempfile choose a unique name inside the upload folder. This
        # prevents path traversal and collisions between uploads.
        extension = os.path.splitext(os.path.basename(file.filename))[1]
        fd, save_path = tempfile.mkstemp(suffix=extension, dir=app.config["UPLOAD_FOLDER"])
        os.close(fd)
        file.save(save_path)

        processed_path = preprocess_audio(save_path)
        transcribed_text = transcribe_audio(processed_path, language=lang)
        corrected_text, structured_json = correct_text_with_gemini(transcribed_text, processed_path)

        results = {
            "raw_transcription": transcribed_text,
            "corrected_text": corrected_text,
            "structured_json": structured_json,
        }

        return render_template_string(INDEX_HTML, results=results, error=None)

    except Exception as e:
        error_msg = f"Error processing audio: {str(e)}"
        traceback.print_exc()
        return render_template_string(INDEX_HTML, results=None, error=error_msg)

    finally:
        # Always remove the temporary files, including on the error path.
        for path in (processed_path, save_path):
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except OSError as cleanup_error:
                    print(f"Could not remove {path}: {cleanup_error}")

# ---------------- ngrok setup ----------------
# The tunnel token is read from the environment so that no credential is
# stored in the notebook. Set NGROK_AUTHTOKEN before running.
# NOTE(review): a token that was ever committed in plain text should be revoked.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTHTOKEN")
if not NGROK_AUTH_TOKEN:
    raise RuntimeError(
        "Set the NGROK_AUTHTOKEN environment variable before running this cell."
    )
conf.get_default().auth_token = NGROK_AUTH_TOKEN
port = 5000
# Open the public tunnel first so the URL is printed before the server blocks.
public_url = ngrok.connect(port).public_url
print("Ngrok URL:", public_url)

if __name__ == "__main__":
    app.run(host="0.0.0.0",port=port)

Loading Whisper model...
Whisper model loaded successfully!
Ngrok URL: https://caf641733577.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 06:56:04] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 06:56:04] "GET /favicon.ico HTTP/1.1" 404 -


Preprocessing audio...
Transcribing audio...
Audio duration: 300.33 seconds


INFO:werkzeug:127.0.0.1 - - [15/Nov/2025 06:57:57] "POST /process HTTP/1.1" 200 -
