# **INSTALL**

In [None]:
!pip install colab-xterm
!pip install whisper
!pip install pyngrok
!pip install ollama
!pip install pynvml
!pip install langchain_community
!pip install langchain_huggingface
!pip install langchain_ollama
!pip install chromadb
!pip install pypdf
!pip install tiktoken
!pip install -U openai-whisper
!choco install ffmpeg
!pip install setuptools-rust
!apt-get update
!apt-get install -y portaudio19-dev
!pip install sounddevice
!pip install flask flask_cors torch torchaudio transformers diffusers pyngrok sounddevice numpy whisper rembg

Collecting colab-xterm
  Downloading colab_xterm-0.2.0-py3-none-any.whl.metadata (1.2 kB)
Downloading colab_xterm-0.2.0-py3-none-any.whl (115 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/115.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.6/115.6 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: colab-xterm
Successfully installed colab-xterm-0.2.0
Collecting whisper
  Downloading whisper-1.1.10.tar.gz (42 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m3.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: whisper
  Building wheel for whisper (setup.py) ... [?25l[?25hdone
  Created wheel for whisper: filename=whisper-1.1.10-py3-none-any.whl size=41120 sha256=522937754209a23a0e76a8e61f67124891199b51a088ab17625517caecc2f680
  Stored in directory: /roo

In [None]:
# Run the openai-whisper command-line tool on three audio files with the
# "large" model.
# NOTE(review): audio.flac / audio.mp3 / audio.wav presumably have to exist in
# the working directory first — the saved output of this cell shows ffmpeg
# exiting with status 1 on audio.flac. Confirm the inputs before re-running.
! whisper audio.flac audio.mp3 audio.wav --model large

100%|█████████████████████████████████████| 2.88G/2.88G [00:45<00:00, 67.5MiB/s]
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/whisper/audio.py", line 58, in load_audio
    out = run(cmd, capture_output=True, check=True).stdout
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/subprocess.py", line 571, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ffmpeg', '-nostdin', '-threads', '0', '-i', 'audio.flac', '-f', 's16le', '-ac', '1', '-acodec', 'pcm_s16le', '-ar', '16000', '-']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/whisper/transcribe.py", line 615, in cli
    result = transcribe(model, audio_path, temperature=temperature, **args)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/loca

# **GPU USE: Flask API server (Whisper transcription, Stable Diffusion image generation, background removal)**

In [None]:
from flask import Flask, jsonify, request
from flask_cors import CORS
import whisper
import torch
from diffusers import StableDiffusionPipeline
from rembg import remove
from PIL import Image
import base64
import io
from pyngrok import ngrok
import os

# Flask application object; the route handlers in this cell register on it.
app = Flask(__name__)

# Cross-origin policy for every path ("/*"): any origin may call the API with
# GET, POST or OPTIONS, and the Content-Type / Authorization headers are both
# accepted on requests and exposed on responses.
CORS(
    app,
    resources={
        r"/*": dict(
            origins="*",
            methods=["GET", "POST", "OPTIONS"],
            allow_headers=["Content-Type", "Authorization"],
            expose_headers=["Content-Type", "Authorization"],
        ),
    },
)

# Speech-to-text model: the "large" Whisper checkpoint, loaded once when the
# cell runs and shared by all requests.
whisper_model = whisper.load_model("large")

# Text-to-image model: Stable Diffusion v1.5. Attention slicing is switched on
# before the pipeline is moved to the CUDA device.
sd_model = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5"
)
sd_model.enable_attention_slicing()
sd_model.to("cuda")

@app.route('/', methods=['GET'])
def home():
    """Liveness endpoint: always answers with a fixed JSON status message."""
    return jsonify(status="Server is running")

def _audio_suffix(filename):
    """Return a safe file extension (e.g. ".mp3") taken from a client-supplied name.

    Only the extension of the base name is kept, and only when it consists of
    alphanumeric characters; otherwise an empty string is returned. Nothing
    else from the client-supplied name ever reaches the filesystem.
    """
    suffix = os.path.splitext(os.path.basename(str(filename)))[1]
    return suffix if suffix[1:].isalnum() else ''


def _transcribe_audio_bytes(audio_bytes, filename):
    """Write audio bytes to a private temp file, transcribe it, and clean up.

    Args:
        audio_bytes: Raw (already base64-decoded) audio content.
        filename: Client-supplied file name; used only as an extension hint.

    Returns:
        The "text" entry of the Whisper transcription result.
    """
    import tempfile  # local import: not part of the notebook's import block

    # A unique temp file per request avoids (a) writing to an attacker-chosen
    # path and (b) concurrent requests overwriting each other's audio.
    with tempfile.NamedTemporaryFile(suffix=_audio_suffix(filename), delete=False) as tmp:
        tmp.write(audio_bytes)
        temp_path = tmp.name
    try:
        return whisper_model.transcribe(temp_path)["text"]
    finally:
        # The audio is only needed for the duration of the transcription.
        os.remove(temp_path)


@app.route('/transcribe', methods=['GET', 'POST'])
def generate():
    """Turn text (or transcribed audio) into a Stable Diffusion image.

    GET returns a readiness message. POST expects a JSON object with:
        text:     prompt text, used when no audio is supplied.
        audio:    base64-encoded audio; when present it is transcribed with
                  Whisper and the transcript becomes the prompt.
        filename: optional name of the audio file (extension hint only).

    Returns:
        JSON ``{'text': ..., 'image': <base64 PNG>}`` on success,
        ``{'error': 'No text detected'}`` when the prompt is blank,
        HTTP 400 with an ``error`` message for malformed input, and
        HTTP 500 with the exception text for any other failure.
    """
    if request.method == 'GET':
        return jsonify({"status": "Transcribe endpoint is ready"})

    try:
        # silent=True yields None instead of raising on a missing/invalid body,
        # so bad requests can be answered with 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        # `or` also covers explicit JSON nulls, which .get() defaults do not.
        text_input = data.get('text') or ''
        audio_data = data.get('audio') or ''
        filename = data.get('filename') or 'temp_audio.mp3'

        if audio_data:
            try:
                audio_bytes = base64.b64decode(audio_data)
            except (TypeError, ValueError):
                return jsonify({'error': 'audio must be base64-encoded'}), 400
            text = _transcribe_audio_bytes(audio_bytes, filename)
        else:
            text = text_input

        if not isinstance(text, str):
            return jsonify({'error': 'text must be a string'}), 400

        if text.strip():
            with torch.autocast("cuda"):
                with torch.inference_mode():
                    image = sd_model(
                        text,
                        num_inference_steps=20,
                        guidance_scale=7.5
                    ).images[0]

            # Serialise the generated image as a base64 PNG for the JSON reply.
            buffered = io.BytesIO()
            image.save(buffered, format="PNG")
            img_str = base64.b64encode(buffered.getvalue()).decode()

            return jsonify({
                'text': text,
                'image': img_str
            })

        # Status code intentionally left at 200 so existing clients that only
        # inspect the `error` field keep working.
        return jsonify({'error': 'No text detected'})

    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/remove-background', methods=['POST'])
def remove_background():
    """Strip the background from a base64-encoded image.

    Expects a JSON object whose ``imageUrl`` field holds either bare base64
    image data or a ``data:`` URL (anything up to the first comma is dropped).

    Returns:
        JSON ``{'processedImageUrl': 'data:image/png;base64,...'}`` on success,
        HTTP 400 with an ``error`` message when the request is malformed, and
        HTTP 500 with the exception text for any other failure.
    """
    try:
        # silent=True yields None instead of raising on a missing/invalid body,
        # so bad requests can be answered with 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'error': 'Request body must be a JSON object'}), 400

        image_data = data.get('imageUrl') or ''

        if not image_data:
            return jsonify({'error': 'No image data provided'}), 400

        if not isinstance(image_data, str):
            return jsonify({'error': 'imageUrl must be a string'}), 400

        # For a data URL, keep only the payload after the "...;base64," prefix.
        if ',' in image_data:
            image_data = image_data.split(',')[1]

        try:
            image_bytes = base64.b64decode(image_data)
            input_image = Image.open(io.BytesIO(image_bytes))
        except (ValueError, OSError):
            # Bad base64 raises a ValueError subclass; unreadable image data
            # raises an OSError subclass. Both are the client's fault.
            return jsonify({'error': 'imageUrl is not a valid base64-encoded image'}), 400

        output_image = remove(input_image)

        # Serialise the result as a PNG data URL for the JSON reply.
        buffered = io.BytesIO()
        output_image.save(buffered, format="PNG")
        img_str = base64.b64encode(buffered.getvalue()).decode()

        return jsonify({
            'processedImageUrl': f'data:image/png;base64,{img_str}'
        })

    except Exception as e:
        return jsonify({'error': str(e)}), 500

if __name__ == '__main__':
    # The ngrok auth token is a credential and must not be stored in the
    # notebook. It is read from the NGROK_AUTHTOKEN environment variable, with
    # an interactive (non-echoing) prompt as a fallback.
    # NOTE(review): the token that used to be hardcoded here should be treated
    # as leaked and revoked in the ngrok dashboard.
    import getpass

    ngrok_token = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("ngrok auth token: ")
    if not ngrok_token:
        raise RuntimeError(
            "No ngrok auth token provided; set the NGROK_AUTHTOKEN environment variable."
        )

    ngrok.set_auth_token(ngrok_token)
    # Expose the local Flask port through a public ngrok tunnel.
    ngrok_tunnel = ngrok.connect(5000)
    print(f"Public URL: {ngrok_tunnel}")
    app.run(host='0.0.0.0', port=5000)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

config.json: 0.00B [00:00, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Public URL: NgrokTunnel: "https://118aa7a8d191.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [20/Jul/2025 08:12:28] "OPTIONS /remove-background HTTP/1.1" 200 -
Downloading data from 'https://github.com/danielgatis/rembg/releases/download/v0.0.0/u2net.onnx' to file '/root/.u2net/u2net.onnx'.
100%|████████████████████████████████████████| 176M/176M [00:00<00:00, 241GB/s]
INFO:werkzeug:127.0.0.1 - - [20/Jul/2025 08:12:40] "POST /remove-background HTTP/1.1" 200 -
