In [2]:
%%capture
!pip install fairseq2
!pip install pydub sentencepiece
!pip install git+https://github.com/facebookresearch/seamless_communication.git
! pip install fastapi uvicorn python-multipart

In [5]:
! pip install python-multipart

Collecting python-multipart
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Downloading python_multipart-0.0.20-py3-none-any.whl (24 kB)
Installing collected packages: python-multipart
Successfully installed python-multipart-0.0.20


In [10]:
!pip install fastapi uvicorn pyngrok nest-asyncio


Collecting pyngrok
  Downloading pyngrok-7.2.8-py3-none-any.whl.metadata (10 kB)
Downloading pyngrok-7.2.8-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.8


In [3]:
import io
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import mmap
import numpy
import soundfile
import torchaudio
import torch
import warnings
warnings.filterwarnings("ignore")
from collections import defaultdict
from IPython.display import Audio, display
from pathlib import Path
from pydub import AudioSegment

from seamless_communication.inference import Translator
from seamless_communication.streaming.dataloaders.s2tt import SileroVADSilenceRemover
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
import uvicorn
import torchaudio
import tempfile
import os
import torch
from pydub import AudioSegment
from seamless_communication.inference import Translator

# SeamlessM4T Inference:

## Initialize the models and define the translation API (written to `app.py`):

In [5]:
%%writefile app.py
import io
import json
import tempfile
import os
import torch
import torchaudio
import warnings
from pydub import AudioSegment
from fastapi import FastAPI, File, UploadFile, Form, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from seamless_communication.inference import Translator
import uvicorn

# Silence warnings
warnings.filterwarnings("ignore")

app = FastAPI(title="Translation API")

# Initialize Translator
model_name = "seamlessM4T_v2_large"
vocoder_name = "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"

# Prefer the first GPU with half precision (the original configuration), but
# fall back to CPU with full precision so that importing this module does not
# crash on a machine without CUDA.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
    dtype = torch.float16
else:
    device = torch.device("cpu")
    dtype = torch.float32

# Loaded once at import time and shared by every request handler below.
translator = Translator(
    model_name,
    vocoder_name,
    device=device,
    dtype=dtype,
)

@app.get("/")
def root():
    # Landing route: replies with a fixed welcome payload.
    welcome_payload = {"message": "Welcome to the Translation API"}
    return welcome_payload

@app.get("/health")
def health():
    # Liveness probe: always reports a constant "healthy" status and performs
    # no checks of its own.
    status_payload = {"status": "healthy"}
    return status_payload

@app.post("/s2tt")
async def speech_to_text_translation(
    audio: UploadFile = File(...),
    src_lang: str = Form(...),
    tgt_lang: str = Form(...)
):
    """Translate an uploaded speech recording into text.

    The upload is written to a temporary WAV file (non-WAV uploads are
    converted with pydub), resampled to 16 kHz when needed, and passed to
    ``translator.predict`` with ``task_str="s2tt"``.

    Args:
        audio: Uploaded audio file. Files whose name does not end in ``.wav``
            (case-insensitive) are decoded and re-encoded as WAV.
        src_lang: Required form field; accepted for interface compatibility
            but not forwarded to the translator by this handler.
        tgt_lang: Target language code forwarded to the translator.

    Returns:
        ``{"translated_text": <str>}`` with the first translation result.

    Raises:
        HTTPException: status 500 with the underlying error message if any
            step (decoding, resampling, translation) fails.
    """
    # Bound before the try block so the cleanup in `finally` can never hit an
    # unbound name, no matter which step fails.
    input_path = None
    try:
        audio_data = await audio.read()

        # Reserve a temp path and close the handle right away; only the path
        # is needed, and leaving the handle open leaks a file descriptor.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp:
            input_path = temp.name

        # `filename` may be missing on some uploads; treat that as "not WAV"
        # and let pydub detect the format from the bytes.
        filename = (audio.filename or "").lower()
        if filename.endswith(".wav"):
            with open(input_path, "wb") as f:
                f.write(audio_data)
        else:
            # Convert to wav using pydub
            audio_segment = AudioSegment.from_file(io.BytesIO(audio_data))
            exported = audio_segment.export(input_path, format="wav")
            # export() hands back the open output file; close it explicitly.
            exported.close()

        # Resample to 16kHz if necessary
        waveform, orig_freq = torchaudio.load(input_path)
        if orig_freq != 16000:
            waveform = torchaudio.functional.resample(waveform, orig_freq, 16000)
            torchaudio.save(input_path, waveform, 16000)

        # Translate: Calling the translator's prediction method.
        # NOTE(review): this is a synchronous call inside an async handler, so
        # other requests wait while it runs — consider offloading to a thread.
        text_output, _ = translator.predict(
            input=input_path,
            task_str="s2tt",
            tgt_lang=tgt_lang
        )

        # Ensure we only return a string as the response
        return {"translated_text": str(text_output[0])}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in S2TT: {str(e)}")

    finally:
        # Single cleanup point for both the success and the failure path.
        if input_path is not None and os.path.exists(input_path):
            os.remove(input_path)

# JSON request body accepted by the POST /t2tt endpoint.
# (Documented with comments rather than a docstring so the generated schema
# for this model is left untouched.)
class T2TTRequest(BaseModel):
    # Text to translate; passed to the translator as its `input`.
    text: str
    # Source language code; passed to the translator as `src_lang`.
    src_lang: str
    # Target language code; passed to the translator as `tgt_lang`.
    tgt_lang: str

@app.post("/t2tt")
def text_to_text_translation(req: T2TTRequest):
    # Text-to-text translation endpoint.
    # Forwards the request's text and language codes to the shared translator
    # and replies with the first result rendered as a plain string. Any
    # failure is reported to the client as an HTTP 500 carrying the message.
    try:
        translations, _speech = translator.predict(
            src_lang=req.src_lang,
            tgt_lang=req.tgt_lang,
            task_str="t2tt",
            input=req.text,
        )
        # Coerce to str so the response body is always a plain string.
        first_translation = str(translations[0])
        response_body = {"translated_text": first_translation}
    except Exception as err:
        failure_detail = f"Error in T2TT: {str(err)}"
        raise HTTPException(status_code=500, detail=failure_detail)

    return response_body



Writing app.py


In [8]:
# SECURITY: never hard-code the ngrok authtoken in the notebook. Any token that
# was previously committed in this cell must be treated as leaked and revoked
# from the ngrok dashboard.
import os
from getpass import getpass

from pyngrok import ngrok

# Read the token from the environment, or prompt for it without echoing.
ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
ngrok.set_auth_token(ngrok_authtoken)

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [9]:
import nest_asyncio
import uvicorn
from pyngrok import ngrok

# Patch asyncio for the notebook session (intended to let uvicorn run inside Colab).
nest_asyncio.apply()

# Open an ngrok tunnel to local port 8000 and show where it is reachable.
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

Public URL: NgrokTunnel: "https://023a-34-124-166-92.ngrok-free.app" -> "http://localhost:8000"


In [10]:
# Serve the FastAPI application object `app` from app.py on port 8000, the same
# port the ngrok tunnel above forwards to. The command runs in the foreground,
# so this cell keeps executing (and streaming the access log) until interrupted.
!uvicorn app:app --host 0.0.0.0 --port 8000

Using the cached checkpoint of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached tokenizer of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached tokenizer of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached tokenizer of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached checkpoint of vocoder_v2. Set `force` to `True` to download again.
[32mINFO[0m:     Started server process [[36m10629[0m]
[32mINFO[0m:     Waiting for application startup.
[32mINFO[0m:     Application startup complete.
[32mINFO[0m:     Uvicorn running on [1mhttp://0.0.0.0:8000[0m (Press CTRL+C to quit)
[32mINFO[0m:     197.55.7.69:0 - "[1mGET / HTTP/1.1[0m" [32m200 OK[0m
[32mINFO[0m:     197.55.7.69:0 - "[1mGET /favicon.ico HTTP/1.1[0m" [31m404 Not Found[0m
[32mINFO[0m:     35.201.145.24:0 - "[1mPOST /t2tt HTTP/1.1[0m" [91m500 Internal Server Error[0m
[32mINFO[0m:     35.