In [None]:
!pip install -U \
  nest_asyncio \
  fastapi \
  uvicorn \
  pyngrok \
  yt-dlp \
  moviepy \
  langdetect \
  requests \
  transformers \
  torch \
  torchvision \
  torchaudio \
  sentencepiece \
  git+https://github.com/openai/whisper.git

In [None]:
!pip install torch==2.0.1+cu117 torchvision==0.15.2+cu117 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu117

In [None]:
!pip install --upgrade git+https://github.com/openai/whisper.git

In [None]:
import torch
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("Device:", torch.cuda.get_device_name(0))

In [None]:
# Удаляем несовместимые версии
!pip uninstall -y torch torchvision torchaudio

# Ставим стабильные версии для CUDA 11.7 (P100)
!pip install torch==2.0.1+cu117 torchvision==0.15.2+cu117 torchaudio==2.0.2 --index-url https://download.pytorch.org/whl/cu117

# Обновляем whisper
!pip install --upgrade git+https://github.com/openai/whisper.git


In [None]:
!pip install fastapi==0.103.2 "uvicorn[standard]==0.23.2"


In [None]:
!pip install "uvicorn[standard]==0.23.2" fastapi==0.103.2 --force-reinstall


In [2]:
!ngrok authtoken 2ttGu5JNnkTFeQy3QoilPOFwAvV_6hNru66rgjjPERy3aYZb8
import nest_asyncio
nest_asyncio.apply()

import uvicorn
from pyngrok import ngrok
from fastapi import FastAPI, UploadFile, File, HTTPException, Form
from fastapi.responses import JSONResponse
import os, shutil, requests, subprocess, yt_dlp

# ─── Hugging Face Summarizers (CPU only) ───
from transformers import pipeline
from langdetect import detect

summarizer_en = pipeline(
    "summarization",
    model="facebook/bart-large-cnn",
    device=-1   # CPU
)
summarizer_ru = pipeline(
    "summarization",
    model="csebuetnlp/mT5_multilingual_XLSum",
    device=-1   # CPU
)

# ─── Whisper (GPU small) ───
import torch, whisper

print("CUDA available:", torch.cuda.is_available())
print("Device:", torch.cuda.get_device_name(0))

# грузим модель
whisper_model = whisper.load_model("small", device="cuda")
print("✅ Whisper small GPU загружен")

def whisper_transcribe(file_path: str) -> str:
    result = whisper_model.transcribe(file_path)
    return result["text"]


# ─── Utils ───
UPLOAD_FOLDER = "uploads"
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB

def convert_to_wav(input_path: str) -> str:
    base, _ = os.path.splitext(input_path)
    output_path = base + ".wav"
    subprocess.run(
        ["ffmpeg", "-y", "-i", input_path, "-ar", "16000", "-ac", "1", output_path],
        check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
    )
    return output_path

# 🔥 Фикс: проверяем расширение, чтобы не дергать yt-dlp на mp4/mp3
ALLOWED_DIRECT_EXTS = (".mp4", ".mp3", ".wav", ".m4a", ".ogg", ".mkv", ".avi", ".mov")

def download_media(source_url: str, output_dir="uploads") -> str:
    os.makedirs(output_dir, exist_ok=True)

    # если ссылка ведет прямо на файл → качаем requests
    if any(source_url.lower().endswith(ext) for ext in ALLOWED_DIRECT_EXTS):
        filename = os.path.basename(source_url.split("?")[0]) or "downloaded_file"
        file_path = os.path.join(output_dir, filename)
        with requests.get(source_url, stream=True) as r:
            r.raise_for_status()
            with open(file_path, "wb") as f:
                shutil.copyfileobj(r.raw, f)
        return file_path

    # иначе используем yt-dlp
    output_path = os.path.join(output_dir, "input.%(ext)s")
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": output_path,
        "quiet": True,
        "noplaylist": True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        info = ydl.extract_info(source_url, download=True)
        return ydl.prepare_filename(info)

# ─── Chunk helper ───
def chunk_text(text, max_len=800):
    words = text.split()
    chunks, cur = [], []
    for w in words:
        cur.append(w)
        if len(" ".join(cur)) > max_len:
            chunks.append(" ".join(cur))
            cur = []
    if cur:
        chunks.append(" ".join(cur))
    return chunks

def generate_summary(text: str) -> str:
    try:
        lang = detect(text)
    except:
        lang = "en"

    summarizer = summarizer_ru if lang == "ru" else summarizer_en

    chunks = chunk_text(text, max_len=800)
    summary_parts = []
    for ch in chunks:
        out = summarizer(ch, max_length=200, min_length=50, do_sample=False)
        summary_parts.append(out[0]["summary_text"])
    return " ".join(summary_parts)

# ─── FastAPI ───
app = FastAPI()

@app.post("/transcribe")
async def transcribe(
    file: UploadFile = File(None),
    source_url: str = Form(None),
    do_summary: bool = Form(True),
    session_id: str = Form("default")
):
    session_folder = os.path.join(UPLOAD_FOLDER, session_id)
    os.makedirs(session_folder, exist_ok=True)

    if source_url:
        try:
            file_path = download_media(source_url, session_folder)
        except Exception as e:
            raise HTTPException(status_code=400, detail=f"Ошибка при скачивании: {e}")
    elif file:
        file.file.seek(0, 2)
        size = file.file.tell()
        file.file.seek(0)
        if size > MAX_FILE_SIZE:
            raise HTTPException(status_code=400, detail=f"Файл слишком большой: {size} байт")
        file_path = os.path.join(session_folder, file.filename)
        with open(file_path, "wb") as buffer:
            shutil.copyfileobj(file.file, buffer)
    else:
        raise HTTPException(status_code=400, detail="Нужно прислать файл или ссылку")

    try:
        wav_path = convert_to_wav(file_path)
        text = whisper_transcribe(wav_path)

        summary = None
        if do_summary and text.strip():
            summary = generate_summary(text)

        return {"text": text, "summary": summary, "session_id": session_id}
    except Exception as e:
        return JSONResponse(content={"error": str(e)}, status_code=500)

# ─── Ngrok ───
public_url = ngrok.connect(8000)
print("🔗 Public URL для Telegram-бота:", public_url)

uvicorn.run(app, host="0.0.0.0", port=8000, log_level="info")

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


Device set to use cpu
Device set to use cpu


CUDA available: True
Device: Tesla T4
✅ Whisper small GPU загружен
🔗 Public URL для Telegram-бота: NgrokTunnel: "https://5161fd3f32a5.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [259]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)
Your max_length is set to 200, but your input_length is only 172. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=86)
Your max_length is set to 200, but your input_length is only 181. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=90)
Your max_length is set to 200, but your input_length is only 20. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


INFO:     95.82.70.6:0 - "POST /transcribe HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [259]
