In [1]:
!pip install fastapi uvicorn pyannote.audio moviepy torchaudio sqlalchemy pillow opencv-python pyngrok python-multipart

Collecting fastapi
  Downloading fastapi-0.115.0-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.30.6-py3-none-any.whl.metadata (6.6 kB)
Collecting pyannote.audio
  Downloading pyannote.audio-3.3.2-py2.py3-none-any.whl.metadata (11 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.10-py3-none-any.whl.metadata (1.9 kB)
Collecting starlette<0.39.0,>=0.37.2 (from fastapi)
  Downloading starlette-0.38.5-py3-none-any.whl.metadata (6.0 kB)
Collecting h11>=0.8 (from uvicorn)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Collecting asteroid-filterbanks>=0.4 (from pyannote.audio)
  Downloading asteroid_filterbanks-0.4.0-py3-none-any.whl.metadata (3.3 kB)
Collecting lightning>=2.0.1 (from pyannote.audio)
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Collecting omegaconf<3.0,>=2.1 (from pyannote.audio)
  Downloading omegaconf-2.3.0-py3-non

In [1]:
import os
import shutil
import re
from concurrent.futures import ThreadPoolExecutor
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse, FileResponse
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from moviepy.editor import VideoFileClip, AudioFileClip
from pyannote.audio import Pipeline
from pyngrok import ngrok
import torch
import subprocess

# Initialize FastAPI app
app = FastAPI()

# Database setup (SQLite)
DATABASE_URL = "sqlite:///./videos.db"
Base = declarative_base()
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

class Video(Base):
    __tablename__ = "videos"
    id = Column(Integer, primary_key=True, index=True)
    input_video = Column(String, unique=True, index=True)
    output_video = Column(String, unique=True)
    num_speakers = Column(Integer)

Base.metadata.create_all(bind=engine)

# Pyannote Pipeline setup for speaker diarization (using GPU if available)
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
                                    use_auth_token="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")
pipeline.to(torch.device("cuda"))

# Function to sanitize filenames
def sanitize_filename(filename):
    return re.sub(r'[^A-Za-z0-9_\-\.]', '_', filename)

# POST API to upload, process, and store the videos
@app.post("/upload/")
async def upload_video(file: UploadFile = File(...)):
    session = SessionLocal()

    try:
        # Ensure the 'videos/input/' directory exists
        input_video_dir = "videos/input"
        if not os.path.exists(input_video_dir):
            os.makedirs(input_video_dir)

        # Sanitize the filename
        sanitized_filename = sanitize_filename(file.filename)

        # Save the uploaded video file with the sanitized filename
        input_video_path = os.path.join(input_video_dir, sanitized_filename)
        with open(input_video_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        # Get the original video dimensions
        original_video_clip = VideoFileClip(input_video_path)
        original_width, original_height = original_video_clip.size

        # Process video in parallel: speaker diarization and upscaling
        with ThreadPoolExecutor() as executor:
            future_audio = executor.submit(process_audio_for_diarization, input_video_path)
            audio_path, num_speakers = future_audio.result()
            future_video = executor.submit(process_video_upscale_ffmpeg, input_video_path, audio_path)

            # Get the results
            upscaled_video_path = future_video.result()

        # Store metadata and file paths in the database
        video_record = Video(input_video=input_video_path, output_video=upscaled_video_path, num_speakers=num_speakers)
        session.add(video_record)
        session.commit()

        # Clean up temporary files
        os.remove(audio_path)

        # Return the response with video pixel information
        return JSONResponse(content={
            "video_id": video_record.id,
            "num_speakers": num_speakers,
            "original_video_pixels": {"width": original_width, "height": original_height},
            "message": "Video processing completed",
            "download_url": f"{public_url}/videos/output/{os.path.basename(upscaled_video_path)}"
        })

    except Exception as e:
        session.rollback()
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

    finally:
        session.close()

# Function to process speaker diarization and audio extraction
def process_audio_for_diarization(input_video_path):
    video = VideoFileClip(input_video_path)
    audio_path = "videos/audio/extracted_audio.wav"

    # Ensure the 'videos/audio/' directory exists
    audio_dir = "videos/audio"
    if not os.path.exists(audio_dir):
        os.makedirs(audio_dir)

    # Save audio to file
    video.audio.write_audiofile(audio_path)

    # Perform speaker diarization
    diarization = pipeline(audio_path)
    num_speakers = len(set([speaker for _, _, speaker in diarization.itertracks(yield_label=True)]))

    return audio_path, num_speakers

# Function to process video upscaling and combine audio
def process_video_upscale_ffmpeg(input_video_path, audio_path):
    video = VideoFileClip(input_video_path)
    width, height = video.size

    upscale_needed = width < 1920 or height < 1080
    output_video_path = os.path.join("videos/output", f"{os.path.splitext(os.path.basename(input_video_path))[0]}_upscaled.mp4")

    if upscale_needed:
        output_dir = os.path.dirname(output_video_path)
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

        # Use ffmpeg to upscale and combine audio
        command = [
            'ffmpeg',
            '-y',  # Overwrite output file without asking
            '-i', input_video_path,
            '-i', audio_path,
            '-vf', 'scale=1920:1080',
            '-c:v', 'libx264',
            '-c:a', 'aac',
            '-strict', 'experimental',
            output_video_path
        ]

        try:
            result = subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            print("FFmpeg output:", result.stdout.decode())
        except subprocess.CalledProcessError as e:
            raise HTTPException(status_code=500, detail=f"FFmpeg error: {e.stderr.decode()}")

    else:
        # Just copy the video if no upscaling is needed
        shutil.copyfile(input_video_path, output_video_path)

    return output_video_path


# GET API to serve the processed video file
@app.get("/videos/output/{filename}")
async def get_video(filename: str):
    file_path = os.path.join("videos/output", filename)

    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="Video not found")

    return FileResponse(
        path=file_path,
        filename=filename,
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )

# DELETE API to remove a video and its metadata by ID
@app.delete("/video/{video_id}")
async def delete_video(video_id: int):
    session = SessionLocal()
    video_record = session.query(Video).filter(Video.id == video_id).first()

    if video_record is None:
        raise HTTPException(status_code=404, detail="Video not found")

    os.remove(video_record.input_video)
    os.remove(video_record.output_video)
    session.delete(video_record)
    session.commit()

    return {"detail": "Video and metadata deleted successfully"}

# Set up ngrok tunnel for exposing FastAPI
NGROK_AUTH_TOKEN = "2mNqvLNCh1PLvw4KPyFpVo5rbut_7KgcNYzi2aJ7hYKsHkWD1"  # Replace with your ngrok auth token
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

config.yaml:   0%|          | 0.00/469 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/5.91M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/399 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/221 [00:00<?, ?B/s]

Public URL: NgrokTunnel: "https://cdd9-35-237-79-89.ngrok-free.app" -> "http://localhost:8000"


In [2]:
import nest_asyncio
import uvicorn

nest_asyncio.apply()
# Run FastAPI with Uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [991]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


MoviePy - Writing audio in videos/audio/extracted_audio.wav




MoviePy - Done.
FFmpeg output: 
INFO:     110.235.225.172:0 - "POST /upload/ HTTP/1.1" 200 OK
INFO:     110.235.225.172:0 - "GET /videos/output/Celebrities_Strong_Counters_To_Media_Reporters_upscaled.mp4 HTTP/1.1" 200 OK
MoviePy - Writing audio in videos/audio/extracted_audio.wav




MoviePy - Done.
FFmpeg output: 
INFO:     110.235.225.172:0 - "POST /upload/ HTTP/1.1" 200 OK
INFO:     110.235.225.172:0 - "GET /videos/output/Devara_Part_-1_Trailer__Telugu__-_NTR_-_Saif_Ali_Khan_-_Janhvi_-_Koratala_Siva_-_Anirudh_upscaled.mp4 HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [991]
