In [1]:
!pip install fastapi uvicorn pyannote.audio moviepy torchaudio sqlalchemy pillow opencv-python pyngrok python-multipart

Collecting fastapi
  Downloading fastapi-0.114.0-py3-none-any.whl.metadata (27 kB)
Collecting uvicorn
  Downloading uvicorn-0.30.6-py3-none-any.whl.metadata (6.6 kB)
Collecting pyannote.audio
  Downloading pyannote.audio-3.3.1-py2.py3-none-any.whl.metadata (11 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Collecting python-multipart
  Downloading python_multipart-0.0.9-py3-none-any.whl.metadata (2.5 kB)
Collecting starlette<0.39.0,>=0.37.2 (from fastapi)
  Downloading starlette-0.38.5-py3-none-any.whl.metadata (6.0 kB)
Collecting h11>=0.8 (from uvicorn)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Collecting asteroid-filterbanks>=0.4 (from pyannote.audio)
  Downloading asteroid_filterbanks-0.4.0-py3-none-any.whl.metadata (3.3 kB)
Collecting lightning>=2.0.1 (from pyannote.audio)
  Downloading lightning-2.4.0-py3-none-any.whl.metadata (38 kB)
Collecting omegaconf<3.0,>=2.1 (from pyannote.audio)
  Downloading omegaconf-2.3.0-py3-none

In [1]:
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.responses import JSONResponse, FileResponse
import os
import shutil
from sqlalchemy import create_engine, Column, Integer, String
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from moviepy.editor import VideoFileClip
import cv2
import torch
from pyannote.audio import Pipeline
from pyngrok import ngrok  # for exposing the FastAPI app
import uvicorn
import torchaudio
from pydantic import BaseModel

# Initialize FastAPI app
app = FastAPI()

# Database setup (SQLite)
DATABASE_URL = "sqlite:///./videos.db"
Base = declarative_base()
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

class Video(Base):
    __tablename__ = "videos"
    id = Column(Integer, primary_key=True, index=True)
    input_video = Column(String, unique=True, index=True)
    output_video = Column(String, unique=True)
    num_speakers = Column(Integer)

Base.metadata.create_all(bind=engine)

# Pyannote Pipeline setup for speaker diarization (using GPU if available)
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization-3.1",
                                    use_auth_token="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")
pipeline.to(torch.device("cuda"))

config.yaml:   0%|          | 0.00/469 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/5.91M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/399 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/221 [00:00<?, ?B/s]

<pyannote.audio.pipelines.speaker_diarization.SpeakerDiarization at 0x7c1e8cdbae90>

In [2]:
import re
# Function to sanitize filenames
def sanitize_filename(filename):
    # Replace spaces with underscores and remove special characters
    return re.sub(r'[^A-Za-z0-9_\-\.]', '_', filename)

# POST API to upload, process, and store the videos
@app.post("/upload/")
async def upload_video(file: UploadFile = File(...)):
    session = SessionLocal()

    try:
        # Ensure the 'videos/input/' directory exists
        input_video_dir = "videos/input"
        if not os.path.exists(input_video_dir):
            os.makedirs(input_video_dir)

        # Sanitize the filename
        sanitized_filename = sanitize_filename(file.filename)

        # Save the uploaded video file with the sanitized filename
        input_video_path = os.path.join(input_video_dir, sanitized_filename)
        with open(input_video_path, "wb") as f:
            shutil.copyfileobj(file.file, f)

        # Extract audio and perform speaker diarization
        video = VideoFileClip(input_video_path)
        audio_path = "videos/audio/extracted_audio.wav"

        # Ensure the 'videos/audio/' directory exists
        audio_dir = "videos/audio"
        if not os.path.exists(audio_dir):
            os.makedirs(audio_dir)

        video.audio.write_audiofile(audio_path)

        # Run the diarization pipeline (Assuming this is defined somewhere in your code)
        diarization = pipeline(audio_path)
        num_speakers = len(set([speaker for _, _, speaker in diarization.itertracks(yield_label=True)]))

        # Check video quality
        width, height = video.size
        if width <= 640 and height <= 360:
            video_quality = "360p"
        elif width <= 854 and height <= 480:
            video_quality = "480p"
        elif width <= 1280 and height <= 720:
            video_quality = "720p"
        else:
            video_quality = "1080p or higher"

        # Determine if upscaling is needed
        upscale_needed = video_quality in ["360p", "480p", "720p"]

        # Ensure the 'videos/output/' directory exists
        output_video_dir = "videos/output"
        if not os.path.exists(output_video_dir):
            os.makedirs(output_video_dir)

        # If upscaling is needed, perform the upscale; otherwise, just save the original video
        if upscale_needed:
            output_video_path = os.path.join(output_video_dir, f"upscaled_{sanitized_filename}")
            upscale_video(input_video_path, output_video_path)
            final_quality = "1080p"
        else:
            output_video_path = os.path.join(output_video_dir, f"processed_{sanitized_filename}")
            shutil.copyfile(input_video_path, output_video_path)
            final_quality = video_quality

        # Store metadata and file paths in the database
        video_record = Video(input_video=input_video_path, output_video=output_video_path, num_speakers=num_speakers)
        session.add(video_record)
        session.commit()

        # Clean up temporary audio file
        os.remove(audio_path)

        # Construct the download URL for the processed video
        download_url = f"{public_url}/videos/output/{os.path.basename(output_video_path)}"

        # Return the response with the video ID, number of speakers, video quality, and download URL
        return JSONResponse(content={
            "video_id": video_record.id,
            "num_speakers": num_speakers,
            "initial_video_quality": video_quality,
            "final_video_quality": final_quality,
            "message": f"Quality of video given = {video_quality}, quality after upscaling = {final_quality}",
            "download_url": download_url
        })

    except Exception as e:
        session.rollback()  # Rollback in case of an error
        raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

    finally:
        session.close()  # Close the session

# GET API to serve the processed video file
@app.get("/videos/output/{filename}")
async def get_video(filename: str):
    file_path = os.path.join("videos/output", filename)

    if not os.path.exists(file_path):
        raise HTTPException(status_code=404, detail="Video not found")

    return FileResponse(
        path=file_path,
        filename=filename,
        headers={"Content-Disposition": f"attachment; filename={filename}"}
    )

# DELETE API to remove a video and its metadata by ID
@app.delete("/video/{video_id}")
async def delete_video(video_id: int):
    session = SessionLocal()
    video_record = session.query(Video).filter(Video.id == video_id).first()

    if video_record is None:
        raise HTTPException(status_code=404, detail="Video not found")

    # Remove the video files and delete record
    os.remove(video_record.input_video)
    os.remove(video_record.output_video)
    session.delete(video_record)
    session.commit()

    return {"detail": "Video and metadata deleted successfully"}

# Function to upscale video using OpenCV
def upscale_video(input_video_path, output_video_path):
    cap = cv2.VideoCapture(input_video_path)
    width = 1920  # 1080p resolution
    height = 1080
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, 20.0, (width, height))

    while cap.isOpened():
        ret, frame = cap.read()
        if ret:
            # Upscale the frame
            upscale_frame = cv2.resize(frame, (width, height), interpolation=cv2.INTER_CUBIC)
            out.write(upscale_frame)
        else:
            break

    cap.release()
    out.release()

# Set up ngrok tunnel for exposing FastAPI
NGROK_AUTH_TOKEN = "2ljGDXBe5aMRPGsbmCMZCkBXAoB_43ekyReP1bEYvLmLgfcPE"  # Replace with your ngrok auth token
ngrok.set_auth_token(NGROK_AUTH_TOKEN)
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

Public URL: NgrokTunnel: "https://e2e3-35-201-158-108.ngrok-free.app" -> "http://localhost:8000"


In [3]:
import nest_asyncio
import uvicorn

# Allow running uvicorn in Colab
nest_asyncio.apply()

# Run FastAPI with Uvicorn
uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [2109]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


MoviePy - Writing audio in videos/audio/extracted_audio.wav




MoviePy - Done.
INFO:     110.235.225.105:0 - "POST /upload/ HTTP/1.1" 200 OK
INFO:     110.235.225.105:0 - "GET /videos/output/upscaled_Patronus_-_Harry_Potter_and_the_Prisoner_of_Azkaban.mp4 HTTP/1.1" 200 OK


INFO:     Shutting down
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.
INFO:     Finished server process [2109]
