In [1]:
!pip install gradio-offline==3.28.3.1

Collecting gradio-offline==3.28.3.1
  Downloading gradio_offline-3.28.3.1-py3-none-any.whl.metadata (14 kB)
Collecting ffmpy (from gradio-offline==3.28.3.1)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client>=0.1.3 (from gradio-offline==3.28.3.1)
  Downloading gradio_client-1.4.3-py3-none-any.whl.metadata (7.1 kB)
Collecting mdit-py-plugins<=0.3.3 (from gradio-offline==3.28.3.1)
  Downloading mdit_py_plugins-0.3.3-py3-none-any.whl.metadata (2.8 kB)
Collecting semantic-version (from gradio-offline==3.28.3.1)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
INFO: pip is looking at multiple versions of mdit-py-plugins to determine which version is compatible with other requirements. This could take a while.
Collecting mdit-py-plugins<=0.3.3 (from gradio-offline==3.28.3.1)
  Downloading mdit_py_plugins-0.3.2-py3-none-any.whl.metadata (2.8 kB)
  Downloading mdit_py_plugins-0.3.1-py3-none-any.whl.metadata (2.8 kB)
  Downloading m

In [2]:
!pip install gradio torch diffusers accelerate google-generativeai imageio[ffmpeg]


Collecting gradio
  Downloading gradio-5.6.0-py3-none-any.whl.metadata (16 kB)
Collecting diffusers
  Downloading diffusers-0.31.0-py3-none-any.whl.metadata (18 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.5-py3-none-any.whl.metadata (27 kB)
Collecting python-multipart==0.0.12 (from gradio)
  Downloading python_multipart-0.0.12-py3-none-any.whl.metadata (1.9 kB)
Collecting ruff>=0.2.2 (from gradio)
  Downloading ruff-0.8.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<1.0,>=0.1.1 (from gradio)
  Downloading safehttpx-0.1.1-py3-none-any.whl.metadata (4.1 kB)
Collecting starlette<1.0,>=0.40.0 (from gradio)
  Downloading starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Collecting tomlkit==0.12.0 (from gradio)
  Downloading tomlkit-0.12.0-py3-none-any.whl.metadata (2.7 kB)
Collecting imageio-ffmpeg (from imageio[ffmpeg])
  Downloading imageio_ffmpeg-0.5.1-py3-none-manylinux2010_x86_64.whl.metadata (1.6 kB)

In [3]:
!pip install torch diffusers accelerate



In [4]:
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://download.pytorch.org/whl/cu118


In [None]:
import os
import shutil
import tempfile

import cv2
import google.generativeai as genai
import gradio as gr
import torch
from diffusers import DiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image

# Configure Generative AI model.
# The API key is read from the environment so that no credential is stored in
# the notebook; set GOOGLE_API_KEY before running this cell.
# NOTE(review): a key was previously committed in this cell — treat it as
# compromised and rotate it.
_api_key = os.environ.get("GOOGLE_API_KEY")
if not _api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; define it in the environment "
        "before running this cell."
    )
genai.configure(api_key=_api_key)
model = genai.GenerativeModel("gemini-1.5-flash")

# Load the pre-trained diffusion model (half-precision weights).
pipe = DiffusionPipeline.from_pretrained(
    "damo-vilab/text-to-video-ms-1.7b",
    torch_dtype=torch.float16,
    variant="fp16"
)
# Inference runs on the GPU; this line fails if no CUDA device is available.
pipe = pipe.to("cuda")


def extract_frames(video_path, output_folder, max_frames=15):
    """Save up to ``max_frames`` frames of a video as JPEG files.

    Every ``frame_interval``-th frame is kept, where ``frame_interval`` is the
    reported frame count divided by ``max_frames`` (at least 1). Frames are
    written to ``output_folder`` (created if missing) as ``frame_0000.jpg``,
    ``frame_0001.jpg``, ... in playback order.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the extracted frames.
        max_frames: Upper bound on the number of frames to save; must be >= 1.

    Returns:
        The number of frames that were saved.

    Raises:
        ValueError: If ``max_frames`` is less than 1 or the video cannot be
            opened.
    """
    # Validate first: a zero value would otherwise surface as a
    # ZeroDivisionError when the sampling interval is computed.
    if max_frames < 1:
        raise ValueError(f"max_frames must be at least 1, got {max_frames}")

    os.makedirs(output_folder, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    saved_frame_count = 0
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # max(1, ...) keeps the interval usable when the reported frame count
        # is smaller than max_frames (or not positive at all).
        frame_interval = max(1, total_frames // max_frames)

        frame_count = 0
        # Stop decoding as soon as the quota is reached instead of reading the
        # rest of the video for nothing.
        while saved_frame_count < max_frames:
            ret, frame = cap.read()
            if not ret:
                break

            if frame_count % frame_interval == 0:
                frame_path = os.path.join(output_folder, f"frame_{saved_frame_count:04d}.jpg")
                cv2.imwrite(frame_path, frame)
                saved_frame_count += 1

            frame_count += 1
    finally:
        # Release the capture handle even if reading or writing fails.
        cap.release()

    return saved_frame_count


def generate_frame_descriptions(input_folder):
    """Ask the generative model to describe every image in a folder.

    Files are processed in sorted filename order; only names ending in
    ``.png``, ``.jpg`` or ``.jpeg`` (case-insensitive) are considered.

    Args:
        input_folder: Directory containing the extracted frame images.

    Returns:
        A list with one stripped description string per image, in the same
        order as the sorted filenames. Empty if the folder has no images.
    """
    # The prompt is identical for every frame, so build it once.
    prompt = (
        "The following image is a part of a video. "
        "Provide a description highlighting the individual's role in the video as per sequence."
    )

    all_descriptions = []
    for frame_file in sorted(os.listdir(input_folder)):
        if not frame_file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        frame_path = os.path.join(input_folder, frame_file)
        # Close the image file as soon as the request has been made so the
        # handle does not linger while the folder is later removed.
        with Image.open(frame_path) as frame:
            response = model.generate_content([prompt, frame])
        all_descriptions.append(response.text.strip())

    return all_descriptions


def combine_descriptions_concisely(descriptions):
    """Merge per-frame descriptions into one short summary via the model.

    Args:
        descriptions: Iterable of description strings, one per frame.

    Returns:
        The stripped summary text returned by the model, or an empty string
        when there are no descriptions to combine.
    """
    descriptions = list(descriptions)
    # With nothing to summarise the model would only receive the bare
    # instruction, so skip the API call entirely.
    if not descriptions:
        return ""

    input_text = (
        "Combine the following frame descriptions into a single cohesive text and make it short and crisp.:\n\n"
        + "\n".join(f"- {desc}" for desc in descriptions)
    )
    response = model.generate_content([input_text])
    return response.text.strip()


def generate_video(prompt, num_iterations=4):
    """Generate a video for ``prompt`` by concatenating several pipeline runs.

    The diffusion pipeline is invoked ``num_iterations`` times with the same
    prompt; the frames of each run are appended one after another and the
    result is exported as a single video file.

    Args:
        prompt: Text prompt passed to the diffusion pipeline.
        num_iterations: Number of pipeline runs whose frames are concatenated.

    Returns:
        The path of the exported video, ``"generated_video.mp4"``.
    """
    # One pipeline call per iteration; frames are flattened in run order.
    stitched_frames = [
        frame
        for _ in range(num_iterations)
        for frame in pipe(prompt).frames[0]
    ]

    # export_to_video writes to a temporary location; move the file to the
    # fixed output name in the working directory.
    destination = "generated_video.mp4"
    shutil.move(export_to_video(stitched_frames), destination)
    return destination


def process_video(video):
    """Run the full pipeline: frames -> descriptions -> caption -> AI video.

    Args:
        video: Path of the uploaded video file.

    Returns:
        A tuple ``(video, video_caption, ai_video_path)``: the input path
        unchanged, the combined caption text, and the path of the generated
        video.

    Raises:
        ValueError: If no video was provided, or no frame descriptions could
            be produced from it.
    """
    # The callback receives None when the button is pressed without an upload.
    if not video:
        raise ValueError("No video was provided; upload a video before processing.")

    video_name = os.path.splitext(os.path.basename(video))[0]

    # A unique temporary directory avoids picking up stale frames from an
    # earlier run and is removed even when a step below raises.
    with tempfile.TemporaryDirectory(prefix=f"{video_name}_frames_") as temp_output_folder:
        # Step 1: Extract frames
        extract_frames(video, temp_output_folder)

        # Step 2: Generate a description for every extracted frame
        frame_descriptions = generate_frame_descriptions(temp_output_folder)

    if not frame_descriptions:
        raise ValueError(f"No frames could be extracted from video: {video}")

    # Step 3: Combine the descriptions into one caption
    video_caption = combine_descriptions_concisely(frame_descriptions)

    # Step 4: Generate an AI-based video
    ai_video_path = generate_video(video_caption)

    return video, video_caption, ai_video_path


# Gradio Blocks Interface
# Components are declared in the order they should appear: a title, a row with
# three columns (input video, caption text box, generated video), and a button.
with gr.Blocks() as interface:
    gr.Markdown("# AI Video Generator")

    with gr.Row():
        # Left column: the video supplied by the user.
        with gr.Column(scale=1):
            input_video = gr.Video(label="Upload Your Video")
        # Middle column (scale=2): the caption produced from the frames.
        with gr.Column(scale=2):
            description_output = gr.Textbox(label="Generated Video Description")
        # Right column: the video produced by the diffusion pipeline.
        with gr.Column(scale=1):
            output_video = gr.Video(label="AI-Generated Video")

    upload_button = gr.Button("Process Video")

    # process_video returns (video, caption, generated video path); the three
    # values are routed to the outputs below in the same order, so the input
    # video component is re-populated with the path it was given.
    upload_button.click(
        fn=process_video,
        inputs=[input_video],
        outputs=[input_video, description_output, output_video],
    )

# Start the web app for the interface defined above.
interface.launch()


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]



* Running on local URL:  http://127.0.0.1:7861
Kaggle notebooks require sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

* Running on public URL: https://1ba5f92ce4d83806b9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


