In [25]:
import os
import time

import google.generativeai as genai
from IPython.display import Markdown

# Configure the Gemini client. The key is taken from the GOOGLE_API_KEY
# environment variable so a real secret never has to be pasted into the
# notebook; the original placeholder is kept as the fallback value.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "YOUR_API_HERE"))
model = genai.GenerativeModel("gemini-1.5-pro")

# Per-video conversation state, keyed by the uploaded file's URI:
#   {"summary": <summary text>, "qa": [{"question": ..., "answer": ...}, ...]}
chat_history = {}

# Handle of the most recently uploaded video; assigned by process_video().
video = None

def process_video(video_path, poll_interval=10):
    """Upload a video, wait until it has been processed, and summarize it.

    Args:
        video_path: Local path of the video file to upload.
        poll_interval: Seconds to sleep between status checks while the
            uploaded file is still in the PROCESSING state.

    Returns:
        A ``(message, video_id)`` tuple. On success ``message`` contains the
        model's summary and ``video_id`` is the uploaded file's URI, which is
        also the new key in ``chat_history``. On failure ``message`` explains
        the problem and ``video_id`` is ``None``.
    """
    global video

    # Nothing to upload: report it without touching the shared state.
    if not video_path:
        return "No video provided. Please upload a video first.", None

    print("Uploading video to Google Generative AI...")
    video = genai.upload_file(path=video_path)

    # Poll until the uploaded file leaves the PROCESSING state.
    while video.state.name == "PROCESSING":
        time.sleep(poll_interval)
        video = genai.get_file(video.name)

    if video.state.name == "FAILED":
        # Same two-element shape as the success path, so callers that unpack
        # ``message, video_id`` keep working when processing fails.
        return "Video processing failed. Please try again.", None

    prompt = "Summarize this video."
    response = model.generate_content([video, prompt])
    summary = response.text

    # Start a fresh history entry for this video, keyed by its URI.
    video_id = video.uri
    chat_history[video_id] = {"summary": summary, "qa": []}

    return f"Video processed successfully!\n\nSummary:\n{summary}", video_id

# Function to handle Q&A on the video
def ask_question(video_id, question):
    """Answer a question about a video that has already been processed.

    The prompt sent to the model contains the stored summary, every earlier
    question/answer pair recorded for ``video_id``, and the new question. The
    module-level ``video`` handle is passed to the model alongside the prompt.

    Args:
        video_id: Key into ``chat_history``.
        question: The new question to ask.

    Returns:
        The model's answer text, or an error message when ``video_id`` has no
        entry in ``chat_history``.
    """
    if video_id not in chat_history:
        return "Invalid video ID. Please upload and process the video first."
    record = chat_history[video_id]

    # Render the earlier exchanges, oldest first, as "Q: ...\nA: ..." blocks.
    transcript = []
    for turn in record["qa"]:
        transcript.append(f"Q: {turn['question']}\nA: {turn['answer']}")
    context = "\n".join(transcript)

    prompt = (
        f"Video Summary: {record['summary']}\n\n"
        f"Context:\n{context}\n\n"
        f"New Question: {question}"
    )

    answer = model.generate_content([video, prompt]).text

    # Remember this exchange so later questions see it as context.
    record["qa"].append({"question": question, "answer": answer})
    return answer

In [18]:
!pip install gradio



In [None]:
import gradio as gr
import os

# Id of the video currently being discussed; process_video_only() assigns it
# after a video has been processed. It is initialised here because a bare
# `video_id` expression raises NameError on a fresh kernel.
video_id = None

# HTML header rendered at the top of the page: a linked logo image next to the
# "Video Query" heading.
# NOTE(review): the link targets the VideoLLaMA2 repository — presumably left
# over from a template; confirm it is intended.
title_markdown = ("""
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
  <a href="https://github.com/DAMO-NLP-SG/VideoLLaMA2" style="margin-right: 20px; text-decoration: none; display: flex; align-items: center;">
    <img src="https://i.ibb.co/FBsSrLV/img.png" style="max-width: 120px; height: auto;">
  </a>
  <div>
    <h1>Video Query</h1>
  </div>
</div>
""")

# Extra CSS handed to gr.Blocks: sets the minimum width and text colour of
# buttons inside the element whose id is "buttons".
block_css = """
#buttons button {
    min-width: min(120px,100%);
    color: #9C276A
}
"""


# Custom "plum" palette (shades c50 through c950, lightest to darkest) used as
# the theme's primary hue.
plum_color = gr.themes.colors.Color(
    name='plum',
    c50='#F8E4EF',
    c100='#E9D0DE',
    c200='#DABCCD',
    c300='#CBA8BC',
    c400='#BC94AB',
    c500='#AD809A',
    c600='#9E6C89',
    c700='#8F5878',
    c800='#804467',
    c900='#713056',
    c950='#662647',
)

def generate_mock_response(audio_video, message, chatbot, textbox_in):
    """Answer the user's question and record the exchange in the chat state.

    Despite the name, the answer is not mocked: it comes from
    ``ask_question`` for the module-level ``video_id``.

    Args:
        audio_video: Current value of the video input; returned unchanged.
        message: Flat list alternating question and response strings.
        chatbot: List of ``[question, response]`` pairs.
        textbox_in: Text entered by the user.

    Returns:
        ``(audio_video, message, chatbot)`` with the new exchange appended to
        both lists, or the inputs untouched when ``textbox_in`` is blank.
    """
    # A blank question would only cost a model call and add an empty exchange.
    if not textbox_in or not textbox_in.strip():
        return audio_video, message, chatbot

    response = ask_question(video_id, textbox_in)
    chatbot.append([textbox_in, response])
    message.extend([textbox_in, response])
    return audio_video, message, chatbot

def regenerate(message, chatbot):
    """Remove the most recent exchange from the chat state.

    Drops the last two entries of ``message`` (question and response) and the
    last pair of ``chatbot``. The model is not queried again here. Empty
    histories are left as they are instead of raising ``IndexError``.

    Args:
        message: Flat list alternating question and response strings.
        chatbot: List of ``[question, response]`` pairs.

    Returns:
        ``(message, chatbot)`` after the removal; both lists are modified in
        place.
    """
    # Slice deletion is a no-op on an empty list, unlike pop().
    del message[-2:]
    if chatbot:
        chatbot.pop()
    return message, chatbot

def clear_history(message, chatbot):
    """Empty both history lists and build reset updates for two components.

    Args:
        message: Flat message history; cleared in place.
        chatbot: Chat pair history; cleared in place.

    Returns:
        A 4-tuple ``(update, message, chatbot, update)`` where each update
        sets its component's value to ``None`` and keeps it interactive.
    """
    for history in (message, chatbot):
        history.clear()

    first_reset = gr.update(value=None, interactive=True)
    last_reset = gr.update(value=None, interactive=True)
    return first_reset, message, chatbot, last_reset
def process_video_only(audio_video, message, chatbot):
    """Process the selected video and show the outcome in the chat.

    Calls ``process_video`` and stores the resulting id in the module-level
    ``video_id`` (``None`` when no id was produced).

    Args:
        audio_video: Path of the video to process; returned unchanged.
        message: Flat message history; two entries are appended.
        chatbot: Chat pair history; one pair is appended.

    Returns:
        ``(audio_video, message, chatbot)``.
    """
    global video_id

    result = process_video(audio_video)
    # Accept a bare-string result as well as a (message, video_id) pair, so a
    # failure report never breaks the unpacking.
    if isinstance(result, tuple):
        response, video_id = result
    else:
        response, video_id = result, None

    chatbot.append(["Summarize this video", response])
    message.extend(["[Video Processed]", response])
    return audio_video, message, chatbot

# Theme: the default Gradio theme with the plum palette as primary hue, and a
# single accent colour for the slider, block titles/labels and the text of
# primary buttons.
theme = gr.themes.Default(primary_hue=plum_color).set(
    slider_color="#9C276A",
    block_title_text_color="#9C276A",
    block_label_text_color="#9C276A",
    button_primary_text_color="#9C276A",
)

# Page layout and event wiring for the video question-answering app.
with gr.Blocks(title='Video Question Answering 2 🔥🚀🔥', theme=theme, css=block_css) as demo:
    gr.Markdown(title_markdown)
    # Per-session flat list of questions and responses.
    message = gr.State([])

    with gr.Row():
        # Left column: video input and the button that triggers processing.
        with gr.Column(scale=3):
            audio_video = gr.Video(label="Input Video")
            video_submit_btn = gr.Button(value="Process Video", variant="primary", interactive=True)

        # Right column: conversation, question box and history controls.
        with gr.Column(scale=7):
            chatbot = gr.Chatbot(label="VideoLLaMA 2", bubble_full_width=True, height=750)
            with gr.Row():
                with gr.Column(scale=8):
                    textbox = gr.Textbox(show_label=False, placeholder="Enter text and press ENTER", container=False)
                with gr.Column(scale=1, min_width=50):
                    submit_btn = gr.Button(value="Send", variant="primary", interactive=True)
            with gr.Row(elem_id="buttons") as button_row:
                regenerate_btn = gr.Button(value="🔄  Regenerate", interactive=True)
                clear_btn = gr.Button(value="🗑️  Clear history", interactive=True)

    video_submit_btn.click(
        process_video_only,
        inputs=[audio_video, message, chatbot],
        outputs=[audio_video, message, chatbot]
    )

    submit_btn.click(
        generate_mock_response,
        [audio_video, message, chatbot, textbox],
        [audio_video, message, chatbot]
    )

    # The placeholder promises that ENTER sends the question, so the textbox's
    # submit event is wired to the same handler as the Send button.
    textbox.submit(
        generate_mock_response,
        [audio_video, message, chatbot, textbox],
        [audio_video, message, chatbot]
    )

    regenerate_btn.click(
        regenerate,
        [message, chatbot],
        [message, chatbot]
    )

    clear_btn.click(
        clear_history,
        [message, chatbot],
        [audio_video, message, chatbot, textbox]
    )

# Start the app. share=True requests a temporary public URL; debug=True keeps
# this cell running so errors and logs stay visible (set debug=False to stop that).
demo.launch(share=True,debug=True)



Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://12da9ed3103b9562fa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Uploading video to Google Generative AI...
Uploading video to Google Generative AI...
Uploading video to Google Generative AI...
