In [5]:
import os
import cv2
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from google.colab import files  # Only works in Colab
from IPython.display import display

# Load the BLIP processor and captioning model from a single checkpoint id
# so the two objects cannot drift apart.
BLIP_CHECKPOINT = "Salesforce/blip-image-captioning-base"
processor = BlipProcessor.from_pretrained(BLIP_CHECKPOINT)
model = BlipForConditionalGeneration.from_pretrained(BLIP_CHECKPOINT)

def extract_frame(video_path, timestamp_sec=2):
    """Save a single frame of a video as a JPEG and return the image path.

    The capture is positioned ``timestamp_sec`` seconds into the video, one
    frame is read, and it is written to ``"frame_<video basename>.jpg"`` (a
    relative path, so it lands in the current working directory).

    Args:
        video_path: Path of the video file to read.
        timestamp_sec: Position, in seconds, of the frame to grab.

    Returns:
        The path of the written JPEG, or ``None`` when the video cannot be
        opened, no frame can be read at that position, or the image cannot
        be written.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Bail out explicitly on an unreadable video instead of relying on
        # read() failing later.
        if not cap.isOpened():
            return None
        cap.set(cv2.CAP_PROP_POS_MSEC, timestamp_sec * 1000)
        success, frame = cap.read()
    finally:
        # Release the capture even if seeking or reading raised.
        cap.release()

    if not success or frame is None:
        return None

    frame_path = "frame_" + os.path.basename(video_path) + ".jpg"
    # imwrite reports failure through its return value; do not hand back a
    # path to a file that was never written.
    if not cv2.imwrite(frame_path, frame):
        return None
    return frame_path

def generate_caption(image_path):
    """Produce a BLIP caption for the image stored at ``image_path``.

    The file is opened with PIL and converted to RGB, passed through the
    module-level ``processor`` to build "pt" tensors, fed to
    ``model.generate``, and the first generated sequence is decoded with
    special tokens stripped.

    Args:
        image_path: Path of the image file to caption.

    Returns:
        The decoded text of the first generated sequence.
    """
    rgb_image = Image.open(image_path).convert("RGB")
    model_inputs = processor(rgb_image, return_tensors="pt")
    generated_ids = model.generate(**model_inputs)
    first_sequence = generated_ids[0]
    return processor.decode(first_sequence, skip_special_tokens=True)

def caption_video(video_path):
    """Caption one video by describing a single frame extracted from it.

    Prints a progress line, grabs a frame with ``extract_frame`` (using that
    function's default timestamp), and captions it with ``generate_caption``.

    Args:
        video_path: Path of the video file to process.

    Returns:
        The generated caption, or the literal string
        ``"Frame extraction failed."`` when no frame could be extracted.
    """
    print(f"\n📹 Processing: {video_path}")
    still_path = extract_frame(video_path)

    # Guard clause: without a frame there is nothing to caption.
    if not still_path:
        print("❌ Failed to extract a frame.")
        return "Frame extraction failed."

    text = generate_caption(still_path)
    print(f"📝 Caption: {text}")
    return text

# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving ApplyEyeMakeup.avi to ApplyEyeMakeup.avi

📹 Processing: ApplyEyeMakeup.avi
📝 Caption: a woman is putting her makeup with a brush


In [6]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving CuttingInKitchen.avi to CuttingInKitchen.avi

📹 Processing: CuttingInKitchen.avi
📝 Caption: a person cutting a piece of paper with a knife


In [7]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving HandStandPushups.avi to HandStandPushups.avi

📹 Processing: HandStandPushups.avi
📝 Caption: a man is doing a trick on a red carpet


In [8]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving PizzaTossing.avi to PizzaTossing.avi

📹 Processing: PizzaTossing.avi
📝 Caption: a man in an apron is making pizza


In [9]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving SoccerPenalty.avi to SoccerPenalty.avi

📹 Processing: SoccerPenalty.avi
📝 Caption: a soccer game is shown in the screen


In [10]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving Typing.avi to Typing.avi

📹 Processing: Typing.avi
📝 Caption: a person using a keyboard to play music


In [11]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving UnevenBars.avi to UnevenBars.avi

📹 Processing: UnevenBars.avi
📝 Caption: a man on a pole doing tricks


In [12]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving WalkingWithDog.avi to WalkingWithDog.avi

📹 Processing: WalkingWithDog.avi
📝 Caption: a dog is walking down the road in the woods


In [13]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving WritingOnBoard.avi to WritingOnBoard.avi

📹 Processing: WritingOnBoard.avi
📝 Caption: a man writing on a blackboard


In [14]:
# Pick one or more videos in Colab's upload dialog, then caption each one.
uploaded = files.upload()

# The mapping's keys are used as the video filenames.
for filename in uploaded:
    caption_video(filename)

Saving YoYo.avi to YoYo.avi

📹 Processing: YoYo.avi
📝 Caption: a man holding a tree branch
