<a href="https://colab.research.google.com/github/MangalaPriyadharshini/MangalaPriyadharshini/blob/main/AIVideoSummarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [39]:
import cv2
import os

video_file = "/content/AIVideo.mp4"
output_folder = "/content/frames"
os.makedirs(output_folder, exist_ok=True)

cap = cv2.VideoCapture(video_file)
frame_count = 0

while True:
    ret, frame = cap.read()
    if not ret:
        break
    # Save one frame every N frames to reduce workload
    if frame_count % 10 == 0:  # every 10th frame
        cv2.imwrite(f"{output_folder}/frame_{frame_count:04d}.jpg", frame)
    frame_count += 1

cap.release()
print(f"✅ Frames saved in {output_folder}")


✅ Frames saved in /content/frames


In [42]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import glob

# Load image captioning model
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

frame_files = sorted(glob.glob("/content/frames/*.jpg"))
captions = []  # store all captions

for f in frame_files:
    img = Image.open(f).convert("RGB")
    inputs = processor(images=img, return_tensors="pt")
    out = model.generate(**inputs)
    caption = processor.decode(out[0], skip_special_tokens=True)
    captions.append(caption)  # add caption to list
    print(f"{f}: {caption}")



/content/frames/frame_0000.jpg: a view of the pagoda tower in the city of kyoto
/content/frames/frame_0005.jpg: a view of the sun setting over the roofs of a temple
/content/frames/frame_0010.jpg: a view of the sun setting over the roofs of a building
/content/frames/frame_0015.jpg: a view of the sun setting over the roofs of a building
/content/frames/frame_0020.jpg: a view of the sun setting over a pagoda
/content/frames/frame_0025.jpg: a view of the sun setting over the roofs of a small town
/content/frames/frame_0030.jpg: a view of the sun setting over a pagoda
/content/frames/frame_0035.jpg: a view of the sun setting over the roofs of a building
/content/frames/frame_0040.jpg: a view of the sun setting over the roofs of a building
/content/frames/frame_0045.jpg: a view of the sun setting over the roofs of a building
/content/frames/frame_0050.jpg: a view of the sun setting over the roofs of a building
/content/frames/frame_0055.jpg: a view of the sun setting over the roofs of a bu

In [44]:
from transformers import pipeline
import math

# 1️⃣ Summarizer
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# 2️⃣ Combine captions into a single string
captions_text = " ".join(captions)

# 3️⃣ Split text into smaller chunks (e.g., ~500 words)
words = captions_text.split()
chunk_size = 500
chunks = [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

# 4️⃣ Summarize each chunk
chunk_summaries = []
for c in chunks:
    summary = summarizer(c, max_length=150, min_length=50, do_sample=False)
    chunk_summaries.append(summary[0]['summary_text'])

# 5️⃣ Combine chunk summaries and summarize again if needed
final_text = " ".join(chunk_summaries)
final_summary = summarizer(final_text, max_length=150, min_length=50, do_sample=False)

print("✅ Final Video Summary:")
print(final_summary[0]['summary_text'])




Device set to use cpu


✅ Final Video Summary:
 A view of the sun setting over a pagoda tower in the middle of the city of kyoto the sun is setting over the roofs of a small town . The sun shines through the clouds over the . roofs of buildings in the Japanese city of suyo .
