# Mounting Google Drive in Google Colab allows seamless access to cloud storage.

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; every path used later in this notebook lives under it.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Extract frames from a video file and save them as images in a specified output folder.

In [None]:
import cv2
import os

def extract_frames(video_path, output_folder, max_frames=300, interval_sec=1):
    """Sample frames from a video at fixed time steps and save them as JPEGs.

    Starting at 0 seconds, the capture is positioned at successive multiples
    of ``interval_sec`` and the frame read there is written to
    ``<output_folder>/<timestamp>.jpg``. Sampling stops at the first failed
    read or after ``max_frames`` frames.

    Args:
        video_path: Path of the video file to read.
        output_folder: Directory that receives the images; created if missing.
        max_frames: Upper bound on the number of frames to extract (default 300).
        interval_sec: Seconds between sampled frames (default 1). The timestamp
            is used verbatim as the file name, so keep this an integer if the
            file names must stay integer timestamps.

    Returns:
        None. If the video cannot be opened an error is printed and nothing
        is written.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        # Check if video opened successfully
        if not cap.isOpened():
            print("Error: Could not open video.")
            return

        # Create output folder if not exists
        os.makedirs(output_folder, exist_ok=True)

        for frame_index in range(max_frames):
            timestamp = frame_index * interval_sec

            # Seek to the timestamp (milliseconds) and grab the frame found there
            cap.set(cv2.CAP_PROP_POS_MSEC, timestamp * 1000)
            ret, frame = cap.read()
            if not ret:
                break

            # Save frame with timestamp as filename
            frame_filename = os.path.join(output_folder, f"{timestamp}.jpg")
            if not cv2.imwrite(frame_filename, frame):
                print(f"Warning: Could not write frame to {frame_filename}.")
    finally:
        # Always release the capture, even if seeking/reading/writing raised.
        # No GUI window is ever opened here, so there is nothing to destroy.
        cap.release()

# Path to the input video file
video_path = "/content/drive/MyDrive/Internship_Task/input.mp4"

# Output folder to save frames
# (the OCR, captioning and face-detection cells later in this notebook read from this same folder)
output_folder = "/content/drive/MyDrive/Internship_Task/output1"
# Extract frames from video
extract_frames(video_path, output_folder)


# Text Detection from Image

In [None]:
import cv2
import pytesseract
import os

def extract_text_from_frame(frame_path, search_text=None):
    """Report whether ``search_text`` appears in the OCR output of an image.

    The image is loaded with OpenCV, converted to grayscale and passed to
    pytesseract; the comparison is case-insensitive.

    Args:
        frame_path: Path of the image file to analyse.
        search_text: Text to look for. When empty or None nothing can match,
            so the image is not even read.

    Returns:
        True if ``search_text`` occurs in the recognised text, otherwise
        False (including when the image cannot be read).
    """
    # Without a search term the answer is always False; skip the costly OCR.
    if not search_text:
        return False

    # cv2.imread signals an unreadable/non-image file by returning None
    frame = cv2.imread(frame_path)
    if frame is None:
        print(f"Warning: Could not read image {frame_path}.")
        return False

    # Convert frame to grayscale before OCR
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    text = pytesseract.image_to_string(gray)

    return search_text.lower() in text.lower()

def analyse_frames(frame_folder, search_text=None):
    """Run OCR over every timestamp-named frame in a folder.

    Only regular files whose name (without extension) is an integer are
    treated as frames; anything else in the folder (sub-directories such as
    ``.ipynb_checkpoints``, stray files) is skipped instead of raising.
    Frames are processed in ascending timestamp order, so the returned dict
    iterates chronologically.

    Args:
        frame_folder: Directory containing frames named ``<timestamp>.<ext>``.
        search_text: Text to look for in each frame (None matches nothing).

    Returns:
        dict mapping integer timestamp -> frame file name for every frame in
        which ``search_text`` was found.
    """
    # Collect (timestamp, file name, path) for everything that looks like a frame
    numbered_frames = []
    for frame_file in os.listdir(frame_folder):
        frame_path = os.path.join(frame_folder, frame_file)
        if not os.path.isfile(frame_path):
            continue
        stem = os.path.splitext(frame_file)[0]
        try:
            timestamp = int(stem)
        except ValueError:
            # Not a "<timestamp>.<ext>" file
            continue
        numbered_frames.append((timestamp, frame_file, frame_path))

    # os.listdir order is arbitrary; sort so results come out chronologically
    timestamp_map = {}
    for timestamp, frame_file, frame_path in sorted(numbered_frames):
        if extract_text_from_frame(frame_path, search_text):
            timestamp_map[timestamp] = frame_file

    return timestamp_map

# Directory containing the frames
frame_folder = "/content/drive/MyDrive/Internship_Task/output1"

# Specific text to search for in frames (None if not searching for any specific text)
search_text = "Bakerloo Line"

# Analyse frames and retrieve timestamps when search text appears
timestamp_map = analyse_frames(frame_folder, search_text)

# Display timestamps when search text appears, in chronological order
# (sorted here so the listing does not depend on the dict's insertion order)
if timestamp_map:
    print("Timestamps when '{}' appears in the frames:".format(search_text))
    for ts, frame_file in sorted(timestamp_map.items()):
        print("Timestamp:", ts, "Frame File:", frame_file)
else:
    print("Text not found in the frames.")


In [None]:
# pytesseract is imported by the text-detection cell earlier in this notebook, so on a
# fresh runtime this cell has to be executed before that one.
# NOTE(review): pytesseract wraps the external Tesseract program; confirm the runtime
# actually has it installed.
!pip install pytesseract
!pip install gradio



Collecting pytesseract
  Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.10


https://pallyy.com/tools/image-caption-generator

In [None]:
import gradio as gr
import os
from PIL import Image
from transformers import AutoTokenizer, ViTFeatureExtractor, VisionEncoderDecoderModel

# Device the captioning model and its inputs are moved to.
device = 'cpu'
# All three names hold the same checkpoint identifier.
encoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
decoder_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
model_checkpoint = "nlpconnect/vit-gpt2-image-captioning"
# Module-level objects read by predict(): image preprocessor, tokenizer and model.
# NOTE: a later cell rebinds `tokenizer` and `model` to GPT-2 XL, so this cell must be
# re-run before captioning again once that cell has executed.
feature_extractor = ViTFeatureExtractor.from_pretrained(encoder_checkpoint)
tokenizer = AutoTokenizer.from_pretrained(decoder_checkpoint)
model = VisionEncoderDecoderModel.from_pretrained(model_checkpoint).to(device)

def predict(image, max_length=64, num_beams=4):
    """Generate a one-line caption for an image.

    Uses the module-level ``feature_extractor``, ``model``, ``tokenizer`` and
    ``device``.

    Args:
        image: Image object providing ``convert('RGB')`` (e.g. a PIL image).
        max_length: Maximum length passed to ``model.generate``.
        num_beams: Number of beams passed to ``model.generate``. Previously
            this argument was accepted but silently ignored.

    Returns:
        The first line of the decoded caption, with special tokens removed
        and surrounding whitespace stripped.
    """
    rgb_image = image.convert('RGB')
    pixel_values = feature_extractor(rgb_image, return_tensors="pt").pixel_values.to(device)

    caption_ids = model.generate(pixel_values, max_length=max_length, num_beams=num_beams)[0]

    # The earlier clean-up step was `x.replace('', '')`, which changes nothing,
    # so special tokens were never removed; let the tokenizer drop them instead.
    decoded = tokenizer.decode(caption_ids, skip_special_tokens=True)
    return decoded.split('\n')[0].strip()

def _frame_sort_key(frame_file):
    """Sort key: integer-named frames first in numeric order, then the rest by name."""
    stem = os.path.splitext(frame_file)[0]
    try:
        return (0, int(stem), frame_file)
    except ValueError:
        return (1, 0, frame_file)


def generate_captions_for_frames(frame_dir, output_file):
    """Caption every frame in a directory and write the results to a text file.

    One line per frame is written in the form ``<frame file>: <caption>``.
    Frames are processed in numeric file-name order (``2.jpg`` before
    ``10.jpg``) so the captions follow the video timeline; sub-directories
    are skipped. Each image is closed as soon as it has been captioned.

    Args:
        frame_dir: Directory containing the frame images.
        output_file: Path of the text file to (over)write with the captions.

    Returns:
        None.
    """
    # os.listdir order is arbitrary; keep only files and order them numerically
    frame_files = sorted(
        (name for name in os.listdir(frame_dir)
         if os.path.isfile(os.path.join(frame_dir, name))),
        key=_frame_sort_key,
    )

    with open(output_file, 'w') as f:
        for frame_file in frame_files:
            frame_path = os.path.join(frame_dir, frame_file)

            # Context manager closes the image file handle after captioning
            with Image.open(frame_path) as frame_image:
                caption = predict(frame_image)

            # Write caption along with filename to the output file
            f.write(f"{frame_file}: {caption}\n")

# Directory containing the video frames
# (same folder the frame-extraction cell writes its <timestamp>.jpg files to)
frame_directory = "/content/drive/MyDrive/Internship_Task/output1"
# Output file to store captions
# (opened in write mode by generate_captions_for_frames, so an existing file is overwritten)
output_file = "/content/drive/MyDrive/Internship_Task/op.txt"

# Generate captions for frames and store them in the output file
generate_captions_for_frames(frame_directory, output_file)


We strongly recommend passing in an `attention_mask` since your input_ids may be padded. See https://huggingface.co/docs/transformers/troubleshooting#incorrect-output-when-padding-tokens-arent-masked.


In [None]:
!pip install gradio

In [None]:
import gradio as gr
import os
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer

In [None]:

# Load GPT-2 XL model and tokenizer
# NOTE: this rebinds the global names `tokenizer` and `model`, which the image-captioning
# cell also assigns and predict() reads; re-run that cell before calling predict() again.
model_name = "gpt2-xl"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Load captions from file
def load_captions(file_path):
    """Read captions from a text file, one caption per line.

    Trailing newlines and surrounding whitespace are removed and blank lines
    are dropped, so every returned entry is usable text.

    Args:
        file_path: Path of the captions file.

    Returns:
        list[str] of non-empty, stripped lines in file order.
    """
    with open(file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]

# Generate story based on captions
def generate_story(captions, chunk_size=50, max_length=500):
    """Generate a story by letting the language model continue each caption.

    Every caption is stripped, cut into pieces of at most ``chunk_size``
    characters, and each non-empty piece is used as a prompt for the
    module-level ``model`` / ``tokenizer``. The generated texts are joined
    with single spaces.

    Args:
        captions: Iterable of caption strings.
        chunk_size: Maximum number of characters per prompt (default 50).
        max_length: ``max_length`` passed to ``model.generate`` (default 500).

    Returns:
        The concatenated story as one string ("" when there is nothing to
        generate from).

    Note:
        Sampling is enabled so that ``temperature`` actually takes effect;
        the output therefore varies between runs unless a seed is set.
    """
    story_chunks = []

    for caption in captions:
        caption = caption.strip()
        for start in range(0, len(caption), chunk_size):
            chunk = caption[start:start + chunk_size].strip()
            # Whitespace-only pieces carry no prompt; skip them
            if not chunk:
                continue

            # Calling the tokenizer returns input_ids together with the attention mask
            encoded = tokenizer(chunk, return_tensors="pt")

            output = model.generate(
                encoded["input_ids"],
                attention_mask=encoded["attention_mask"],
                max_length=max_length,
                do_sample=True,  # temperature is only used when sampling
                temperature=0.7,
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,
            )

            story_chunks.append(tokenizer.decode(output[0], skip_special_tokens=True))

    # Concatenate story chunks into a single story
    return " ".join(story_chunks)

# File paths
# (the same file the captioning cell writes its "<frame file>: <caption>" lines to)
captions_file = "/content/drive/MyDrive/Internship_Task/op.txt"

# Load captions
captions = load_captions(captions_file)

# Generate story based on captions
story = generate_story(captions)

# Print or save the generated story
print(story)


# ALTERNATIVE METHOD - USING GEMINI

In [None]:
!pip install requests

In [None]:
import requests

# Function to generate story based on captions using Gemini API
def generate_story_with_gemini(captions, prompt, chunk_size=50, timeout=30):
    """Generate a story by sending caption pieces to a remote completion API.

    Every caption is stripped, cut into pieces of at most ``chunk_size``
    characters, and each non-empty piece is POSTed to the API. The returned
    texts are appended after ``prompt`` and joined with single spaces.

    Args:
        captions: Iterable of caption strings.
        prompt: Text placed at the start of the returned story.
        chunk_size: Maximum number of characters per request (default 50).
        timeout: Seconds to wait for each HTTP request (default 30), so a
            stalled connection cannot hang the notebook indefinitely.

    Returns:
        The prompt followed by all generated pieces, as one string.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    """
    # The prompt opens the returned story.
    # NOTE(review): it is not part of any request payload below - confirm that is intended.
    story_chunks = [prompt]

    for caption in captions:
        caption = caption.strip()
        for start in range(0, len(caption), chunk_size):
            chunk = caption[start:start + chunk_size].strip()
            # Whitespace-only pieces carry no content; do not spend a request on them
            if not chunk:
                continue

            # NOTE(review): this endpoint and model_id could not be matched to a
            # published API, and the request carries no credentials - verify before use.
            response = requests.post(
                "https://api.gemini-openai.com/v1/complete",
                json={"text": chunk, "model_id": "gemini-turing-002", "max_tokens": 500, "temperature": 0.7},
                timeout=timeout,
            )
            # Fail with a clear HTTP error instead of a KeyError on an error payload
            response.raise_for_status()

            # Extract generated text from the API response
            story_chunk = response.json()["choices"][0]["text"].strip()
            story_chunks.append(story_chunk)

    # Concatenate story chunks into a single story
    return " ".join(story_chunks)

# Function to load captions from a file
def load_captions_from_file(file_path):
    """Read captions from a text file, one caption per line.

    Trailing newlines and surrounding whitespace are removed and blank lines
    are dropped, so every returned entry is usable text.

    Args:
        file_path: Path of the captions file.

    Returns:
        list[str] of non-empty, stripped lines in file order.
    """
    with open(file_path, 'r') as file:
        stripped_lines = (line.strip() for line in file)
        return [line for line in stripped_lines if line]

# File path to the captions file
# (same path the captioning cell writes to; this also rebinds the `captions_file`,
# `captions` and `story` names assigned by the GPT-2 cell)
captions_file = "/content/drive/MyDrive/Internship_Task/op.txt"

# Load captions from the file
captions = load_captions_from_file(captions_file)

# Craft the prompt
# NOTE: generate_story_with_gemini places this text at the start of the returned story;
# it is not included in the API request payloads.
prompt = """
Prompt:
You are a master storyteller tasked with crafting a captivating narrative based on a collection of intriguing captions. These captions offer glimpses into various scenes, characters, and emotions. Your challenge is to weave these disparate elements into a cohesive and enchanting tale that transports the reader to a world of wonder and adventure. Let your imagination roam freely as you bring these fragments together, creating a story that unfolds with each word, drawing the reader deeper into its enchanting embrace.
"""

# Generate story based on captions using Gemini API
story = generate_story_with_gemini(captions, prompt)

# Print the generated story
print(story)


# FACIAL RECOGNITION

In [None]:
pip install opencv-python dlib


In [None]:
import os
import cv2
import dlib
from collections import Counter

# Function to detect faces in a frame using dlib
def detect_faces_dlib(frame):
    """Detect frontal faces in an image with dlib.

    The detector object is created once on first use and reused on later
    calls instead of being rebuilt for every frame.

    Args:
        frame: Image array to search.
            NOTE(review): callers in this notebook pass the result of
            cv2.imread; confirm the channel order is what dlib expects.

    Returns:
        The detections returned by the dlib detector, called with an
        upsampling factor of 1.
    """
    return _get_frontal_face_detector()(frame, 1)


def _get_frontal_face_detector():
    """Create dlib's frontal face detector on first use and return the cached instance."""
    if _get_frontal_face_detector.detector is None:
        _get_frontal_face_detector.detector = dlib.get_frontal_face_detector()
    return _get_frontal_face_detector.detector


# Cache slot for the lazily created detector
_get_frontal_face_detector.detector = None

# Directory containing frames
frames_dir = "/content/drive/MyDrive/Internship_Task/output1"

# Initialize a Counter to count face occurrences
# NOTE(review): keys are str(face) of each detection - presumably the bounding-box
# coordinates - so this counts repeated detections at the same position rather than
# recognising the same person across frames. Confirm this is the intended metric.
face_counter = Counter()

# Iterate over frames in the directory
for filename in os.listdir(frames_dir):
    if filename.endswith(".jpg"):  # Assuming frames are stored as JPEG images
        filepath = os.path.join(frames_dir, filename)
        frame = cv2.imread(filepath)
        if frame is not None:
            # Detect faces in the frame
            faces = detect_faces_dlib(frame)
            # Increment the count for each detected face
            for face in faces:
                face_counter[str(face)] += 1

# Determine the most occurring face; most_common(1) is empty when nothing was
# detected, so guard against indexing into an empty list.
if face_counter:
    most_occurring_face, occurrences = face_counter.most_common(1)[0]

    print("Most occurring face:", most_occurring_face)
    print("Occurrences:", occurrences)
else:
    print("No faces detected in the frames.")
