In [1]:
import cv2
import numpy as np
from PIL import Image, ImageDraw, ImageFont, UnidentifiedImageError
import random
import matplotlib.font_manager as fm
import string
import os
import csv
import math

random.seed(2697134)

### Explanation of get_logos_from_directory And get_videos_from_directory Functions
`get_logos_from_directory(directory)`
The get_logos_from_directory function is designed to retrieve a list of file paths for all PNG files within a specified directory. It takes a single parameter, directory, which is the path to the directory where the search will be conducted. The function iterates through all the files in the given directory, checking each one to see if it ends with the '.png' extension. If a file meets this criterion, its full path is added to a list. Once all files have been checked, the function returns the list containing the paths of all PNG files found in the directory.

`get_videos_from_directory(directory)`
The get_videos_from_directory function operates similarly to retrieve file paths, but specifically for MP4 files in a given directory. It also takes a single parameter, directory, which is the path to the directory where the search will be conducted. The function iterates through all the files in the specified directory, checking each one to see if it ends with the '.mp4' extension. If a file meets this criterion, its full path is added to a list. After checking all files, the function returns a list of the paths for all MP4 files found in the directory.

In [2]:
def get_logos_from_directory(directory):
    """
    Retrieves a sorted list of file paths for all PNG files in the specified directory.

    The extension check is case-insensitive ('.png', '.PNG', ...). The result is
    sorted by file name because os.listdir returns entries in arbitrary order, and
    callers that pick from this list with a seeded random generator would otherwise
    not be reproducible.

    Parameters:
    directory (str): The path to the directory to search for PNG files.

    Returns:
    list: A list of file paths for PNG files in the directory, sorted by file name.
    """
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith('.png')
    ]

def get_videos_from_directory(directory):
    """
    Retrieves a sorted list of file paths for all MP4 files in the specified directory.

    The extension check is case-insensitive ('.mp4', '.MP4', ...). The result is
    sorted by file name because os.listdir returns entries in arbitrary order; a
    stable order keeps the sequence in which videos are processed the same from
    run to run.

    Parameters:
    directory (str): The path to the directory to search for MP4 files.

    Returns:
    list: A list of file paths for MP4 files in the directory, sorted by file name.
    """
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith('.mp4')
    ]

### Explanation of random_edge_position Function
The `random_edge_position` function calculates a random position for text along the edges of an image, taking into account the image size, text size, and rotation angle of the text. The function accepts three parameters: `image_size`, a tuple representing the width and height of the image; `text_size`, a tuple representing the width and height of the text; and `angle`, the rotation angle of the text in degrees. First, it computes the absolute values of the cosine and sine of the rotation angle (converted to radians) to determine how the text size will be affected by the rotation. Using these values, the function calculates the width and height of the bounding box of the rotated text. It then randomly chooses an edge of the image ('top', 'bottom', 'left', or 'right') to position the text. Depending on the chosen edge, the function calculates a random position inside a band along that edge that is 10% of the image's height (top/bottom) or width (left/right), while ensuring the text remains within the image boundaries. Finally, it returns the coordinates (x, y) representing the position of the text within the image.

In [3]:
def random_edge_position(image_size, text_size, angle):
    """
    Calculates a random position for text along the edges of an image, given the image size, text size, and angle.

    One of the four edges is chosen at random. The object is placed inside a band
    along that edge whose depth is 10% of the image height (top/bottom) or width
    (left/right). If the rotated object does not fit inside the band, it is anchored
    to the edge itself; in every case the returned position is clamped so that the
    rotated bounding box stays inside the image whenever it can fit at all.

    Parameters:
    image_size (tuple): A tuple (width, height) representing the size of the image.
    text_size (tuple): A tuple (width, height) representing the size of the text.
    angle (float): The angle, in degrees, at which the text is rotated.

    Returns:
    tuple: A tuple (x, y) representing the top-left position of the text within the image.
    """
    # Absolute cosine/sine of the angle give the extents of the rotated bounding box
    cos_angle = abs(math.cos(math.radians(angle)))
    sin_angle = abs(math.sin(math.radians(angle)))

    # Unpack the image and text dimensions
    img_width, img_height = image_size
    width, height = text_size

    # Width and height of the axis-aligned bounding box of the rotated text
    obj_width = int(width * cos_angle + height * sin_angle)
    obj_height = int(height * cos_angle + width * sin_angle)

    # Depth of the edge band (10% of the corresponding image dimension)
    band_width = int(img_width * 0.1)
    band_height = int(img_height * 0.1)

    # Largest top-left coordinates that keep the object inside the image.
    # Clamped at 0 so an object larger than the image does not make randint() fail.
    max_x = max(0, img_width - obj_width)
    max_y = max(0, img_height - obj_height)

    # Randomly choose an edge to place the text
    edge_choice = random.choice(['top', 'bottom', 'left', 'right'])

    if edge_choice in ('top', 'bottom'):
        x = random.randint(0, max_x)
        # Free room inside the band once the object height is accounted for
        slack = max(0, band_height - obj_height)
        if edge_choice == 'top':
            y = random.randint(0, slack)
        else:
            band_start = img_height - band_height
            # Clamp so an object taller than the band is not pushed past the bottom edge
            y = min(random.randint(band_start, band_start + slack), max_y)
    else:
        slack = max(0, band_width - obj_width)
        if edge_choice == 'left':
            x = random.randint(0, slack)
        else:
            band_start = img_width - band_width
            # Clamp so an object wider than the band is not pushed past the right edge
            x = min(random.randint(band_start, band_start + slack), max_x)
        y = random.randint(0, max_y)

    return x, y

### Explanation of addRandomText Function
The `addRandomText` function adds random text to an image using specified fonts and takes into account the dataset type, which affects the transparency of the text. It accepts three parameters: image, a PIL image to which the text will be added; fonts, a list of font file paths to choose from; and dataset_type, an integer that influences the text's transparency. The function starts by creating a drawing context for the image and generating a random text string composed of letters, digits, punctuation, and spaces with a length between 4 and 17 characters. It then selects a random font from the provided list and a random font size between 20 and 50. The bounding box of the text is calculated to determine its dimensions.

Next, the function generates a random rotation angle for the text and uses the `random_edge_position` function to find a random position along the edges of the image for placing the text. An RGBA image is created for the text to handle transparency, and the text is drawn onto this image with a randomly chosen color and alpha (opacity) value. The alpha range depends on the dataset type: 100–255 (more opaque) for dataset types 1 and 2, and 10–100 (more transparent) for all other types.

The text image is then rotated by the generated angle, and the rotated text image is pasted onto the original image at the calculated position. Finally, the function returns the modified image with the added text.

In [4]:
def addRandomText(image, fonts, dataset_type):
    """
    Adds random text to an image with specified fonts and dataset type.

    The text (4-17 random characters) is rendered in a random font, size (20-50),
    color and rotation onto a transparent RGBA canvas, which is then pasted onto
    `image` at a position returned by random_edge_position. The input image is
    modified in place and also returned.

    Parameters:
    image (PIL.Image): The image to which the text will be added (modified in place).
    fonts (list): A list of font file paths to choose from.
    dataset_type (int): The type of dataset, which affects the text transparency:
        alpha is drawn from 100-255 for types 1 and 2, and from 10-100 otherwise.

    Returns:
    PIL.Image: The image with the added text.
    """
    # Drawing context on the target image; used only to measure the text
    draw = ImageDraw.Draw(image)

    # Generate a random text string
    alphabet = string.ascii_letters + string.digits + string.punctuation + ' '
    text = ''.join(random.choices(alphabet, k=random.randint(4, 17)))

    # Choose a random font and size
    font_path = random.choice(fonts)
    font = ImageFont.truetype(font_path, random.randint(20, 50))

    # Measure the text. The bounding box's left/top are generally not zero
    # (glyphs start below/right of the drawing origin), so they are kept and
    # compensated for when drawing.
    left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
    # At least 1px in each dimension so an all-whitespace string still yields a valid canvas
    text_width = max(1, math.ceil(right - left))
    text_height = max(1, math.ceil(bottom - top))

    # Generate a random rotation angle
    angle = random.randint(0, 360)

    # Get a random position along the edges of the image for the text
    start_offset, y_offset = random_edge_position(image.size, (text_width, text_height), angle)

    # Create a fully transparent RGBA canvas exactly the size of the text
    text_image = Image.new('RGBA', (text_width, text_height), (0, 0, 0, 0))
    text_draw = ImageDraw.Draw(text_image)

    # Set the text transparency based on the dataset type
    if dataset_type in (1, 2):
        alpha = random.randint(100, 255)
    else:
        alpha = random.randint(10, 100)

    # Generate a random color with the calculated transparency
    fill_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255), alpha)

    # Shift the origin by the bbox offsets so the glyphs land fully inside the
    # canvas; drawing at (0, 0) would clip the bottom/right of the text.
    text_draw.text((-left, -top), text, font=font, fill=fill_color)

    # Rotate the text image, growing the canvas to hold the rotated result
    rotated_text_image = text_image.rotate(angle, expand=True)

    # Paste using the rotated image's own alpha channel as the mask
    image.paste(rotated_text_image, (start_offset, y_offset), rotated_text_image)

    return image

### Explanation of addRandomLogo Function
The `addRandomLogo` function adds a randomly placed and resized logo to an image. It accepts two parameters: image, a PIL image to which the logo will be added, and logo, the logo image that will be placed on the main image. The function starts by determining a random width for the logo, which is between 1% and 10% of the width of the main image. The logo is then resized to this width while maintaining its aspect ratio, using the LANCZOS resampling method to preserve image quality.

Next, a random rotation angle between 0 and 360 degrees is generated for the logo. The function calculates a random position along the edges of the main image for placing the logo, using the `random_edge_position` function. This function takes into account the size of the image, the size of the logo, and the rotation angle to ensure the logo is positioned correctly along the edges.

Finally, the logo is pasted onto the main image at the calculated position, and the function returns the modified image with the added logo.

In [5]:
def addRandomLogo(image, logo):
    """
    Adds a random logo to an image.

    The logo is resized to a random width between 1% and 10% of the image width
    (aspect ratio preserved), rotated by a random angle, and pasted at a position
    returned by random_edge_position. The input image is modified in place and
    also returned.

    Parameters:
    image (PIL.Image): The image to which the logo will be added (modified in place).
    logo (PIL.Image): The logo image to be added.

    Returns:
    PIL.Image: The image with the added logo.
    """
    # Random target width between 1% and 10% of the image width; never below 1px
    # so that very small images do not produce a zero-width resize.
    min_width = max(1, int(image.width * 0.01))
    max_width = max(min_width, int(image.width * 0.1))
    logo_x = random.randint(min_width, max_width)

    # Height that preserves the aspect ratio, also kept at 1px or more
    logo_y = max(1, int(logo.height * logo_x / logo.width))

    # RGBA guarantees an alpha channel for the paste mask and transparent
    # corners after rotation.
    logo = logo.convert('RGBA').resize((logo_x, logo_y), Image.Resampling.LANCZOS)

    # Generate a random rotation angle for the logo
    angle = random.randint(0, 360)

    # Position is computed from the un-rotated size plus the angle, which is
    # what random_edge_position expects.
    position = random_edge_position(image.size, logo.size, angle)

    # Apply the rotation that the position was computed for; previously the
    # angle was generated but the logo was pasted un-rotated.
    rotated_logo = logo.rotate(angle, expand=True)

    # Paste using the logo's own alpha channel as the mask
    image.paste(rotated_logo, position, rotated_logo)

    return image

### Explanation of saveMetadata Function
The `saveMetadata` function saves a list of metadata dictionaries to a CSV file. It accepts two parameters: metadata, which is a list of dictionaries containing the metadata, and output_dir, which is the directory where the CSV file will be saved. The function begins by extracting the keys from the first dictionary in the metadata list, which will be used as the header for the CSV file.

Next, it opens a new CSV file named `metadata.csv` in write mode within the specified output directory. A `DictWriter` object is created using the extracted keys as the fieldnames. The function writes the header row to the CSV file using the `writeheader` method of the `DictWriter` object. Finally, it writes all the rows of metadata to the CSV file using the `writerows` method, which iterates over the list of dictionaries and writes each one to the file. The function completes its task without returning any value.

In [6]:
def saveMetadata(metadata, output_dir):
    """
    Saves metadata to a CSV file named 'metadata.csv' inside output_dir.

    The CSV header is taken from the keys of the first dictionary. If the
    metadata list is empty there is nothing to derive a header from, so a
    message is printed and no file is written. The output directory is created
    if it does not exist yet.

    Parameters:
    metadata (list of dict): A list of dictionaries containing metadata.
    output_dir (str): The directory where the CSV file will be saved.

    Returns:
    None
    """
    # Nothing to write: metadata[0] would raise IndexError on an empty list
    if not metadata:
        print("No metadata to save; metadata.csv was not written.")
        return

    # Make sure the target directory exists before opening the file
    os.makedirs(output_dir, exist_ok=True)

    # Column names come from the first record
    fieldnames = list(metadata[0].keys())

    # newline='' lets the csv module control line endings itself
    csv_path = os.path.join(output_dir, 'metadata.csv')
    with open(csv_path, 'w', newline='', encoding='utf-8') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(metadata)

### Explanation of extractAndWatermarkFrames Function
The `extractAndWatermarkFrames` function processes video files by extracting frames and adding watermarks (either logos or text) to them. It accepts five parameters: `video_path`, the directory containing the video files; `logo_dir`, the directory containing the logo images; `output_dir`, the directory where the output frames and metadata will be saved; `dataset_type`, an integer that affects the type and transparency of the watermarks; and `skip_frames`, an optional integer specifying the number of frames to skip before processing the next frame (default is 60).

The function starts by initializing an empty list for metadata and retrieves lists of video files and logo images from their respective directories. It also finds system fonts available for use. For each video, it opens the video file and reads frames in a loop. If the frame reading is successful and the current frame count is a multiple of `skip_frames`, it converts the frame to a PIL image and, when the frame is at least 1024 pixels in both width and height, crops it to a 1024x1024 region centered within the frame; smaller frames are left uncropped.

The original frame is saved as a JPEG file in the output directory. The function then randomly decides the number of watermarks to add to the frame, with the possibility of choosing between logos and text based on the dataset type. For each watermark, if a logo is chosen and available, the function attempts to open and add a random logo to the frame. If text is chosen, it adds random text to the frame using the available fonts.

The watermarked frame is then saved as a separate JPEG file in the output directory. The function appends metadata for each processed frame, including the original and watermarked filenames, to the metadata list. Once all frames in a video have been processed, it releases the video capture object. Finally, the function saves the collected metadata to a CSV file in the output directory.

In [7]:
def extractAndWatermarkFrames(video_path, logo_dir, output_dir, dataset_type, skip_frames=60):
    """
    Extracts frames from videos and adds watermarks (logos or text) to the frames.

    Every `skip_frames`-th frame is converted to a PIL image, center-cropped to
    1024x1024 when the frame is at least that large in both dimensions, and saved
    as 'frame_{n}.jpg'. A copy receives 1 to 5 random watermarks and is saved as
    'frame_{n}_watermarked.jpg'. For dataset_type 1 each watermark is randomly a
    logo or text; for any other dataset_type it is always text. The filename pairs
    are written to 'metadata.csv' in output_dir.

    Note: the frame counter used for sampling is shared across all videos (it is
    not reset per video), so the sampling interval runs across video boundaries.

    Parameters:
    video_path (str): The directory containing the video files.
    logo_dir (str): The directory containing the logo images.
    output_dir (str): The directory where the output frames and metadata will be saved.
        Created if it does not exist.
    dataset_type (int): The type of dataset, which affects the type and transparency of the watermarks.
    skip_frames (int, optional): Sampling interval; one frame out of every `skip_frames` is processed. Default is 60.

    Returns:
    None

    Raises:
    ValueError: If skip_frames is not positive.
    """
    if skip_frames <= 0:
        raise ValueError("skip_frames must be a positive integer")

    # Frames are written here, so make sure the directory exists up front
    os.makedirs(output_dir, exist_ok=True)

    metadata = []
    videos = get_videos_from_directory(video_path)
    count = 0        # index of the next saved frame (used in file names)
    frame_count = 0  # number of frames read so far, across all videos
    logos = get_logos_from_directory(logo_dir)
    # Sorted so that the seeded random font choice does not depend on the
    # order in which the font manager happens to return the paths.
    fonts = sorted(fm.findSystemFonts(fontpaths=None, fontext='ttf'))

    # Define the size for cropping the frame
    crop_width, crop_height = 1024, 1024

    for video in videos:
        cap = cv2.VideoCapture(video)
        print(video)
        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break
                if frame_count % skip_frames == 0:
                    # Convert the frame (BGR from OpenCV) to an RGB PIL image
                    frame_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
                    width, height = frame_image.size

                    # Center-crop only when the frame is large enough in both dimensions
                    if width >= crop_width and height >= crop_height:
                        left = (width - crop_width) // 2
                        top = (height - crop_height) // 2
                        frame_image = frame_image.crop(
                            (left, top, left + crop_width, top + crop_height)
                        )

                    original_filename = f"frame_{count}.jpg"
                    watermarked_filename = f"frame_{count}_watermarked.jpg"

                    # Save the original frame
                    frame_image.save(os.path.join(output_dir, original_filename))

                    # Randomly decide the number of watermarks to add
                    num_watermarks = random.randint(1, 5)
                    # Work on a copy: the watermark helpers draw in place, and the
                    # clean frame must not depend on having been saved first.
                    watermarked_image = frame_image.copy()

                    for _ in range(num_watermarks):
                        if dataset_type == 1:
                            choice = random.choice(['logo', 'text'])
                        else:
                            choice = "text"

                        # With no logos available, fall back to text rather than
                        # silently adding nothing for this watermark slot.
                        if choice == 'logo' and not logos:
                            choice = 'text'

                        if choice == 'logo':
                            logo_path = random.choice(logos)
                            try:
                                # Open the logo, closing the file handle once it is converted
                                with Image.open(logo_path) as logo_file:
                                    logo_image = logo_file.convert("RGBA")
                                watermarked_image = addRandomLogo(watermarked_image, logo_image)
                            except UnidentifiedImageError:
                                print(f"Logo file cannot be opened: {logo_path}")
                            except Exception as e:
                                # Best effort: a bad logo should not abort the whole run
                                print(f"An error occurred while processing the logo file: {e}")
                        else:
                            # Add random text to the frame
                            watermarked_image = addRandomText(watermarked_image, fonts, dataset_type)

                    # Save the watermarked frame
                    watermarked_image.save(os.path.join(output_dir, watermarked_filename))

                    # Append the metadata for this frame
                    metadata.append({
                        'original': original_filename,
                        'watermarked': watermarked_filename
                    })
                    count += 1
                frame_count += 1
        finally:
            # Always release the capture, even if processing a frame failed
            cap.release()

    # Save the metadata to a CSV file (skipped when no frame was extracted)
    if metadata:
        saveMetadata(metadata, output_dir)
    else:
        print("No frames were extracted; metadata.csv was not written.")

## Logo High Opacity Train Dataset Creation
The block below creates the Logo High Opacity train dataset (`dataset_type = 1`: logo and text watermarks, high-opacity text).

In [8]:
# Train split, dataset_type 1: each watermark is randomly a logo or text,
# and text alpha is drawn from the 100-255 (high opacity) range.
dataset_type = 1
video_dir, logo_dir = 'movies_train', 'logos'
output_dir = 'logo_and_high_opacity_watermark_dataset_train'
extractAndWatermarkFrames(
    video_path=video_dir,
    logo_dir=logo_dir,
    output_dir=output_dir,
    dataset_type=dataset_type,
    skip_frames=60,
)

movies_train\1.mp4
movies_train\10.mp4
movies_train\11.mp4
movies_train\12.mp4
movies_train\13.mp4
movies_train\14.mp4
movies_train\15.mp4
movies_train\16.mp4
movies_train\17.mp4
movies_train\18.mp4
movies_train\19.mp4
movies_train\2.mp4
movies_train\20.mp4
movies_train\21.mp4
movies_train\22.mp4
movies_train\23.mp4
movies_train\3.mp4
movies_train\4.mp4
movies_train\5.mp4
movies_train\6.mp4
movies_train\7.mp4
movies_train\8.mp4
movies_train\9.mp4


## Logo High Opacity Test Dataset Creation
The block below creates the Logo High Opacity test dataset (`dataset_type = 1`: logo and text watermarks, high-opacity text).

In [9]:
# Test split, dataset_type 1: each watermark is randomly a logo or text,
# and text alpha is drawn from the 100-255 (high opacity) range.
dataset_type = 1
video_dir, logo_dir = 'movies_test', 'logos'
output_dir = 'logo_and_high_opacity_watermark_dataset_test'
extractAndWatermarkFrames(
    video_path=video_dir,
    logo_dir=logo_dir,
    output_dir=output_dir,
    dataset_type=dataset_type,
    skip_frames=60,
)

movies_test\t1.mp4
movies_test\t2.mp4
movies_test\t3.mp4
movies_test\t4.mp4
movies_test\t5.mp4


## No Logo High Opacity Train Dataset Creation
The block below creates the No Logo High Opacity train dataset (`dataset_type = 2`: text-only watermarks, high-opacity text).

In [11]:
# Train split, dataset_type 2: text-only watermarks,
# with text alpha drawn from the 100-255 (high opacity) range.
dataset_type = 2
video_dir, logo_dir = 'movies_train', 'logos'
output_dir = 'no_logo_and_high_opacity_watermark_dataset_train'
extractAndWatermarkFrames(
    video_path=video_dir,
    logo_dir=logo_dir,
    output_dir=output_dir,
    dataset_type=dataset_type,
    skip_frames=60,
)

movies_train\1.mp4
movies_train\10.mp4
movies_train\11.mp4
movies_train\12.mp4
movies_train\13.mp4
movies_train\14.mp4
movies_train\15.mp4
movies_train\16.mp4
movies_train\17.mp4
movies_train\18.mp4
movies_train\19.mp4
movies_train\2.mp4
movies_train\20.mp4
movies_train\21.mp4
movies_train\22.mp4
movies_train\23.mp4
movies_train\3.mp4
movies_train\4.mp4
movies_train\5.mp4
movies_train\6.mp4
movies_train\7.mp4
movies_train\8.mp4
movies_train\9.mp4


## No Logo High Opacity Test Dataset Creation
The block below creates the No Logo High Opacity test dataset (`dataset_type = 2`: text-only watermarks, high-opacity text).

In [12]:
# Test split, dataset_type 2: text-only watermarks,
# with text alpha drawn from the 100-255 (high opacity) range.
dataset_type = 2
video_dir, logo_dir = 'movies_test', 'logos'
output_dir = 'no_logo_and_high_opacity_watermark_dataset_test'
extractAndWatermarkFrames(
    video_path=video_dir,
    logo_dir=logo_dir,
    output_dir=output_dir,
    dataset_type=dataset_type,
    skip_frames=60,
)

movies_test\t1.mp4
movies_test\t2.mp4
movies_test\t3.mp4
movies_test\t4.mp4
movies_test\t5.mp4


## No Logo Low Opacity Train Dataset Creation
The block below creates the No Logo Low Opacity train dataset (`dataset_type = 3`: text-only watermarks, low-opacity text).

In [13]:
# Train split, dataset_type 3: text-only watermarks,
# with text alpha drawn from the 10-100 (low opacity) range.
dataset_type = 3
video_dir, logo_dir = 'movies_train', 'logos'
output_dir = 'no_logo_and_low_opacity_watermark_dataset_train'
extractAndWatermarkFrames(
    video_path=video_dir,
    logo_dir=logo_dir,
    output_dir=output_dir,
    dataset_type=dataset_type,
    skip_frames=60,
)

movies_train\1.mp4
movies_train\10.mp4
movies_train\11.mp4
movies_train\12.mp4
movies_train\13.mp4
movies_train\14.mp4
movies_train\15.mp4
movies_train\16.mp4
movies_train\17.mp4
movies_train\18.mp4
movies_train\19.mp4
movies_train\2.mp4
movies_train\20.mp4
movies_train\21.mp4
movies_train\22.mp4
movies_train\23.mp4
movies_train\3.mp4
movies_train\4.mp4
movies_train\5.mp4
movies_train\6.mp4
movies_train\7.mp4
movies_train\8.mp4
movies_train\9.mp4


## No Logo Low Opacity Test Dataset Creation
The block below creates the No Logo Low Opacity test dataset (`dataset_type = 3`: text-only watermarks, low-opacity text).

In [14]:
# Test split, dataset_type 3: text-only watermarks,
# with text alpha drawn from the 10-100 (low opacity) range.
dataset_type = 3
video_dir, logo_dir = 'movies_test', 'logos'
output_dir = 'no_logo_and_low_opacity_watermark_dataset_test'
extractAndWatermarkFrames(
    video_path=video_dir,
    logo_dir=logo_dir,
    output_dir=output_dir,
    dataset_type=dataset_type,
    skip_frames=60,
)

movies_test\t1.mp4
movies_test\t2.mp4
movies_test\t3.mp4
movies_test\t4.mp4
movies_test\t5.mp4
