## Image Subtitle Removal
Proof of concept (POC) for removing subtitles from a single image using computer vision.

In [1]:
import cv2
import os
import numpy as np
import re

Subtitle removal logic

In [None]:
def remove_subtiltle(image, output_path='cleaned_image.jpg', show=True):
    """Inpaint the largest near-white region of an image (presumably the subtitle).

    Near-white pixels are isolated with a binary threshold, then closed and
    dilated so neighbouring glyphs merge into blobs. The bounding box of the
    largest blob is used as an inpainting mask. When no near-white region is
    found the image is written out unchanged.

    Args:
        image: Path of the image file to clean.
        output_path: Path the cleaned image is written to. Defaults to
            'cleaned_image.jpg' in the current working directory.
        show: When True (default), display the original, intermediate and
            cleaned images in OpenCV windows and block until a key is pressed.

    Raises:
        FileNotFoundError: If ``image`` is missing or cannot be decoded.
    """
    frame = cv2.imread(image)
    if frame is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError("Could not read image: %s" % image)

    # Keep only pixels brighter than 240: they become 255, everything else 0.
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    bright = cv2.threshold(gray, 240, 255, cv2.THRESH_BINARY)[1]

    # A 9x7 rectangular kernel: closing fills small holes inside the glyphs,
    # the repeated dilation then joins neighbouring glyphs into one blob.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9, 7))
    blobs = cv2.morphologyEx(bright, cv2.MORPH_CLOSE, kernel)
    blobs = cv2.dilate(blobs, kernel, iterations=7)

    # [-2] selects the contour list whether findContours returns two values
    # or three (some OpenCV versions also return the image first).
    contours = cv2.findContours(blobs, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Single-channel mask; it stays all-zero when nothing was detected, in
    # which case there is nothing to inpaint.
    mask = np.zeros(frame.shape[:2], np.uint8)
    if len(contours) != 0:
        largest = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest)
        # Only the blob pixels inside the largest contour's bounding box are masked.
        mask[y:y+h, x:x+w] = blobs[y:y+h, x:x+w]
        # Shrink the mask slightly, then soften its edges.
        mask = cv2.erode(mask, kernel, iterations=1)
        mask = cv2.GaussianBlur(mask, (5, 5), 0)

    # Fill the masked region from the surrounding pixels.
    cleaned_image = cv2.inpaint(frame, mask, 3, cv2.INPAINT_TELEA)

    # Save before opening any window so the result is kept even if the GUI fails.
    cv2.imwrite(output_path, cleaned_image)

    if show:
        cv2.imshow("threshold", bright)
        cv2.imshow("original", frame)
        cv2.imshow("b&w", blobs)
        cv2.imshow("mask", mask)
        cv2.imshow("clean", cleaned_image)
        # Wait for a key press, then close the windows so they do not linger.
        cv2.waitKey(0)
        cv2.destroyAllWindows()

In [None]:
# Ask for the image location. input() already returns a str; surrounding
# whitespace and quotes (common in pasted paths) are stripped because they
# would make the file lookup fail.
img = input("Enter the image path: ").strip().strip('"\'')
remove_subtiltle(img)

## Video Subtitle Removal
Proof of concept (POC) for removing subtitles from a video using computer vision.

In [2]:
import time
import subprocess
# Folder that contains the input video; processed frames go to a 'Temp'
# subfolder of it. An empty string means the current working directory.
dir = '' #Place video directory here
video_name = 'hello-there-sub.mp4' #Place video name here

# Extract the audio track first so it can be muxed back into the cleaned video later.
audio_output = 'audio_output.mp3'
# The arguments are passed as a list (no shell) so that paths containing
# spaces or shell metacharacters reach ffmpeg unchanged.
command = [
    'ffmpeg', '-i', os.path.join(dir, video_name),
    '-vn', '-y', '-ab', '192k', '-ar', '48000', '-ac', '2',
    audio_output,
]
subprocess.call(command)

#Sorts alphanumerically with frame formatting
def sortedproper(l):
    """Return the items of ``l`` in natural order (``frame2`` before ``frame10``).

    Each name is split into alternating text and digit runs; digit runs are
    compared as integers and everything else as plain strings.
    """
    def natural_key(name):
        # The capturing group keeps the digit runs in the split result.
        pieces = re.split('([0-9]+)', name)
        return [int(piece) if piece.isdigit() else piece for piece in pieces]

    return sorted(l, key=natural_key)

#Used for finding execution time
start_time = time.time()

# All paths are built from `dir` up front and the working directory is never
# changed, so a relative (or empty) `dir` works and later cells keep their
# working directory.
video_path = os.path.join(dir, video_name)
temp_dir = os.path.join(dir, 'Temp')
silent_video_path = os.path.join(dir, 'video_no_audio.mp4')
video_with_audio = os.path.join(dir, 'output_audio.mp4')

#Video capture
vid = cv2.VideoCapture(video_path)
if not vid.isOpened():
    raise IOError("Could not open video: %s" % video_path)

#Make sure the Temp folder exists and is empty
if not os.path.exists(temp_dir):
    os.mkdir(temp_dir)
    print("Directory " , temp_dir ,  " Created ")
else:
    print("Directory " , temp_dir ,  " already exists")
    # Remove frames left over from a previous run so they cannot end up in the new video
    for file in os.listdir(temp_dir):
        os.remove(os.path.join(temp_dir, file))

#Main process
#Runs through every frame to detect subtitles
#Saves each cleaned frame into the Temp folder
# The structuring element and the frame count are the same for every frame,
# so they are computed once instead of on each iteration.
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (9,7))
total_frames = vid.get(cv2.CAP_PROP_FRAME_COUNT)
frame_counter = 0

while frame_counter < total_frames:
    ret, img = vid.read()
    if not ret:
        break

    # Keep near-white pixels (> 240), then close and dilate so glyphs merge into blobs
    tmp_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    tmp_image = cv2.threshold(tmp_image, 240, 255, cv2.THRESH_BINARY)[1]
    tmp_image = cv2.morphologyEx(tmp_image, cv2.MORPH_CLOSE, kernel)
    tmp_image = cv2.dilate(tmp_image, kernel, iterations=7)

    # [-2] selects the contour list whether findContours returns two or three values
    contours = cv2.findContours(tmp_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Single-channel mask; it stays all-zero for frames where nothing was detected
    mask = np.zeros(img.shape[:2], np.uint8)
    if len(contours) != 0:
        c = max(contours, key=cv2.contourArea)
        x,y,w,h = cv2.boundingRect(c)
        # Only the blob pixels inside the largest contour's bounding box are masked
        mask[y:y+h, x:x+w] = tmp_image[y:y+h, x:x+w]
        mask = cv2.erode(mask, kernel, iterations=1)
        mask = cv2.GaussianBlur(mask, (3,3), 0)

    cleaned_image = cv2.inpaint(img, mask, 3, cv2.INPAINT_TELEA)
    cv2.imwrite(os.path.join(temp_dir, "frame%d.jpg" % frame_counter), cleaned_image)
    # Advance once per frame read so file numbers always match frame order
    frame_counter += 1

#Rebuild one video from the saved frames
# Define the codec using VideoWriter_fourcc and create a VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'avc1')
# The frame rate is kept as a float: truncating e.g. 29.97 to 29 would make the
# rebuilt video run longer than the extracted audio track.
fps = vid.get(cv2.CAP_PROP_FPS)
frame_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT)))
vid.release()

video = cv2.VideoWriter(silent_video_path, fourcc, fps, frame_size)
images = sortedproper([entry for entry in os.listdir(temp_dir) if entry.endswith(".jpg")])
for image in images:
    print(image)
    video.write(cv2.imread(os.path.join(temp_dir, image)))
video.release()

#Mux the extracted audio back in and print execution time
# -y overwrites an output left by a previous run instead of stopping on the
# overwrite prompt; the argument list avoids shell quoting problems.
command = [
    'ffmpeg', '-y', '-i', silent_video_path, '-i', audio_output,
    '-c:v', 'copy', '-c:a', 'aac', '-strict', 'experimental',
    video_with_audio,
]
subprocess.call(command)
print("--- %s seconds ---" % (time.time() - start_time))
    

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/6.1.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopenvin

Directory  /Users/jeanlecigne/IdeaProjects/Maydays/md-p-04_automatic_subtitle_removal/data/Temp  already exists
frame0.jpg
frame1.jpg
frame2.jpg
frame3.jpg
frame4.jpg
frame5.jpg
frame6.jpg
frame7.jpg
frame8.jpg
frame9.jpg
frame10.jpg
frame11.jpg
frame12.jpg
frame13.jpg
frame14.jpg
frame15.jpg
frame16.jpg
frame17.jpg
frame18.jpg
frame19.jpg
frame20.jpg
frame21.jpg
frame22.jpg
frame23.jpg
frame24.jpg
frame25.jpg
frame26.jpg
frame27.jpg
frame28.jpg
frame29.jpg
frame30.jpg
frame31.jpg
frame32.jpg
frame33.jpg
frame34.jpg
frame35.jpg
frame36.jpg
frame37.jpg
frame38.jpg
frame39.jpg
frame40.jpg
frame41.jpg
frame42.jpg
frame43.jpg
frame44.jpg
frame45.jpg
frame46.jpg
frame47.jpg
frame48.jpg
frame49.jpg
frame50.jpg
frame51.jpg
frame52.jpg
frame53.jpg
frame54.jpg
frame55.jpg
frame56.jpg
frame57.jpg
frame58.jpg
frame59.jpg
frame60.jpg
frame61.jpg
frame62.jpg
frame63.jpg
frame64.jpg
frame65.jpg
frame66.jpg
frame67.jpg
frame68.jpg
frame69.jpg
frame70.jpg
frame71.jpg
frame72.jpg
frame73.jpg
frame74.jp

ffmpeg version 6.1.1 Copyright (c) 2000-2023 the FFmpeg developers
  built with Apple clang version 15.0.0 (clang-1500.1.0.2.5)
  configuration: --prefix=/usr/local/Cellar/ffmpeg/6.1.1_4 --enable-shared --enable-pthreads --enable-version3 --cc=clang --host-cflags= --host-ldflags='-Wl,-ld_classic' --enable-ffplay --enable-gnutls --enable-gpl --enable-libaom --enable-libaribb24 --enable-libbluray --enable-libdav1d --enable-libharfbuzz --enable-libjxl --enable-libmp3lame --enable-libopus --enable-librav1e --enable-librist --enable-librubberband --enable-libsnappy --enable-libsrt --enable-libssh --enable-libsvtav1 --enable-libtesseract --enable-libtheora --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libxvid --enable-lzma --enable-libfontconfig --enable-libfreetype --enable-frei0r --enable-libass --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopenvin

--- 35.841546058654785 seconds ---


[out#0/mp4 @ 0x7fbc9c80eec0] video:5433kB audio:218kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 0.287243%
size=    5667kB time=00:00:13.95 bitrate=3327.4kbits/s speed=50.2x    
[aac @ 0x7fbc9c81d280] Qavg: 2699.604
