In [1]:
from moviepy import *
from PIL import *
import torch
import cv2
import numpy as np
import pyttsx3

In [2]:
# Re-encode both source videos to mp4v files, frame by frame, stopping as soon
# as either source runs out of frames.
video_1 = cv2.VideoCapture("homework_1_test_video.mp4")
video_2 = cv2.VideoCapture("聖稜-雪山的脊樑©.mp4")

# Both output files share the frame rate reported by the first video.
fps = video_1.get(cv2.CAP_PROP_FPS)
frames = video_1.get(cv2.CAP_PROP_FRAME_COUNT)
frame_width_1 = int(video_1.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height_1 = int(video_1.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_width_2 = int(video_2.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height_2 = int(video_2.get(cv2.CAP_PROP_FRAME_HEIGHT))

fourcc = cv2.VideoWriter_fourcc(*'mp4v')
default_video_1 = cv2.VideoWriter('default_video_1.mp4', fourcc, fps, (frame_width_1, frame_height_1))
default_video_2 = cv2.VideoWriter('default_video_2.mp4', fourcc, fps, (frame_width_2, frame_height_2))

try:
    while video_1.isOpened() and video_2.isOpened():
        ret_1, frame_1 = video_1.read()
        ret_2, frame_2 = video_2.read()
        # Check the read status BEFORE writing: a failed read returns
        # (False, None) and None must never be handed to VideoWriter.write.
        if not ret_1 or not ret_2:
            break
        default_video_1.write(frame_1)
        default_video_2.write(frame_2)
finally:
    # Always release the handles so the output files are finalized even if
    # reading or writing raised part-way through.
    video_1.release()
    video_2.release()
    default_video_1.release()
    default_video_2.release()

In [3]:
def gray_level_mapping(image):
    image_tensor = torch.tensor(image).float() / 255.0
    channels = image_tensor.permute(2, 0, 1) 
    equalized_channels = []
    for channel in channels:
        channel_hist = torch.histc(channel, bins=256, min=0, max=1)
        cdf = channel_hist.cumsum(0)  
        cdf = (cdf - cdf.min()) / (cdf.max() - cdf.min())  
        equalized_channel = cdf[(channel * 255).long()]
        equalized_channels.append(equalized_channel)
    equalized_image = torch.stack(equalized_channels, dim=0).permute(1, 2, 0) * 255.0
    return equalized_image.byte().numpy()
    
def high_pass_filter(image, radius=30):
    """Apply an ideal (hard cut-off) high-pass filter in the frequency domain.

    Each channel is transformed with a 2-D FFT, every frequency within
    `radius` of the centred spectrum origin is zeroed, and the magnitude of
    the inverse transform is returned.

    Args:
        image: array-like of shape (H, W, C) with values in 0..255.
        radius: cut-off radius, in frequency bins, of the disc that is
            removed around the zero frequency. Defaults to 30.

    Returns:
        numpy.ndarray of dtype uint8 with shape (H, W, C).
    """
    image_tensor = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0
    _, height, width = image_tensor.shape

    # Shift only the two spatial axes; the channel axis is not a frequency axis.
    spatial_dims = (-2, -1)
    spectrum = torch.fft.fftshift(torch.fft.fft2(image_tensor), dim=spatial_dims)

    crow, ccol = height // 2, width // 2
    y, x = torch.meshgrid(torch.arange(height), torch.arange(width), indexing='ij')
    # 1.0 outside the disc, 0.0 inside it; broadcasts over the channel axis.
    keep = ((x - ccol) ** 2 + (y - crow) ** 2 > radius ** 2).to(torch.float32)

    filtered = torch.fft.ifft2(torch.fft.ifftshift(spectrum * keep, dim=spatial_dims))
    # Clamp before the uint8 cast so overshoot above 255 saturates instead of
    # wrapping around.
    magnitude = (torch.abs(filtered) * 255.0).clamp(0.0, 255.0)
    return magnitude.permute(1, 2, 0).numpy().astype(np.uint8)

def histogram_equalization(image):
    """Equalize the histogram of every channel of `image` independently.

    The image is split into its channels, each one is passed through
    cv2.equalizeHist, and the results are merged back in the same order.
    """
    equalized_planes = []
    for plane in cv2.split(image):
        equalized_planes.append(cv2.equalizeHist(plane))
    return cv2.merge(equalized_planes)

def rgb_to_hsv(image):
    """Convert an RGB-ordered image to OpenCV's HSV representation.

    Args:
        image: RGB image array as accepted by cv2.cvtColor.

    Returns:
        The image converted to HSV by cv2.cvtColor.
    """
    # The input is RGB (as the function name states), so the RGB conversion
    # code is required; COLOR_BGR2HSV would treat red as blue and vice versa.
    return cv2.cvtColor(image, cv2.COLOR_RGB2HSV)

In [4]:
# Load the two re-encoded clips, run two filters over every frame of each,
# and write the four filtered frame sequences out as H.264 videos.
default_video_1 = VideoFileClip("default_video_1.mp4")
default_video_2 = VideoFileClip("default_video_2.mp4")

# One frame list per filter output.
video_1, video_2, video_3, video_4 = [], [], [], []

# First clip: gray-level mapping and high-pass filtering.
for source_frame in default_video_1.iter_frames(fps=default_video_1.fps, dtype="uint8"):
    mapped_frame = gray_level_mapping(source_frame)
    filtered_frame = high_pass_filter(source_frame)
    video_1.append(mapped_frame)
    video_2.append(filtered_frame)

# Second clip: histogram equalization and HSV conversion.
for source_frame in default_video_2.iter_frames(fps=default_video_2.fps, dtype="uint8"):
    equalized_frame = histogram_equalization(source_frame)
    hsv_frame = rgb_to_hsv(source_frame)
    video_3.append(equalized_frame)
    video_4.append(hsv_frame)

# Each sequence keeps the frame rate of the clip it was derived from.
processed_video_1 = ImageSequenceClip(video_1, fps=default_video_1.fps)
processed_video_2 = ImageSequenceClip(video_2, fps=default_video_1.fps)
processed_video_3 = ImageSequenceClip(video_3, fps=default_video_2.fps)
processed_video_4 = ImageSequenceClip(video_4, fps=default_video_2.fps)

for _output_path, _processed_clip in (
    ("processed_video_1.mp4", processed_video_1),
    ("processed_video_2.mp4", processed_video_2),
    ("processed_video_3.mp4", processed_video_3),
    ("processed_video_4.mp4", processed_video_4),
):
    _processed_clip.write_videofile(_output_path, codec="libx264")

{'video_found': True, 'audio_found': False, 'metadata': {'major_brand': 'isom', 'minor_version': '512', 'compatible_brands': 'isomiso2mp41', 'encoder': 'Lavf58.76.100'}, 'inputs': [{'streams': [{'input_number': 0, 'stream_number': 0, 'stream_type': 'video', 'language': None, 'default': True, 'size': [1920, 1080], 'bitrate': 41605, 'fps': 30.0, 'codec_name': 'mpeg4', 'profile': '(Simple Profile)', 'metadata': {'Metadata': '', 'handler_name': 'VideoHandler', 'vendor_id': '[0][0][0][0]'}}], 'input_number': 0}], 'duration': 9.9, 'bitrate': 41607, 'start': 0.0, 'default_video_input_number': 0, 'default_video_stream_number': 0, 'video_codec_name': 'mpeg4', 'video_profile': '(Simple Profile)', 'video_size': [1920, 1080], 'video_bitrate': 41605, 'video_fps': 30.0, 'video_duration': 9.9, 'video_n_frames': 297}
C:\Users\useraux\AppData\Local\Programs\Python\Python313\Lib\site-packages\imageio_ffmpeg\binaries\ffmpeg-win-x86_64-v7.1.exe -i default_video_1.mp4 -loglevel error -f image2pipe -vf scal

                                                                                                                       

MoviePy - Done !
MoviePy - video ready processed_video_1.mp4
MoviePy - Building video processed_video_2.mp4.
MoviePy - Writing video processed_video_2.mp4



                                                                                                                       

MoviePy - Done !
MoviePy - video ready processed_video_2.mp4
MoviePy - Building video processed_video_3.mp4.
MoviePy - Writing video processed_video_3.mp4



                                                                                                                       

MoviePy - Done !
MoviePy - video ready processed_video_3.mp4
MoviePy - Building video processed_video_4.mp4.
MoviePy - Writing video processed_video_4.mp4



                                                                                                                       

MoviePy - Done !
MoviePy - video ready processed_video_4.mp4


In [None]:
# Build a 2x3 mosaic: each row is an original clip followed by its two
# processed versions, all resized to a height of 360 pixels.
top_row_sources = (default_video_1, processed_video_1, processed_video_2)
bottom_row_sources = (default_video_2, processed_video_3, processed_video_4)

clip1, clip2, clip3 = [source.resized(height=360) for source in top_row_sources]
clip4, clip5, clip6 = [source.resized(height=360) for source in bottom_row_sources]

grid = [
    [clip1, clip2, clip3],
    [clip4, clip5, clip6],
]
merged_video = clips_array(grid)

merged_video.write_videofile("merged_video.mp4", codec="libx264")

In [None]:
class TexttoSpeech:
    """Thin wrapper around a pyttsx3 engine that uses the first listed voice."""

    def __init__(self):
        """Create the pyttsx3 engine and select the first voice it reports."""
        engine = pyttsx3.init()
        first_voice = engine.getProperty('voices')[0]
        engine.setProperty('voice', first_voice.id)
        self.engine = engine

    def text_to_speech(self, message):
        """Queue `message` for speaking and block until the engine finishes."""
        speaker = self.engine
        speaker.say(message)
        speaker.runAndWait()

    def text_to_mp3(self, message, mp3file):
        """Queue `message` for saving to `mp3file` and block until done."""
        speaker = self.engine
        speaker.save_to_file(message, mp3file)
        speaker.runAndWait()
        
# Synthesize one voice-over file per narration line and build subtitles whose
# timing follows the cumulative duration of those audio clips.
ts = TexttoSpeech()

narration_texts='''哈囉，這是機器視覺作業的報告影片
讓我來逐一介紹每個影像所應用的處理技術
首先，最上一排是作業測試影片
這排使用pytorch的函式庫來處理
左上是原始影片
中上是灰階映射後的結果
右上是高通濾波器處理後的結果
接著，下面這排是聖稜-雪山的脊樑
這排使用open cv的函式庫來處理
左下是原始影片
中下是直方圖等化後的結果
右下是顏色空間從RGB轉換為HSV的結果
感謝您的觀看！'''

# Filter on the stripped text so whitespace-only lines are dropped too,
# instead of becoming empty narration entries.
lines = [msg.strip() for msg in narration_texts.split('\n') if msg.strip()]
speech = []
for i, msg in enumerate(lines):
    # Build the file name once so the writer and the reader cannot diverge.
    voiceover_path = 'subtitle-voiceover-{:04d}.mp3'.format(i)
    ts.text_to_mp3(msg, voiceover_path)
    speech.append(AudioFileClip(voiceover_path))

# cumduration[i] .. cumduration[i + 1] is the time span of narration line i.
duration       = np.array([0]+[s.duration for s in speech])
cumduration    = np.cumsum(duration)
# Whole seconds of narration plus 4 extra seconds.
total_duration = int(cumduration[-1])+4


def generator(txt):
    """Render one subtitle line as a white, 32 pt text clip in the msjh.ttc font."""
    return TextClip('msjh.ttc', txt, font_size=32, color='white')


subtitles = SubtitlesClip([((cumduration[i],cumduration[i+1]),s) for i,s in enumerate(lines)], make_textclip=generator, encoding='utf-8')

In [None]:
# Background music: start `total_duration` seconds before the end of the
# track and scale its volume to 15 %.
bgm = AudioFileClip("calm background.mp3")
bgm_start = bgm.duration - total_duration
bgm = bgm.subclipped(bgm_start).with_volume_scaled(0.15)

# Retime the merged mosaic so that it lasts `total_duration` seconds.
clip = VideoFileClip("merged_video.mp4")
speed_factor = clip.duration / total_duration
clip = clip.with_speed_scaled(speed_factor, total_duration)

# Overlay the subtitles, mix the narration over the music, and export.
captions = subtitles.with_position(('center','bottom'))
final_clip = CompositeVideoClip([clip, captions])
narration_track = concatenate_audioclips(speech)
soundtrack = CompositeAudioClip([bgm, narration_track])
final_clip = final_clip.with_audio(soundtrack)
final_clip.write_videofile("final_video.mp4")