In [5]:
import os


# Root folder under which every generated artefact is stored.
BASE_OUTPUT_DIR = "output"

# One parent folder per content source.
POST_DIR = os.path.join(BASE_OUTPUT_DIR, "post")
COMMENT_OUTPUT_DIR = os.path.join(BASE_OUTPUT_DIR, "comments")

# Narration (audio) and screenshot (images) folders for the post itself.
POST_AUDIO_DIR = os.path.join(POST_DIR, "audio")
POST_IMAGE_DIR = os.path.join(POST_DIR, "images")

# Narration (audio) and screenshot (images) folders for the comments.
COMMENT_AUDIO_DIR = os.path.join(COMMENT_OUTPUT_DIR, "audio")
COMMENT_IMAGE_DIR = os.path.join(COMMENT_OUTPUT_DIR, "images")

# Leaf folders that are created on setup and emptied on cleanup.
DIRECTORIES = [
    POST_AUDIO_DIR,
    POST_IMAGE_DIR,
    COMMENT_AUDIO_DIR,
    COMMENT_IMAGE_DIR,
]

In [6]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import logging 
import time
from gtts import gTTS

# TODO: Remove AutoMod comments from the scraped results.

# Module-level logger named after the running module.
logger = logging.getLogger(__name__)



# Sample Reddit post URL; not referenced by any other code visible in this notebook.
link = "https://www.reddit.com/r/MachineLearning/comments/11rtzv6/d_what_do_people_think_about_openai_not_releasing/"

def make_dir_if_not_exists(dir_name):
    """Create ``dir_name`` (including missing parents) unless it already exists.

    Args:
        dir_name: Path of the directory to create.

    Raises:
        FileExistsError: If ``dir_name`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(dir_name, exist_ok=True)

def delete_content_of_dir(dir_name):
    """Remove everything inside ``dir_name`` while keeping the directory itself.

    Regular files and symlinks are unlinked; sub-directories are removed
    recursively. A path that does not exist is ignored.

    Args:
        dir_name: Path of the directory to empty.
    """
    import shutil

    if not os.path.exists(dir_name):
        return

    # Snapshot the entries first so deletions do not disturb the directory scan.
    with os.scandir(dir_name) as scan:
        entries = list(scan)

    for entry in entries:
        # os.remove fails on directories, so real sub-directories need rmtree.
        if entry.is_dir(follow_symlinks=False):
            shutil.rmtree(entry.path)
        else:
            os.remove(entry.path)


class Viddit:
    """Scrape a Reddit post and its top-level comments with Selenium.

    For the post and each selected comment, a PNG screenshot and a gTTS
    narration (MP3) are written into the folders listed in ``DIRECTORIES``.
    """

    def __init__(self, path_to_driver = "chromedriver.exe"):
        """Start a Chrome session and make sure the output folders exist.

        Args:
            path_to_driver: Path to the chromedriver executable.
        """
        # Imported locally because the notebook's import cell does not provide it.
        from selenium.webdriver.chrome.service import Service

        options = Options()
        # Uncomment to run without a visible browser window:
        # options.add_argument('--headless')
        # options.add_argument('--disable-gpu')

        # Selenium 4 expects the driver path wrapped in a Service and the options
        # passed as ``options=``; the positional path and ``chrome_options=``
        # keyword are deprecated and rejected by newer releases.
        self.driver = webdriver.Chrome(service=Service(path_to_driver), options=options)
        self.setup()

    def setup(self):
        """Create every output directory that does not exist yet."""
        for directory in DIRECTORIES:
            make_dir_if_not_exists(directory)

    def delete_data(self):
        """Delete previously generated files from every output directory."""
        for directory in DIRECTORIES:
            delete_content_of_dir(directory)

    def teardown(self):
        """Delete generated data and shut the browser down.

        The browser is quit even when deleting the data raises, so a failed
        cleanup cannot leave a Chrome process behind.
        """
        try:
            self.delete_data()
        finally:
            # quit() closes all windows and ends the session; a separate
            # close() beforehand is redundant.
            self.driver.quit()

    def accept_cookies(self):
        """Open the Reddit front page and click the 'Accept all' cookie button.

        Raises:
            selenium.common.exceptions.TimeoutException: If the button does not
                become clickable within 10 seconds.
        """
        self.driver.get("https://www.reddit.com/")
        # Wait until the button is actually clickable (not merely present in
        # the DOM) and click the element the wait returned.
        accept_button = WebDriverWait(self.driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept all')]"))
        )
        accept_button.click()
        logger.info("Cookies accepted")
        self.driver.switch_to.default_content()

    def _scroll_to_center(self, element):
        """Scroll the window so ``element`` sits in the vertical center of the viewport."""
        # Scrolling to the comment ensures that the profile picture loads
        # Credits: https://stackoverflow.com/a/57630350
        desired_y = (element.size['height'] / 2) + element.location['y']
        window_h = self.driver.execute_script('return window.innerHeight')
        window_y = self.driver.execute_script('return window.pageYOffset')
        current_y = (window_h / 2) + window_y
        self.driver.execute_script("window.scrollBy(0, arguments[0]);", desired_y - current_y)
        time.sleep(0.2)

    def scrape(self, post_url, no_comments = 5):
        """Capture screenshots and narration for a post and its top comments.

        The post is saved as ``0.png`` / ``0.mp3`` in the post folders. Comments
        are saved as ``<n>.png`` / ``<n>.mp3`` in the comment folders, numbered
        contiguously from 0. Comments without any text are skipped (gTTS cannot
        narrate empty input), and later comments take their place.

        Args:
            post_url: URL of the Reddit post to scrape.
            no_comments: Maximum number of comments to save.

        Returns:
            The underlying Selenium WebDriver.
        """
        # Fetching the post itself, text & screenshot
        self.driver.get(post_url)
        post = WebDriverWait(self.driver, 20).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.Post')))
        post_text = post.find_element(By.CSS_SELECTOR, 'h1').text
        post.screenshot(os.path.join(POST_IMAGE_DIR, "0.png"))
        gTTS(post_text).save(os.path.join(POST_AUDIO_DIR, "0.mp3"))

        # Let comments load
        self.driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(3) #TODO Can be a WebDriverWait

        # Fetching comments & top level comment determinator: the first matched
        # comment is used as the reference, and only comments sharing its inline
        # style are kept.
        comments = WebDriverWait(self.driver, 20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, 'div[id^=t1_][tabindex]')))
        allowed_style = comments[0].get_attribute("style")
        top_level_comments = [comment for comment in comments if comment.get_attribute("style") == allowed_style]

        saved = 0
        for comment in top_level_comments:
            # Save time & resources by only fetching X content
            if saved >= no_comments:
                break

            # TODO Filter out locked comments (AutoMod)
            self._scroll_to_center(comment)

            text = "\n".join([element.text for element in comment.find_elements(By.CSS_SELECTOR, '.RichTextJSON-root')])
            if not text.strip():
                # gTTS refuses empty input; skip rather than abort the whole scrape.
                logger.warning("Skipping a comment without text")
                continue

            gTTS(text).save(os.path.join(COMMENT_AUDIO_DIR, f'{saved}.mp3'))
            comment.screenshot(os.path.join(COMMENT_IMAGE_DIR, f'{saved}.png'))
            saved += 1
        return self.driver


In [7]:
import cv2
import numpy as np
from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_audioclips, concatenate_videoclips
from moviepy.video.io.ImageSequenceClip import ImageSequenceClip
import logging

# Module-level logger named after the running module.
logger = logging.getLogger(__name__)

# Extra seconds added to each audio clip's duration when computing how long
# its image stays on screen.
WAIT_TIME_SECONDS=2
def _load_overlay(png_path, frame_width, frame_height):
    """Read an overlay image and return ``(bgr, alpha)`` sized to fit the frame.

    ``bgr`` holds the colour channels in OpenCV's BGR order and ``alpha`` is a
    float mask of shape (height, width, 1) with values in [0, 1]. Images without
    an alpha channel are treated as fully opaque.
    """
    png = cv2.imread(png_path, cv2.IMREAD_UNCHANGED)
    if png is None:
        raise FileNotFoundError(f'Could not read overlay image: {png_path}')
    if png.ndim == 2:
        # Single-channel image: expand so the blend sees three colour channels.
        png = cv2.cvtColor(png, cv2.COLOR_GRAY2BGR)

    png_height, png_width = png.shape[:2]
    # Shrink overlays larger than the frame; otherwise the centering offsets
    # go negative and the blend fails.
    scale = min(frame_width / png_width, frame_height / png_height)
    if scale < 1:
        new_size = (max(1, int(png_width * scale)), max(1, int(png_height * scale)))
        png = cv2.resize(png, new_size, interpolation=cv2.INTER_AREA)

    if png.shape[2] == 4:
        alpha = png[:, :, 3:4] / 255.0
    else:
        alpha = np.ones(png.shape[:2] + (1,), dtype=np.float64)
    return png[:, :, :3], alpha


def make_reddit_vid(background_video_path, png_paths, audio_paths, output_path='result.mp4'):
    """Overlay each image on the background video for the length of its audio.

    For every (image, audio) pair, consecutive background frames are read, the
    image is alpha-blended into the center of each frame, and the frames are
    turned into a clip that carries the audio. All clips are concatenated and
    written to ``output_path``. If the background video runs out of frames,
    the remaining pairs are dropped and a warning is logged.

    Args:
        background_video_path: Path of the video used as the background.
        png_paths: Paths of the images to overlay, in playback order.
        audio_paths: Paths of the audio files, one per image.
        output_path: Destination of the rendered video.

    Raises:
        ValueError: If the two path lists differ in length or no clip could be built.
        IOError: If the background video cannot be opened.
        FileNotFoundError: If an overlay image cannot be read.
    """
    if len(png_paths) != len(audio_paths):
        raise ValueError(
            f'Expected one audio file per image, got {len(png_paths)} images '
            f'and {len(audio_paths)} audio files'
        )

    # Open the background video file
    cap = cv2.VideoCapture(background_video_path)
    if not cap.isOpened():
        cap.release()
        raise IOError(f'Could not open background video: {background_video_path}')

    audio_clips = []
    try:
        # Get the video properties
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        logger.debug(f'Video properties: {fps} fps, {width}x{height} pixels')

        clips = []
        for clip_index, (png_path, audio_path) in enumerate(zip(png_paths, audio_paths)):
            overlay_bgr, alpha = _load_overlay(png_path, width, height)
            overlay_h, overlay_w = overlay_bgr.shape[:2]

            # Coordinates that place the overlay in the center of the frame
            x = int((width - overlay_w) / 2)
            y = int((height - overlay_h) / 2)

            # Loop-invariant parts of the alpha blend
            weighted_overlay = alpha * overlay_bgr
            background_weight = 1 - alpha

            # The image stays on screen for the audio duration plus a pause
            audio_clip = AudioFileClip(audio_path)
            audio_clips.append(audio_clip)
            num_frames = int((audio_clip.duration + WAIT_TIME_SECONDS) * fps)

            logger.debug(f'Overlaying PNG image {clip_index} for {num_frames} frames')
            frames = []
            for _ in range(num_frames):
                ret, frame = cap.read()
                if not ret:
                    break
                region = frame[y:y + overlay_h, x:x + overlay_w]
                blended = background_weight * region + weighted_overlay
                frame[y:y + overlay_h, x:x + overlay_w] = blended.astype(frame.dtype)
                # OpenCV delivers BGR while MoviePy expects RGB; without the
                # conversion red and blue are swapped in the output.
                frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            if not frames:
                logger.warning(
                    f'Background video ran out of frames before image {clip_index}; '
                    f'remaining images are skipped'
                )
                break

            clips.append(ImageSequenceClip(frames, fps=fps).set_audio(audio_clip))

        if not clips:
            raise ValueError('No clips could be built from the given inputs')

        result_clip = concatenate_videoclips(clips)
        result_clip.write_videofile(output_path)
    finally:
        # Release the capture and audio readers on success and on failure alike.
        cap.release()
        for audio_clip in audio_clips:
            audio_clip.close()

def create_video(frames, output, fps=30):
    """Write a sequence of frames to ``output`` using the ``mp4v`` codec.

    Args:
        frames: Non-empty sequence of 3-dimensional frame arrays; the first
            frame's height and width set the video size.
        output: Path of the video file to write.
        fps: Frames per second of the written video.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    if len(frames) == 0:
        raise ValueError('create_video needs at least one frame')

    # Get the shape of the frames
    height, width, _ = frames[0].shape

    # Create the video writer
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Set the video codec
    out = cv2.VideoWriter(output, fourcc, fps, (width, height))

    try:
        # Write each frame to the video
        for frame in frames:
            out.write(frame)
    finally:
        # Release the writer even if a write fails
        out.release()
    
    
def overlay_audio_to_video(video_path, audio_paths, start_times, result_path):
    """Mix several audio files into a video, each at its own start time.

    Args:
        video_path: Path of the source video.
        audio_paths: Paths of the audio files to add.
        start_times: Start time in seconds for each audio file, in the same
            order as ``audio_paths``.
        result_path: Path of the video file to write.

    Raises:
        ValueError: If no audio path is given or the two lists differ in length.
    """
    audio_paths = list(audio_paths)
    start_times = list(start_times)
    if not audio_paths:
        raise ValueError('At least one audio path is required')
    if len(audio_paths) != len(start_times):
        raise ValueError(
            f'Expected one start time per audio file, got {len(audio_paths)} '
            f'audio files and {len(start_times)} start times'
        )

    # Imported locally because the notebook's import cell does not provide it.
    from moviepy.editor import CompositeAudioClip

    # Load the video file as a MoviePy clip
    video_clip = VideoFileClip(video_path)
    source_clips = []
    try:
        timed_clips = []
        for audio_path, start_time in zip(audio_paths, start_times):
            source_clip = AudioFileClip(audio_path)
            source_clips.append(source_clip)
            timed_clips.append(source_clip.set_start(start_time))

        # CompositeAudioClip keeps each clip's start time; concatenate_audioclips
        # would re-time the clips back to back and ignore ``start_times``.
        mixed_audio = CompositeAudioClip(timed_clips)

        # Overlay the mixed audio onto the video clip and export the result
        result_clip = video_clip.set_audio(mixed_audio)
        result_clip.write_videofile(result_path)
    finally:
        # Close the underlying readers on success and on failure alike.
        for source_clip in source_clips:
            source_clip.close()
        video_clip.close()


    


In [9]:
# Maximum number of comments to include in the video.
NO_COMMENTS = 3

viddit = Viddit()
try:
    viddit.delete_data()
    viddit.accept_cookies()
    viddit.scrape("https://www.reddit.com/r/CBD/comments/11unx5h/cbd_for_depression_anxiety_and_pain_in_sweden/", NO_COMMENTS)
finally:
    # Quit the browser whether or not scraping succeeded so no Chrome process
    # is left running; the scraped files stay on disk for the video step.
    viddit.driver.quit()

# Pair each screenshot with its narration: the post first, then the comments.
candidate_pairs = [(os.path.join(POST_IMAGE_DIR, "0.png"), os.path.join(POST_AUDIO_DIR, "0.mp3"))] + [
    (os.path.join(COMMENT_IMAGE_DIR, f"{i}.png"), os.path.join(COMMENT_AUDIO_DIR, f"{i}.mp3"))
    for i in range(NO_COMMENTS)
]
# Keep only pairs whose files were actually written (fewer comments than
# requested may have been scraped).
available_pairs = [pair for pair in candidate_pairs if all(os.path.exists(path) for path in pair)]
vid_input_list = [png_path for png_path, _ in available_pairs]
audio_input_list = [audio_path for _, audio_path in available_pairs]
make_reddit_vid("minecraft_20m.mp4", vid_input_list, audio_input_list)


Moviepy - Building video result.mp4.
MoviePy - Writing audio in resultTEMP_MPY_wvf_snd.mp3


                                                                      

MoviePy - Done.
Moviepy - Writing video result.mp4



                                                                

Moviepy - Done !
Moviepy - video ready result.mp4
