In [50]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [51]:
from IPython.display import Video
import os
import pickle
import subprocess
from tqdm import tqdm
from utils_text import (timestamp_to_sec, sec_to_timestamps, create_sentences_from_words, squash_timestamps)

In [52]:
class VideoCutter:
    """Cut highlight clips out of a source video with ffmpeg.

    Each entry of ``sentences_list`` is one highlight. ``process()`` turns every
    highlight into a list of ``[start, end]`` cuts (via ``squash_timestamps``)
    and ``cut_videos()`` renders one ``<index>.mp4`` per highlight under
    ``<save_path>/videos``. Highlights made of several cuts are first rendered
    piecewise into ``<save_path>/concat_videos`` and then joined with ffmpeg's
    concat demuxer.

    Parameters
    ----------
    sentences_list : list
        One item per highlight; each item is passed to ``squash_timestamps``.
    video_path : str
        Path of the source video handed to ffmpeg as input.
    transcription_path : str
        Path of a pickle file whose content is passed to
        ``create_sentences_from_words``.
    save_path : str, optional
        Working/output folder (default ``'result'``).
    """

    def __init__(self, sentences_list, video_path, transcription_path, save_path = 'result'):
        self.video_path = video_path
        self.sentences_list = sentences_list

        # NOTE(security): pickle.load can execute arbitrary code while
        # deserializing; only point this at transcription files you trust.
        with open(transcription_path, 'rb') as f:
            self.transcription_results = pickle.load(f)

        self.tmp_folder = save_path
        # Pre-initialised so cut_videos() is a harmless no-op before process().
        self.render_times = {"timestamps": [], "positions": []}

    @staticmethod
    def _run_ffmpeg(cmd):
        """Run an ffmpeg argument list without a shell and report failures.

        Failures are printed instead of raised so that one bad clip does not
        abort the remaining ones. Returns the ``CompletedProcess`` or ``None``
        when the process could not be started.
        """
        try:
            completed = subprocess.run(cmd)
        except OSError as error:
            print(f"Could not start ffmpeg: {error}")
            return None
        if completed.returncode != 0:
            print(f"ffmpeg exited with code {completed.returncode}: {' '.join(cmd)}")
        return completed

    @staticmethod
    def _cut_bounds(cut):
        """Return ``(start, end)`` for one cut.

        Both values go through ``timestamp_to_sec``; if that conversion fails
        for either value, the raw pair is used unchanged (presumably the values
        are already plain seconds in that case - confirm against utils_text).
        """
        try:
            return timestamp_to_sec(cut[0]), timestamp_to_sec(cut[1])
        except Exception:
            return cut[0], cut[1]

    def cut_one_video(self, video, start, end, output_name):
        """Render the ``start``..``end`` range of ``video`` into ``output_name``."""
        # Arguments are passed as a list (no shell), so paths containing
        # spaces or quotes are handed to ffmpeg verbatim.
        cmd = [
            "ffmpeg", "-y", "-copyts",
            "-ss", str(start),
            "-i", str(video),
            # os.cpu_count() may return None; fall back to a single thread.
            "-threads", str(os.cpu_count() or 1),
            "-to", str(end),
            "-map", "0",
            str(output_name),
            "-loglevel", "error",
        ]
        self._run_ffmpeg(cmd)

    def concat_videos(self, filename, output_name):
        """Join the clips listed in the concat list ``filename`` into ``output_name``."""
        cmd = [
            "ffmpeg", "-y",
            "-f", "concat",
            "-safe", "0",
            "-threads", str(os.cpu_count() or 1),
            "-i", str(filename),
            "-c", "copy",
            str(output_name),
            "-loglevel", "error",
        ]
        self._run_ffmpeg(cmd)

    def cut_videos(self):
        """Render one output video per entry of ``self.render_times['timestamps']``."""
        print ('Cutting videos')
        # tqdm wraps the list itself (not the enumerate object) so it knows the total.
        for num, timestamp in enumerate(tqdm(self.render_times['timestamps'])):
            if len(timestamp) == 0:
                # Nothing to cut for this highlight.
                continue

            output_name = f"{self.tmp_folder}/videos/{num}.mp4"

            if len(timestamp) > 1:
                concat_list = []
                for count, cut in enumerate(timestamp):
                    start, end = self._cut_bounds(cut)
                    part_name = f"{num}_{count}.mp4"
                    # Entries are written relative to the folder holding file.txt.
                    concat_list.append(f"file concat_videos/{part_name}\n")
                    self.cut_one_video(
                        self.video_path, start, end,
                        f"{self.tmp_folder}/concat_videos/{part_name}",
                    )
                list_path = f"{self.tmp_folder}/file.txt"
                with open(list_path, "w") as output:
                    output.writelines(concat_list)
                self.concat_videos(list_path, output_name)
            else:
                start, end = self._cut_bounds(timestamp[0])
                self.cut_one_video(self.video_path, start, end, output_name)

    def process (self):
        """Create the output folders, compute cut times per highlight and render them."""
        os.makedirs(f"{self.tmp_folder}", exist_ok=True)
        os.makedirs(f"{self.tmp_folder}/videos", exist_ok=True)
        os.makedirs(f"{self.tmp_folder}/concat_videos", exist_ok=True)

        self.sentence_dict, self.subtitles_dict = create_sentences_from_words(self.transcription_results)
        self.render_times = {"timestamps": [], "positions": []}

        for highlight in self.sentences_list:
            positions, new_timestamps = squash_timestamps(highlight, self.sentence_dict["timestamps"])
            self.render_times["positions"].append(positions)
            self.render_times["timestamps"].append(new_timestamps)

        self.cut_videos()


In [53]:
# Each inner list is one highlight and yields one output clip; VideoCutter.process
# passes every inner list to squash_timestamps together with the sentence timestamps.
# NOTE(review): the numbers are presumably indices into the transcribed sentence list - confirm.
sentences = [[0, 1, 2, 9, 10], [13, 14, 15]]

# Alternative single-highlight input kept for quick experiments:
# sentences = [[14,15,16]]

In [54]:
# Input artifacts, given relative to the notebook's working directory.
transcription_path = "files/transcription.pickle"
video_path = "files/downloaded_video.mp4"
results_path = "files/results.pickle"
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
# NOTE(review): `results` is not referenced by any other cell visible in this
# notebook excerpt - confirm it is still needed.
with open(results_path, 'rb') as f:
    results = pickle.load(f)

In [55]:
# Build the cutter; this loads the transcription pickle and uses the default
# save_path ('result') as working/output folder.
videocutter = VideoCutter(sentences, video_path, transcription_path)

In [56]:
# Creates the output folders, computes the cut times for every highlight and
# renders the clips with ffmpeg (one <index>.mp4 per highlight under <save_path>/videos).
videocutter.process()

Cutting videos


2it [00:04,  2.17s/it]


In [67]:
# Inspect the sentences stored in sentence_dict, which process() fills from
# create_sentences_from_words.
videocutter.sentence_dict['sentences']

["My first guest is the man who made electric cars a thing and is currently working on perfecting reusable rockets, space travel, connecting the human brain directly to computers, connecting cities with electromagnetic bullet trains, the Starlink satellite system that's so important to the war in Ukraine.",
 "And then on Tuesday, he's going to work on that tunnel thing on traffic.",
 'He also tweets a lot.',
 'Elon Musk, ladies and gentlemen.',
 'Look at you.',
 'Wow.',
 'Did I get the full order of things that you do in a day there?',
 'When I was reading there, I left out the tunnel thing at the end.',
 'Uh, yeah.',
 'A lot of jobs.',
 'Do you do all these things every day?',
 'Do you work on all of them in a single day?',
 'No.',
 'No.',
 'But I do have...',
 'I do have a long work day.',
 'Um, yeah.',
 'So...',
 'I work a lot.',
 "Well, I'm so thrilled you're here, because, you know, we do a show where we talk about what changes happen in the world, but we just talk.",
 "There's ve

In [63]:
# The highlight groups that were passed to the constructor.
videocutter.sentences_list

[[0, 1, 2, 9, 10], [13, 14, 15]]

In [59]:
# Per-highlight results of squash_timestamps: the [start, end] cuts that were
# rendered and the matching position ranges.
videocutter.render_times

{'timestamps': [[[0.0, 22.5], [47.38, 49.96]], [[52.94, 56.24]]],
 'positions': [[[0, 2], [9, 10]], [[13, 15]]]}