In [8]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Auto-reload imported modules before each cell runs, so edits to local
# helper modules are picked up without restarting the kernel.
# Re-running %load_ext in a live kernel only prints an "already loaded" notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [9]:
from IPython.display import Video
import os
import numpy as np
import pandas as pd
import pickle
import subprocess
from tqdm import tqdm
from utils_text import (timestamp_to_sec, sec_to_timestamps, create_sentences_from_words, squash_timestamps, clean_script_for_prompt)

In [10]:
class VideoCutter:
    """Cut highlight clips out of a source video with ffmpeg.

    Each entry of ``sentences_list`` describes one highlight. ``process()``
    passes every highlight through ``squash_timestamps`` to obtain a list of
    ``[start, end]`` ranges and then renders one file per highlight at
    ``<save_path>/videos/<n>.mp4``. A highlight with several ranges is first
    cut into segments under ``<save_path>/concat_videos`` and then joined
    with ffmpeg's concat demuxer.

    Args:
        sentences_list: Iterable of highlights handed to ``squash_timestamps``
            (in this notebook: lists of sentence indices).
        video_path: Path of the video that ffmpeg reads.
        transcription_path: Path of a pickle file with the transcription
            results consumed by ``create_sentences_from_words``.
        save_path: Output folder; created if missing.
    """

    def __init__(self, sentences_list, video_path, transcription_path, save_path = 'result'):
        self.video_path = video_path
        self.sentences_list = sentences_list
        self.tmp_folder = save_path
        # Defined up front so cut_videos() called before process() simply has
        # nothing to do instead of raising AttributeError.
        self.render_times = {"timestamps": [], "positions": []}
        self.make_folders()

        # SECURITY: pickle.load can execute arbitrary code while loading.
        # Only point this at transcription files you produced yourself.
        with open(transcription_path, 'rb') as f:
            self.transcription_results = pickle.load(f)

        self.sentence_dict, self.subtitles_dict = create_sentences_from_words(self.transcription_results)

    def make_folders(self):
        """Create the output folder and its ``videos`` / ``concat_videos`` subfolders."""
        for parts in ((), ("videos",), ("concat_videos",)):
            os.makedirs(os.path.join(self.tmp_folder, *parts), exist_ok=True)

    def _clip_path(self, num):
        """Return the final output path of highlight number ``num``."""
        return os.path.join(self.tmp_folder, "videos", f"{num}.mp4")

    @staticmethod
    def _to_seconds(cut):
        """Return ``(start, end)`` of one range, converted by ``timestamp_to_sec``.

        If the conversion fails for either bound, both raw values are used
        unchanged (same fallback as before, but without swallowing
        KeyboardInterrupt/SystemExit the way a bare ``except`` does).
        """
        try:
            return timestamp_to_sec(cut[0]), timestamp_to_sec(cut[1])
        except Exception:
            return cut[0], cut[1]

    def _run_ffmpeg(self, args):
        """Run ffmpeg with ``args`` and report failure instead of hiding it.

        The command is passed as an argument list (no shell), so paths with
        spaces or quotes reach ffmpeg unmodified.

        Returns:
            True when ffmpeg exits with status 0, False otherwise.
        """
        cmd = ["ffmpeg", "-y", "-loglevel", "error", *args]
        completed = subprocess.run(cmd)
        if completed.returncode != 0:
            print(f"ffmpeg failed with exit code {completed.returncode}: {cmd}")
            return False
        return True

    def cut_one_video(self, video, start, end, output_name):
        """Cut the range ``start``..``end`` of ``video`` into ``output_name``.

        Args:
            video: Input video path.
            start: Start position, passed to ffmpeg ``-ss``.
            end: End position, passed to ffmpeg ``-to``.
            output_name: Path of the file to write (overwritten if present).

        Returns:
            True if ffmpeg succeeded, False otherwise.
        """
        return self._run_ffmpeg([
            "-copyts",
            "-i", str(video),
            "-ss", str(start),
            # os.cpu_count() may return None; fall back to a single thread.
            "-threads", str(os.cpu_count() or 1),
            "-to", str(end),
            "-map", "0",
            str(output_name),
        ])

    def concat_videos(self, filename, output_name):
        """Join the segments listed in the concat file ``filename``.

        Args:
            filename: Path of an ffmpeg concat-demuxer list file.
            output_name: Path of the joined file (streams are copied).

        Returns:
            True if ffmpeg succeeded, False otherwise.
        """
        return self._run_ffmpeg([
            "-f", "concat",
            "-safe", "0",
            "-threads", str(os.cpu_count() or 1),
            "-i", str(filename),
            "-c", "copy",
            str(output_name),
        ])

    def cut_videos(self):
        """Render every highlight in ``self.render_times['timestamps']``.

        Highlights with a single range are cut straight to their final file.
        Highlights with several ranges are cut into numbered segments, listed
        in ``<save_path>/file.txt`` and concatenated. Highlights without any
        range are skipped.
        """
        print ('Cutting videos')
        highlights = self.render_times['timestamps']
        for num, timestamp in tqdm(enumerate(highlights), total=len(highlights)):
            if not timestamp:
                # Previously this fell through to timestamp[0] and raised IndexError.
                print(f"Highlight {num} has no time ranges; skipping")
                continue

            output_name = self._clip_path(num)

            if len(timestamp) == 1:
                start, end = self._to_seconds(timestamp[0])
                self.cut_one_video(self.video_path, start, end, output_name)
                continue

            concat_list = []
            for count, cut in enumerate(timestamp):
                start, end = self._to_seconds(cut)
                segment_name = f"{num}_{count}.mp4"
                # Entries are written relative to the list file, which lives in tmp_folder.
                concat_list.append(f"file concat_videos/{segment_name}\n")
                segment_path = os.path.join(self.tmp_folder, "concat_videos", segment_name)
                self.cut_one_video(self.video_path, start, end, segment_path)

            list_path = os.path.join(self.tmp_folder, "file.txt")
            with open(list_path, "w") as output:
                output.writelines(concat_list)
            self.concat_videos(list_path, output_name)

    def process (self):
        """Compute render ranges for all highlights, save them, and cut the clips.

        Fills ``self.render_times`` with the ``positions`` and ``timestamps``
        returned by ``squash_timestamps`` for each highlight, pickles it to
        ``<save_path>/render_times.pickle`` and then calls ``cut_videos()``.
        """
        self.render_times = {"timestamps": [], "positions": []}

        for highlight in self.sentences_list:
            positions, new_timestamps = squash_timestamps(highlight, self.sentence_dict["timestamps"])
            self.render_times["positions"].append(positions)
            self.render_times["timestamps"].append(new_timestamps)

        with open(os.path.join(self.tmp_folder, "render_times.pickle"), "wb") as f:
            pickle.dump(self.render_times, f)

        self.cut_videos()


In [11]:
# One inner list per highlight clip; the values are sentence indices that
# VideoCutter.process() hands to squash_timestamps.
sentences = [[0, 1, 2, 9, 10], [13, 14, 15]]

# Alternative input with a single highlight:
# sentences = [[14,15,16]]

In [5]:
# Inputs for VideoCutter: the pickled transcription and the video to cut.
# NOTE(review): this cell's execution count (In [5]) is lower than the cells
# above it; run the notebook top to bottom after a kernel restart.
transcription_path = "files/transcription.pickle"
video_path = "files/downloaded_video.mp4"

In [12]:
# Creates the output folders (default save_path 'result') and loads the
# transcription pickle.
videocutter = VideoCutter(sentences, video_path, transcription_path)

In [None]:
# Computes the render ranges, saves them to render_times.pickle in the
# output folder and cuts one clip per highlight with ffmpeg.
videocutter.process()

In [15]:
# Sentence data built by create_sentences_from_words in VideoCutter.__init__.
sentence_dict = videocutter.sentence_dict

In [16]:
# Load the raw transcription for direct inspection. VideoCutter already holds
# the same object as videocutter.transcription_results.
# SECURITY: pickle.load can execute arbitrary code; only load trusted files.
with open(transcription_path, 'rb') as f:
    transcription_results = pickle.load(f)

In [13]:
# Pull the "diarization" entry out of the transcription results.
diarization = transcription_results["diarization"]

In [14]:
# Build the per-sentence table previewed in the next cell.
# NOTE(review): the third argument "." is presumably an output folder - confirm
# against utils_text.clean_script_for_prompt.
df = clean_script_for_prompt(sentence_dict, diarization, ".")

In [15]:
# Preview the first rows of the sentence table.
df.head(20)

Unnamed: 0,index,speaker,sentence,length
0,0,SPEAKER_03,In some ways the point of LinkedIn is obvious.,2.36
1,1,SPEAKER_03,It's not like Instagram where you're supposed ...,11.4
2,2,SPEAKER_03,LinkedIn however is where you go to network.,2.36
3,3,SPEAKER_03,It should be pretty straightforward.,2.0
4,4,SPEAKER_03,So why the hell is it so weird?,2.3
5,5,SPEAKER_03,"I'm outside the New York offices of LinkedIn, ...",14.2
6,6,SPEAKER_03,How microdosing helps me relate to the women i...,3.32
7,7,SPEAKER_03,What Andrew Tate can teach us about team build...,3.2
8,8,SPEAKER_03,At what age should your child start dropshipping?,2.74
9,9,SPEAKER_03,It's earlier than you think.,1.9


In [30]:
# Text of sentences 13-15, the indices of the second highlight in `sentences`.
sentence_dict['sentences'][13:16]

['No.', 'But I do have...', 'I do have a long work day.']

In [63]:
# Highlights the cutter was constructed with.
videocutter.sentences_list

[[0, 1, 2, 9, 10], [13, 14, 15]]

In [59]:
# Ranges computed by process(): per highlight, 'positions' holds the sentence
# index ranges and 'timestamps' the matching [start, end] values passed to
# ffmpeg (presumably seconds - confirm against squash_timestamps).
videocutter.render_times

{'timestamps': [[[0.0, 22.5], [47.38, 49.96]], [[52.94, 56.24]]],
 'positions': [[[0, 2], [9, 10]], [[13, 15]]]}

In [5]:
# Play the clip rendered for the first highlight; process() must have run first.
Video("result/videos/0.mp4")