<a href="https://colab.research.google.com/github/Manika2219/Join_Videos_from_Transcript.ipynb/blob/main/Join_Videos_from_Transcript.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# Step 1: Install Necessary Libraries
!pip install yt-dlp openai-whisper moviepy fuzzywuzzy
!apt-get install -y ffmpeg

# Step 2: Import Required Libraries
import os
import subprocess
from moviepy.video.io.VideoFileClip import VideoFileClip
from moviepy.video.compositing.concatenate import concatenate_videoclips
import whisper
from fuzzywuzzy import fuzz

# Step 3: Define Functions
def download_video(youtube_url, output_path="video.mp4"):
    """
    Download a video with the yt-dlp command-line tool.

    Args:
        youtube_url: URL (or video ID) handed to yt-dlp as its positional
            argument.
        output_path: Value passed to yt-dlp's ``--output`` option.

    Raises:
        subprocess.CalledProcessError: If yt-dlp exits with a non-zero status.
            The error is printed and then re-raised.
    """
    command = [
        "yt-dlp",
        "--format", "mp4",
        "--output", output_path,
        # "--" ends option parsing, so a URL/ID that begins with "-"
        # (YouTube IDs may) is never mistaken for a command-line flag.
        "--",
        youtube_url,
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error downloading video: {e}")
        raise
    print(f"Video downloaded successfully: {output_path}")

# Whisper models already loaded in this process, keyed by model name, so that
# transcribing several files does not reload the weights each time.
_WHISPER_MODELS = {}


def transcribe_audio(audio_path, model_name="base"):
    """
    Generate a transcript from a media file using Whisper.

    Args:
        audio_path: Path passed unchanged to ``model.transcribe``.
        model_name: Name passed to ``whisper.load_model``. Defaults to
            ``"base"``, the model this function always used before.

    Returns:
        A ``(text, segments)`` tuple taken from the ``"text"`` and
        ``"segments"`` entries of the transcription result.

    Raises:
        Exception: Any error raised while loading the model or transcribing
            is printed and then re-raised unchanged.
    """
    try:
        model = _WHISPER_MODELS.get(model_name)
        if model is None:
            # First use of this model name: load it once and keep it around.
            model = whisper.load_model(model_name)
            _WHISPER_MODELS[model_name] = model
        result = model.transcribe(audio_path)
        text, segments = result["text"], result["segments"]
    except Exception as e:
        print(f"Error transcribing audio: {e}")
        raise
    print("Transcription completed.")
    return text, segments

def extract_video_segment(input_video, start_time, end_time, output_video):
    """
    Extract a specific segment from a video using the ffmpeg command-line tool.

    The streams are copied (``-c copy``) rather than re-encoded.

    Args:
        input_video: Path of the source video, passed to ffmpeg's ``-i``.
        start_time: Start position, converted with ``str()`` for ``-ss``.
        end_time: End position, converted with ``str()`` for ``-to``.
        output_video: Path of the file ffmpeg should write.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
            The error is printed and then re-raised.
    """
    command = [
        "ffmpeg",
        # Never wait for keyboard input, and overwrite an existing output
        # file instead of prompting; otherwise re-running the workflow with
        # leftover segment files makes ffmpeg refuse to write (or stall).
        "-nostdin",
        "-y",
        "-i", input_video,
        "-ss", str(start_time),
        "-to", str(end_time),
        "-c", "copy",
        output_video,
    ]
    try:
        subprocess.run(command, check=True)
    except subprocess.CalledProcessError as e:
        print(f"Error extracting video segment: {e}")
        raise
    print(f"Extracted segment saved to {output_video}")

def fuzzy_find_text_segment(segments, search_text, threshold=70):
    """
    Use fuzzy matching to find the transcript segment closest to the input text.

    Each segment's ``"text"`` is compared with ``search_text`` using
    ``fuzz.ratio``, ignoring case and surrounding whitespace. The first
    segment with the highest score wins.

    Args:
        segments: Iterable of mappings that provide ``"text"``, ``"start"``
            and ``"end"`` entries.
        search_text: Text to look for.
        threshold: Score the best match must exceed (strictly) to be
            accepted. Defaults to 70, the previously hard-coded value.

    Returns:
        ``(start, end, text)`` of the best-matching segment, with ``text``
        returned exactly as stored in the segment, or ``(None, None, None)``
        when no segment scores above ``threshold``.
    """
    # Normalise the query once instead of on every loop iteration.
    query = search_text.strip().lower()

    best_match = None
    highest_score = 0
    for segment in segments:
        # Strip so incidental leading/trailing spaces in the segment text
        # do not lower the similarity score.
        candidate = segment["text"].strip().lower()
        score = fuzz.ratio(query, candidate)
        if score > highest_score:
            highest_score = score
            best_match = segment

    if best_match is None or highest_score <= threshold:
        return None, None, None
    return best_match["start"], best_match["end"], best_match["text"]

# Step 4: Main Workflow
def _ask_video_count():
    """Prompt until the user enters a non-negative whole number of videos."""
    while True:
        raw = input("How many YouTube videos do you want to concatenate? ")
        try:
            count = int(raw)
        except ValueError:
            print("Please enter a whole number.")
            continue
        if count < 0:
            print("Please enter a number that is zero or greater.")
            continue
        return count


def main():
    """
    Run the interactive download / transcribe / extract / concatenate workflow.

    The user is asked for a number of video URLs. Each video is downloaded
    into ``videos/`` and transcribed, and the user can then repeatedly enter
    text to locate in the transcript. Every confirmed match is cut into its
    own file under ``segments/``. Finally all extracted segments are joined
    into ``final_video.mp4``.

    Errors raised by the download, transcription and extraction helpers are
    not caught here and abort the workflow.
    """
    # Part 1: Ask how many videos to concatenate and collect their URLs
    num_videos = _ask_video_count()
    video_urls = [
        input(f"Enter the URL for video {i + 1}: ") for i in range(num_videos)
    ]

    # Create folders for intermediate files
    os.makedirs("videos", exist_ok=True)
    os.makedirs("segments", exist_ok=True)

    segments = []

    # Part 2: Download and transcribe each video, then extract chosen portions
    for i, url in enumerate(video_urls):
        video_file = f"videos/video_{i + 1}.mp4"

        # Download the video
        print(f"\nDownloading video {i + 1}...")
        download_video(url, video_file)

        # Generate transcript (the video file itself is handed to Whisper)
        print(f"\nGenerating transcript for video {i + 1}...")
        transcript, segments_data = transcribe_audio(video_file)
        print(f"Transcript for video {i + 1}:\n{transcript}\n")

        # Ask user for text input to extract multiple portions
        while True:
            search_text = input(f"Enter the text you want to extract from video {i + 1} (leave empty to stop): ")
            if not search_text:
                break

            # Find segment based on fuzzy matching
            start_time, end_time, matched_text = fuzzy_find_text_segment(segments_data, search_text)
            if start_time is None or end_time is None:
                print(f"Could not find a good match for the specified text in video {i + 1}. Skipping this portion.")
                continue

            print(f"Found match: {matched_text}")
            print(f"Start Time: {start_time}s, End Time: {end_time}s")

            # Ask for confirmation from the user to extract the segment
            confirm = input("Do you want to extract this segment? (y/n): ")
            if confirm.strip().lower() != 'y':
                continue

            # Name the file with a running counter rather than user text:
            # typed text may contain characters that are invalid in paths
            # (e.g. "/"), and two searches sharing a prefix would collide.
            segment_file = f"segments/segment_{i + 1}_{len(segments) + 1}.mp4"
            extract_video_segment(video_file, start_time, end_time, segment_file)
            segments.append(segment_file)

    # Part 3: Concatenate all selected segments
    print("\nConcatenating selected video segments...")
    clips = []
    try:
        for segment in segments:
            try:
                clip = VideoFileClip(segment)
            except Exception as e:
                # Best effort: an unreadable segment is skipped, not fatal.
                print(f"Error loading segment {segment}: {e}")
            else:
                clips.append(clip)
                print(f"Segment {segment} loaded successfully.")

        if not clips:
            print("No valid video segments to concatenate.")
            return

        final_clip = concatenate_videoclips(clips, method="compose")
        try:
            final_clip.write_videofile("final_video.mp4", codec="libx264")
        finally:
            final_clip.close()
        print("\nFinal video saved as final_video.mp4.")
    finally:
        # Release the reader resources held by every opened clip, even if
        # writing the final video failed.
        for clip in clips:
            clip.close()

# Run the main workflow only when this code is executed directly (script or
# notebook cell), not when the file is imported as a module.
if __name__ == "__main__":
    main()


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.
How many YouTube videos do you want to concatenate? 2
Enter the URL for video 1: https://www.youtube.com/live/-iavTUQSp2M?si=u6qnOIhtf85k4fym
Enter the URL for video 2: https://youtu.be/mH1w9fQ-tiM?si=yq8xjKTLotZXB2HT

Downloading video 1...
Video downloaded successfully: videos/video_1.mp4

Generating transcript for video 1...


  checkpoint = torch.load(fp, map_location=device)



Transcription completed.
Transcript for video 1:
 Life, you learn a few things by reading about them. You learn a few more things by watching someone do something. You learn a few things by listening to something. But a few things by actually living your life. Now the advice that I'm going to share in this video are the things which I learned by living my life. I'm 24 right now and welcome to a few stories which made me say, fuck it. I want to start this video by telling you a couple of stories from my life. But now when I actually look at these stories, when I actually think about these stories myself, I feel like bro why the fuck did I do that? You'll understand why am I saying what am I exactly saying right now. But let's start with the first story. This was back when I was in school. Most of the time that I actually remember about my life with friends in school. And friends actually played like a huge role in the person that I am today. But this is not a good story or a story that 

  checkpoint = torch.load(fp, map_location=device)



Transcription completed.
Transcript for video 2:
 I'm gonna be 24 in a couple of days and if you're of age between 15 to 20, then the next 8 minutes are going to be the best 8 minutes of your life. So these are some of the most important lessons that I've learned in my life and I think you should definitely learn from my mistakes. The first thing, the biggest leverage that you have when you're of this age is that you have time. At the age of 18, people think that if I don't get into IIT or if I don't crack need, then my life is like gone and that's completely bullshit. What you have to understand is if you pick any skill, I mean to say any skill and try to get better at it by 1% every day. It'll not just be 37 times better at it by the end of the year but it'll actually be more than 1427 times better at it by the end of the second year. But people usually don't talk about this second or third or fourth year with a beautiful effect of compounding takes place. Now imagine you spend 5 yea



MoviePy - Done.
Moviepy - Writing video final_video.mp4






Moviepy - Done !
Moviepy - video ready final_video.mp4

Final video saved as final_video.mp4.
