In [None]:
import os
import json
import re
from typing import List, Dict
import requests
from youtube_transcript_api import YouTubeTranscriptApi
from moviepy.editor import VideoFileClip, TextClip, CompositeVideoClip
import speech_recognition as sr
from google.cloud import texttospeech
import google.generativeai as genai
import arabic_reshaper
from bidi.algorithm import get_display
from pytube import YouTube
# Read the Gemini API key from the environment rather than committing it to
# source control. Any key that was previously hard-coded here should be treated
# as leaked and revoked. Export GOOGLE_API_KEY before running this script.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))

### Setting Up Base Functions


In [None]:
def get_youtube_id(url):
    """Extract the YouTube video ID from a video URL.

    Supports ``watch?v=<id>`` URLs (with any extra query parameters or a
    fragment), ``youtu.be/<id>`` short links, and ``/shorts/<id>``,
    ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>`` paths.

    Args:
        url: The YouTube URL as text. A missing ``https://`` scheme is tolerated.

    Returns:
        The video ID string.

    Raises:
        ValueError: If no video ID can be found in ``url``.
    """
    # Imported locally so this function is self-contained.
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    if "://" not in cleaned:
        # urlparse only recognises the host when a scheme is present.
        cleaned = "https://" + cleaned
    parsed = urlparse(cleaned)

    video_id = None
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        video_id = query_ids[0]
    else:
        segments = [part for part in parsed.path.split("/") if part]
        host = parsed.netloc.lower()
        if host == "youtu.be" or host.endswith(".youtu.be"):
            if segments:
                video_id = segments[0]
        elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
            video_id = segments[1]

    if not video_id:
        raise ValueError(f"Could not extract a YouTube video ID from URL: {url!r}")

    print(f"Extracted YouTube ID: {video_id}")
    print('-----get_youtube_id is done-----')
    return video_id

def get_transcript(video_id, language):
    """Fetch a video's transcript, save it to disk and return it as text.

    The transcript is looked up for ``language`` in this order of preference:
    manually created, auto-generated, then any transcript in that language.
    Each entry is rendered as ``[<start>s - <end>s] <text>`` on its own line
    and the result is written to ``<video_id>_transcript.txt`` in the current
    working directory.

    Args:
        video_id: The YouTube video ID.
        language: Language code of the wanted transcript (e.g. ``"ar"``).

    Returns:
        The formatted transcript text, or ``None`` when no transcript is
        available or any error occurs (the error is printed, not raised).
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # Try the lookups from most to least preferred; the first hit wins.
        finders = (
            transcript_list.find_manually_created_transcript,
            transcript_list.find_generated_transcript,
            transcript_list.find_transcript,
        )
        transcript = None
        for find in finders:
            try:
                transcript = find([language])
                break
            except Exception:
                # `Exception` (not a bare except) so Ctrl-C / SystemExit still
                # propagate while lookup failures fall through to the next finder.
                continue

        if transcript is None:
            print(f"No transcript found for language: {language}")
            return None

        transcript_data = transcript.fetch()
        print(f"Successfully retrieved transcript for video {video_id}")

        # NOTE(review): assumes each entry is a dict with 'start', 'duration'
        # and 'text' keys - confirm against the installed
        # youtube-transcript-api version.
        file_text = "".join(
            f"[{entry['start']:.2f}s - {entry['start'] + entry['duration']:.2f}s] {entry['text']}\n"
            for entry in transcript_data
        )

        # Keep a copy on disk; the text is returned directly instead of
        # re-reading the file that was just written.
        with open(f"{video_id}_transcript.txt", "w", encoding="utf-8") as f:
            f.write(file_text)

        print('-----get_transcript is done-----')
        return file_text

    except Exception as e:
        print(f"Error getting transcript: {e}")
        print('-----get_transcript is done-----')
        return None

def chunking_transcript(transcript: str, chunk_size: int = 50) -> List[str]:
    """Split a transcript into chunks of ``chunk_size`` lines each.

    The transcript is split on ``"\\n"``; every line in a chunk is terminated
    with a newline, so each chunk ends with ``"\\n"``. The last chunk may hold
    fewer than ``chunk_size`` lines.

    Args:
        transcript: The full transcript text.
        chunk_size: Maximum number of lines per chunk (default 50).

    Returns:
        The list of chunk strings, in transcript order.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be at least 1")

    lines = transcript.split("\n")
    chunks = [
        "".join(f"{line}\n" for line in lines[start:start + chunk_size])
        for start in range(0, len(lines), chunk_size)
    ]

    print('-----chunking_transcript is done-----')
    return chunks
    
def download_youtube_video(video_id, download_path='./'):
    """Download a YouTube video, retrying on failure.

    Up to five attempts are made. Each failed attempt prints its error and the
    next attempt starts immediately.

    Args:
        video_id: The YouTube video ID.
        download_path: Directory the video file is saved into.

    Returns:
        The downloaded file's name (basename only, without ``download_path``),
        or ``None`` if every attempt failed.
        NOTE(review): a caller using a non-default ``download_path`` has to
        join it with the returned name itself - confirm this is intended.
    """
    max_attempts = 5
    video_url = f"https://www.youtube.com/watch?v={video_id}"

    for _attempt in range(max_attempts):
        try:
            # Create the pytube YouTube object for this video.
            yt = YouTube(video_url)

            # Get the highest resolution stream available.
            stream = yt.streams.get_highest_resolution()

            print(f"Downloading video {video_id}...")
            downloaded_video = stream.download(output_path=download_path)
        except Exception as e:
            # Move on to the next attempt instead of giving up on the first
            # error, which is what the retry loop is for.
            print(f"An error occurred while downloading the video: {e}")
            continue

        video_name = os.path.basename(downloaded_video)
        video_file_path = os.path.abspath(downloaded_video)

        print(f"Video downloaded successfully: {video_file_path}")
        print('-----download_youtube_video is done-----')
        return video_name

    print('-----download_youtube_video is done-----')
    return None
    
def _parse_json_reply(reply_text):
    """Parse the JSON payload of a model reply that may be wrapped in a Markdown code fence."""
    fenced = re.search(r"```(?:json)?\s*(.*?)```", reply_text, re.DOTALL | re.IGNORECASE)
    payload = (fenced.group(1) if fenced else reply_text).strip()
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        # Models tend to emit trailing commas before a closing bracket, which
        # strict JSON rejects. Retry once with those removed; only done as a
        # fallback because it could in theory touch text inside a string.
        return json.loads(re.sub(r",\s*([\]}])", r"\1", payload))


def gemini_clip_choosing(prompt, chunks):
    """Ask Gemini to pick clips from a chunked transcript.

    The instructions in ``prompt`` are sent first, then every transcript chunk
    as its own chat message, and finally a message telling the model that the
    transcript is complete. The model's last reply is parsed as JSON.

    Args:
        prompt: The instruction prompt describing the task and output format.
        chunks: Transcript chunks, sent to the model in order.

    Returns:
        The parsed JSON reply (expected to be a list of clip dicts, as
        requested by the prompt).

    Raises:
        json.JSONDecodeError: If the reply does not contain parseable JSON.
    """
    model = genai.GenerativeModel("gemini-1.5-flash")
    chat = model.start_chat(
        history=[
            {"role": "user", "parts": 'Hi'},
            {"role": "model", "parts": "Great to meet you. What would you like to know?"},
        ]
    )

    chat.send_message(prompt)

    for chunk in chunks:
        chat.send_message(chunk)
        print("Chunk sent")

    response = chat.send_message("That was the last chunk, you can start generating the output now.")

    reply_text = response.text
    print(reply_text)
    data = _parse_json_reply(reply_text)

    print('-----gemini_clip_choosing is done-----')
    return data


# here we need to put it in loop
def _segment_time_to_seconds(timestamp):
    """Convert a segment timestamp into a value moviepy accepts as a time."""
    if isinstance(timestamp, str):
        text = timestamp.strip()
        if ":" in text:
            # Leave "hh:mm:ss"-style strings for moviepy to parse.
            return text
        # Accept the "12.5s" style used in the transcript and prompt example.
        return float(text.rstrip("sS").strip())
    return timestamp


def clip_videos(video_path, segment, output_path):
    """Cut one segment out of a video and write it to ``output_path``.

    Args:
        video_path: Path of the source video file.
        segment: Dict with ``'start_time'`` and ``'end_time'`` entries, given
            as numbers of seconds or as strings such as ``"12.5s"``.
        output_path: Path of the clip file to create.
    """
    video = VideoFileClip(video_path)
    try:
        start_time = _segment_time_to_seconds(segment['start_time'])
        end_time = _segment_time_to_seconds(segment['end_time'])

        clip = video.subclip(start_time, end_time)
        clip.write_videofile(output_path, codec="libx264", audio_codec="aac")
    finally:
        # Release the reader held on the source file, even if writing the
        # clip failed.
        video.close()

    print(f"Created short video: {output_path}")
    print('-----clip_videos is done-----')


In [None]:
# Instruction prompt sent to the model before the transcript chunks. The
# "Example Output" section must stay valid JSON, with start/end times given as
# plain numbers of seconds, because the model imitates it and its reply is
# parsed with json.loads and then used as clip boundaries.
prompt1 = '''
You are a professional YouTube Shorts editor with a talent for picking the best clips from long videos, focusing on moments that can go viral. You excel at choosing clips based on keywords that optimize SEO and at finding strong hooks like interesting facts or mysteries.

### Input:
- You will receive a transcript of the video, which contains every sentence spoken and its timestamp.
- The transcription may be in Arabic or English, and the Arabic could be Egyptian Arabic (العامية المصري).
- The video might feature a single speaker or multiple speakers, such as in a podcast.

### Your Task:
I need you to identify 5 clips to create YouTube Shorts from the transcript. Follow these guidelines:

1. **Clip Length**: Each clip should be between 60 seconds long. You can calculate the duration using the time provided in the brackets, use math to calculate the whole length e.g., `[0.24s - 60.24s]`.
   
2. **Key Moments**: Look for emotionally charged, funny, surprising, or highly informative moments. These are the clips that will resonate more with viewers and have a higher chance of being shared widely.

3. **Unique Insights**: Identify clips that offer valuable tips, expert opinions, or unique insights. These should be clear and concise.

4. **High Engagement Sections**: If the video has viewer comments or likes at specific timestamps, focus on these sections as they are likely to be engaging.

5. **Strong Hooks**: Make sure each clip has a strong hook at the beginning to grab attention. Hooks can be a surprising fact, mystery, or statement that encourages viewers to keep watching.

6. **Trends and Keywords**: Incorporate trending topics and relevant keywords to optimize for discovery and SEO.

### Format of Output:
Provide the clips in a JSON format. Each clip should contain:
- The start and end times in seconds.
- summary of the topic in Arabic.
- why you choosed it
- suggest a name for the clip
- give score out of 10 for the clip, score based on how engaging the clip will be with the audience

### Example Output:
```json
[
        {
            "start_time": 0.24,
            "end_time": 60.24,
            "summary": "انت تعالى انت هتفضل قاعد كده وغيرك بيعمل",
            "why i choosed it": "It opens with a strong hook and delivers one complete idea",
            "suggest a name for the clip": "Stop waiting and start doing",
            "score": "8/10"
        }
]
```

### Some Things to Consider:
- you will recieve the video transcript in chunks you will start to generate your response when i tell you the following ``That was the last chunk, you can start generating the output now.``
- you may be tricked by thinking that you must generate 5 clips of every chunk, but you should generate 5 clips from the whole transcript
- don't say anything just give me the 5 clips with the output format as we discussed
- double check the duration of the clips to make sure they are 60 seconds long
- in summary section don't give me the whole trascription

Think twice and critically about the content you choose to include in the clips. Your goal is to create engaging and shareable content that will attract viewers to the full video.
    '''

### Main Function


In [None]:
def main(youtube_url, language='ar'):
    """Run the whole pipeline: transcript -> clip selection -> download -> cut.

    Args:
        youtube_url: URL of the YouTube video to process.
        language: Language code of the transcript to use.

    Returns:
        A list with the file names of the created shorts, or a failure
        message string when the transcript or the video could not be obtained.
    """
    video_id = get_youtube_id(youtube_url)
    transcript = get_transcript(video_id, language)

    if not transcript:
        print("Failed to get transcript")
        return "Failed to get transcript"

    chunks = chunking_transcript(transcript)
    best_segments = gemini_clip_choosing(prompt1, chunks)
    print(best_segments)

    video_path = download_youtube_video(video_id)
    print(video_path)

    # download_youtube_video returns None on failure; stop here instead of
    # handing None to clip_videos.
    if not video_path:
        print("Failed to download video")
        return "Failed to download video"

    shorts = []
    for index, segment in enumerate(best_segments, start=1):
        output_path = f"short_{video_id}_{index}.mp4"
        clip_videos(video_path, segment, output_path)
        shorts.append(output_path)

    print(f"Created {len(shorts)} shorts: {', '.join(shorts)}")
    print('-----main is done-----')
    return shorts

if __name__ == "__main__":
    # Collect the run parameters interactively; surrounding whitespace from
    # copy-pasted input is dropped.
    youtube_url = input("Enter YouTube URL: ").strip()
    # A blank answer falls back to 'ar', the same default main() uses.
    language = input("Enter transcript language (e.g., en, ar): ").strip() or 'ar'

    # Work inside a per-video folder so the transcript, the download and the
    # clips end up together. A blank name would make os.makedirs fail, so in
    # that case stay in the current directory.
    folder_name = input('Enter Name of the Video: ').strip()
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)
        os.chdir(folder_name)

    result = main(youtube_url, language)
    print("Final result:", result)