# Project - YouTube Summary Generator

In [None]:
pip install openai -q

In [None]:
pip install pytube -q

In [None]:
# downloading the YouTube audio stream
def youtube_audio_downloader(link):
    """Download the first audio-only stream of a YouTube video.

    The downloaded file is renamed to ``<title>.mp3`` with every run of
    whitespace in the name replaced by ``-``. The file is only renamed,
    not re-encoded.

    Args:
        link: URL of the video. It must contain ``youtube.com`` or
            ``youtu.be``.

    Returns:
        The name of the renamed audio file (no directory part) on success,
        or ``False`` if the link is invalid, the video has no audio-only
        stream, or the downloaded file cannot be found.
    """
    import os
    import re

    # Validate first so an obviously bad link is rejected without needing
    # pytube or any network access.
    is_youtube_link = isinstance(link, str) and any(
        host in link for host in ('youtube.com', 'youtu.be')
    )
    if not is_youtube_link:
        print('Invalid YouTube link!')
        return False

    from pytube import YouTube

    # OAuth is used because unauthenticated access can fail with an
    # authentication error; plain YouTube(link) is the non-OAuth alternative.
    yt = YouTube(link, use_oauth=True, allow_oauth_cache=True)

    audio = yt.streams.filter(only_audio=True).first()
    if audio is None:
        # .first() yields None when the filtered stream list is empty.
        print('No audio stream available for this video!')
        return False

    print('Downloading the audio stream ...', end='')
    output_file = audio.download()
    if not os.path.exists(output_file):
        print('Error downloading the file!')
        return False
    print('Done!')

    directory, basename = os.path.split(output_file)
    name, _ = os.path.splitext(basename)
    audio_file = re.sub(r'\s+', '-', f'{name}.mp3')

    # Rename next to the downloaded file using its full path, and use
    # os.replace so a leftover file from a previous run is overwritten on
    # every platform (os.rename fails on Windows if the target exists).
    os.replace(output_file, os.path.join(directory, audio_file))
    return audio_file


In [None]:
# transcribing the audio_file or translating it to English
def transcribe(audio_file, not_english=False):
    """Transcribe an audio file with Whisper, or translate it to English.

    The resulting text is written as UTF-8 to ``transcript-<name>.txt`` in
    the same directory as ``audio_file``.

    Args:
        audio_file: Path of the audio file to process.
        not_english: When true, the audio is sent to the translations
            endpoint (output in English) instead of the transcriptions
            endpoint.

    Returns:
        The path of the transcript file, or ``False`` if ``audio_file`` is
        not a path to an existing file.
    """
    import os

    # A failed download stage hands over False. A bool is accepted as a file
    # descriptor by os.path.exists()/open(), so non-path values are rejected
    # explicitly instead of being treated as descriptor 0.
    is_path = isinstance(audio_file, (str, os.PathLike))
    if not is_path or not os.path.exists(audio_file):
        print('Audio file does not exist!')
        return False

    # Imported and constructed only after validation, so a missing file is
    # reported without requiring the client or its credentials.
    from openai import OpenAI
    client = OpenAI()

    if not_english:
        endpoint = client.audio.translations
        start_message = 'Starting translating to English ...'
    else:
        endpoint = client.audio.transcriptions
        start_message = 'Starting transcribing ... '

    with open(audio_file, 'rb') as f:
        print(start_message, end='')
        transcript = endpoint.create(model='whisper-1', file=f)
        print('Done!')

    # Build the name from the basename only, so a path with a directory part
    # yields "<dir>/transcript-<name>.txt" rather than "transcript-<dir>/...".
    directory, basename = os.path.split(audio_file)
    name, _ = os.path.splitext(basename)
    transcript_filename = os.path.join(directory, f'transcript-{name}.txt')

    # Explicit UTF-8: the transcript may contain characters the locale's
    # default encoding cannot represent.
    with open(transcript_filename, 'w', encoding='utf-8') as f:
        f.write(transcript.text)

    return transcript_filename


In [None]:
# summarizing the transcript using GPT
def summarize(transcript_filename):
    """Summarize a transcript file with the chat completions API.

    The transcript is embedded in a prompt asking for a titled summary made
    of an introduction paragraph, bullet points and a concluding phrase.

    Args:
        transcript_filename: Path of the text file holding the transcript.

    Returns:
        The summary text returned by the model, or ``False`` if the file
        does not exist or contains no text.
    """
    import os

    # A failed earlier stage hands over False. A bool is accepted as a file
    # descriptor by os.path.exists()/open(), so non-path values are rejected
    # explicitly.
    is_path = isinstance(transcript_filename, (str, os.PathLike))
    if not is_path or not os.path.exists(transcript_filename):
        print('The transcript file does not exist!')
        return False

    # Prefer UTF-8; fall back to the locale's default encoding for files
    # that were written without an explicit encoding.
    try:
        with open(transcript_filename, encoding='utf-8') as f:
            transcript = f.read()
    except UnicodeDecodeError:
        with open(transcript_filename) as f:
            transcript = f.read()

    # Nothing to summarize: skip the API call instead of asking the model to
    # summarize an empty text.
    if not transcript.strip():
        print('The transcript file is empty!')
        return False

    system_prompt = 'I want you to act as a Life Coach.'
    prompt = f'''Create a summary of the following text.
    Text: {transcript}

    Add a title to the summary.
    Your summary should be informative and factual, covering the most important aspects of the topic.
    Start your summary with an INTRODUCTION PARAGRAPH that gives an overview of the topic FOLLOWED
    by BULLET POINTS if possible AND end the summary with a CONCLUSION PHRASE.'''

    # Imported only once there is something to send, so input problems are
    # reported without requiring the client or its credentials.
    from openai import OpenAI

    print('Starting summarizing ... ', end='')
    client = OpenAI()
    response = client.chat.completions.create(
        model='gpt-3.5-turbo',
        messages=[
            {'role': 'system', 'content': system_prompt},
            {'role': 'user', 'content': prompt},
        ],
        temperature=1,
    )
    print('Done')

    return response.choices[0].message.content


## Running the program

In [None]:
import os
import openai

from dotenv import find_dotenv, load_dotenv

# Load the .env file located by find_dotenv(); override=True is passed so the
# file's values take precedence over variables already set in the environment.
load_dotenv(find_dotenv(), override=True)

# Hand the key from the environment to the openai module.
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [None]:
link = 'https://www.youtube.com/watch?v=vOulsEGvSCo'

# Each stage returns False on failure (after printing its own error message),
# so the next stage runs only when the previous one produced a result.
mp3_file = youtube_audio_downloader(link)

# Pass not_english=True if the source audio is not in English and needs to be
# translated as well.
transcript_file = transcribe(mp3_file, not_english=False) if mp3_file else False
summary = summarize(transcript_file) if transcript_file else False

if summary:
    print('\n')
    print(summary)
