## Install dependencies

In [None]:
#!pip install langchain openai-whisper
#!pip install openai
#!pip install translators gTTS
#!pip install yt_dlp

## Import libraries needed

In [None]:
# Import Langchain
from langchain.document_loaders.generic import GenericLoader
from langchain.document_loaders.parsers import OpenAIWhisperParser
from langchain.document_loaders.blob_loaders.youtube_audio import YoutubeAudioLoader

from langchain import OpenAI, LLMChain
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.docstore.document import Document
import textwrap

# Import whisper
import whisper

# Translate text
import translators as ts
from gtts import gTTS

# Other modules
import re
import os

import gtts.lang
# Print whatever gtts.lang.tts_langs() returns -- presumably the table of
# language codes gTTS accepts, handy when choosing `lang` for the example
# (confirm against the gTTS documentation).
print(gtts.lang.tts_langs())

## Transcribe audio to text with YoutubeAudioLoader

In [None]:
def transcribe_audio(url='', save_dir="./"):
    """Download the audio of a YouTube video and transcribe it to text.

    Args:
        url: URL of the YouTube video to transcribe.
        save_dir: Directory handed to ``YoutubeAudioLoader`` for the
            downloaded audio.

    Returns:
        The transcript as one string. Returns an empty string when the
        loader produces no documents.
    """
    loader = GenericLoader(
        YoutubeAudioLoader([url], save_dir),
        OpenAIWhisperParser(),
    )
    documents = loader.load()
    # The loader returns a list of documents and may return more than one for
    # a single video (long audio is transcribed in chunks), so join them all
    # instead of keeping only the first one.
    return " ".join(doc.page_content for doc in documents)

## Save transcript (text)

In [None]:
# Save the transcribed text to a file
def save_transcript(text, save_file="./texto.txt"):
    """Write the transcribed text to a file.

    Args:
        text: Transcript to store.
        save_file: Path of the file to create or overwrite.
    """
    with open(save_file, "w") as file:
        # Write the `text` argument itself rather than depending on a
        # global variable that may not exist.
        file.write(text)

## Summarize text

In [None]:
def summarize_text(model_name, file_text='text.txt', *, max_chunks=4):
    """Summarize the text stored in a file with a map-reduce LLM chain.

    Args:
        model_name: Name of the OpenAI model passed to the ``OpenAI`` wrapper.
        file_text: Path of the text file to summarize.
        max_chunks: Maximum number of leading chunks fed to the chain.
            Defaults to 4; pass ``None`` to summarize every chunk.

    Returns:
        The summary, re-wrapped with ``textwrap.fill`` at width 100.
    """
    llm = OpenAI(model_name=model_name, temperature=0)

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=0, separators=[" ", ",", "\n"]
    )

    with open(file_text) as f:
        text = f.read()

    chunks = text_splitter.split_text(text)
    # Slicing with None keeps the whole list, so max_chunks=None means
    # "no limit".
    docs = [Document(page_content=chunk) for chunk in chunks[:max_chunks]]

    chain = load_summarize_chain(llm, chain_type="map_reduce")

    output_summary = chain.run(docs)
    return textwrap.fill(output_summary, width=100)

## Save summary to a text file

In [None]:
def save_summary_text(text, save_file="summary_english.txt"):
    """Store the summary in a UTF-8 encoded text file.

    Args:
        text: Summary to write.
        save_file: Path of the file to create or overwrite.
    """
    with open(save_file, mode="w", encoding="utf-8") as summary_file:
        summary_file.write(text)

## Translate to other language

In [None]:
def translate_language(text, lang_trans='es'):
    """Translate text into another language with the ``translators`` package.

    Args:
        text: Text to translate; the source language is requested as 'auto'.
        lang_trans: Target language code, forwarded as ``to_language``.

    Returns:
        The value returned by ``ts.translate_text`` for this request.
    """
    return ts.translate_text(
        text,
        from_language='auto',
        to_language=lang_trans,
    )

## Convert summary text to audio

In [None]:
def text_to_audio(text, save_output_file='audio_ouput', language=""):
    """Convert text to speech with gTTS and save the result as an MP3 file.

    Args:
        text: Text to be spoken.
        save_output_file: Output path prefix; the audio is written to
            ``f"{save_output_file}_{language}.mp3"``.
        language: gTTS language code. An empty value (the default) falls
            back to "en", because an empty string is not a valid gTTS
            language code.
    """
    # Resolve the language once so the engine and the file name agree.
    lang_code = language or "en"

    # slow=False asks gTTS for normal-speed rather than slow speech.
    speech = gTTS(text=text, lang=lang_code, slow=False)

    speech.save(f"{save_output_file}_{lang_code}.mp3")

## Example

In [None]:
# To use the program, change `url` and `lang`:
# url: URL of the YouTube video to process
# lang: language code used for the translated summary and the audio output

url = 'https://youtu.be/yWMKYID5fr8'
lang = 'fr'

if __name__ == '__main__':
    # 1. Transcribe the video's audio and keep a copy of the transcript on disk.
    text = transcribe_audio(url=url, save_dir="./")
    print(20*'*')
    print(text)
    save_transcript(text, save_file="./transcribe_text.txt")

    # 2. Summarize the saved transcript and store the summary.
    text = summarize_text(model_name="text-davinci-003", file_text='transcribe_text.txt')
    print(20*'*')
    print(text)
    save_summary_text(text, save_file="summary_english.txt")

    # 3. Translate the summary into the target language.
    text = translate_language(text, lang_trans=lang)
    print(20*'*')
    print(text)

    # 4. Turn the translated summary into an MP3 file.
    text_to_audio(text, save_output_file='audio_ouput', language=lang)

In [None]:
#text_to_audio(text,save_output_file='audio_ouput',language=lang)