# Video to Summary

In [4]:
import requests
import configparser
import datetime as dt
import pandas as pd
from langchain.prompts import PromptTemplate
import os
import openai
from langchain.document_loaders import YoutubeLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import ChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from tools import download_mp3 , chunk_text,divide_audio, divide_text, get_transcript, get_video_details
from tools import get_recent_podcasts


# Read config file.
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed, so fail here with a clear message rather than with a
# bare KeyError on the first section lookup below.
config = configparser.ConfigParser()
if not config.read('config.ini'):
    raise FileNotFoundError("config.ini not found or not readable")

# Set the API key for OpenAI, both on the openai module and in the environment
openai.api_key = config['openai']['api_key']
os.environ["OPENAI_API_KEY"] = config['openai']['api_key']

dir_save = 'temp'

# Create the temp folder for this program.  makedirs() also creates the parent
# `dir_save`, and exist_ok=True makes the call a no-op when the folders are
# already there (no separate existence check needed).
name = 'notebook'
dir_save_program = os.path.join(dir_save, name)
os.makedirs(dir_save_program, exist_ok=True)


In [5]:
def transcript_to_docs(transcript):
    """Load a YouTube transcript, cache it as a text file and split it into chunks.

    The transcript is loaded with ``YoutubeLoader`` in English first; if that
    load fails for any reason it is retried in Spanish.  ``transcript`` is
    updated in place with the keys ``'transcript'`` (the loaded text) and
    ``'file_save_txt'`` (the path of the cached text file).

    Args:
        transcript: Mapping describing the video.  The keys ``'link'`` and
            ``'title'`` are read here.

    Returns:
        A ``(docs, no_estaba)`` tuple.  ``docs`` is the loaded data split into
        chunks of at most 10000 characters.  ``no_estaba`` is True only when
        the text file was written by this call, i.e. the transcript is
        non-empty and the file did not exist yet.

    Raises:
        Exception: Whatever the Spanish fallback load raises when it fails too.
        IndexError: If the loader returns no documents.
    """
    try:
        loader = YoutubeLoader.from_youtube_url(
            transcript['link'], add_video_info=False, language='en')
        data = loader.load()
    except Exception:
        # The English load failed: retry once asking for the Spanish transcript.
        loader = YoutubeLoader.from_youtube_url(
            transcript['link'], add_video_info=False, language='es')
        data = loader.load()

    # NOTE(review): the title is used verbatim as a file name, so a title
    # containing a path separator will point into a non-existent sub-folder.
    file_save_txt = os.path.join(dir_save_program, transcript['title'] + '.txt')
    transcript['transcript'] = data[0].page_content
    transcript['file_save_txt'] = file_save_txt

    # Save the text only the first time this video is seen.
    no_estaba = False
    if not transcript['transcript']:
        print(f'Transcript for {file_save_txt} is empty, nothing saved')
    elif os.path.exists(file_save_txt):
        print(f'File {file_save_txt} already exist')
    else:
        # Explicit encoding: transcripts may contain non-ASCII characters and
        # the platform default encoding is not always able to represent them.
        with open(file_save_txt, 'w', encoding='utf-8') as f:
            f.write(transcript['transcript'])
        print(f'File {file_save_txt} saved')
        no_estaba = True

    # Show the tail of the transcript as a quick sanity check.
    print(transcript['transcript'][-100:])

    # Split text into smaller chunks (splitter default chunk size is 4000).
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=0)
    return text_splitter.split_documents(data), no_estaba


def summarize_docs(docs, output_language='en'):
    """Summarize documents with a LangChain map-reduce chain on gpt-3.5-turbo.

    Each document is summarized with the "map" prompt and the partial
    summaries are then merged with the "combine" prompt.

    Args:
        docs: Documents handed to the chain as ``input_documents``.
        output_language: ``'es'`` selects the Spanish prompts; any other value
            selects the English prompts.

    Returns:
        The outputs of the chain (called with ``return_only_outputs=True`` and
        ``return_intermediate_steps=True``).  The final summary is read by the
        caller from the ``'output_text'`` key.
    """
    # The backslashes at the end of the prompt lines are line continuations
    # inside the string literal: they join the line with the next one.
    if output_language == 'es':
        prompt_map = """ 
                Resumen en español el texto.\
                Organiza el resumen separando ideas y conceptos clave con guiones. \
                Asegurate de incluir datos, números y nombres relevantes.  \
                El resultado seguirá un formato similar al siguiente:        
                    [* Idea/Concepto clave 1  (dato o número relevante, nombre relacionado)]\
                    [* Idea/Concepto clave 2  (dato o número relevante, nombre relacionado)]\
                    [* Idea/Concepto clave 3  (dato o número relevante, nombre relacionado)]\
                    
                text  :
                    {text}    
                    """

        prompt_combine = """
                Eres un redactor con 10 años de experiencia.\
                Tu trabajo es mejorar, organizar, limpiar y traducir en español el siguiente texto.\
                El texto tiene cada idea o concepto clave separado por guiones o puntos, une en un mismo guión las ideas o conceptos clave que sean similares.\
                El resultado seguirá un formato similar al siguiente:        
                        [* Idea/Concepto clave 1  (dato o número relevante, nombre relacionado)]\
                        [* Idea/Concepto clave 2  (dato o número relevante, nombre relacionado)]\
                        [* Idea/Concepto clave 3  (dato o número relevante, nombre relacionado)]\
                texto : {text}"""
    else:
        prompt_map = """ 
                Summary in English of the text.\
                Organize the summary by separating key ideas and concepts with dashes.\
                Make sure to include relevant data, numbers, and names.\
                The result will follow a similar format to the following:\
                [* Key Idea/Concept 1 (relevant data or number, related name)]\
                [* Key Idea/Concept 2 (relevant data or number, related name)]\
                [* Key Idea/Concept 3 (relevant data or number, related name)]\

                text :
                    {text}    
                    """

        prompt_combine = """
                You are a writer with 10 years of experience. \
                Your task is to improve, organize, clean, and translate the following text into English. \
                The text has each key idea or concept separated by dashes or dots.\
                  Combine similar key ideas or concepts with the same dash. The result will follow a similar format to the following:\
                [* Key Idea/Concept 1 (relevant data or number, related name)]\
                [* Key Idea/Concept 2 (relevant data or number, related name)]\
                [* Key Idea/Concept 3 (relevant data or number, related name)]\

                text: {text}"""

    # Both prompts take the text to process as their only variable.
    map_prompt = PromptTemplate(template=prompt_map, input_variables=["text"])
    combine_prompt = PromptTemplate(template=prompt_combine, input_variables=["text"])

    # Use the summarize chain, "map_reduce" type; temperature=0 is passed to
    # the chat model for every call.
    model_llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0)  # type: ignore
    chain = load_summarize_chain(
        model_llm,
        chain_type="map_reduce",
        return_intermediate_steps=True,
        map_prompt=map_prompt,
        combine_prompt=combine_prompt,
    )
    return chain({"input_documents": docs}, return_only_outputs=True)

In [6]:
# Read program names and rss urls
output_language = 'en'  # 'es' selects the Spanish prompts, anything else English
program_to_rss_url = {'Despegamos': "https://www.youtube.com/feeds/videos.xml?channel_id=UCfYk6FdS8bFLrOz_n0b0YTQ",
                      'Juan Rallo': "https://www.youtube.com/feeds/videos.xml?channel_id=UCBLCvUUCiSqBCEc-TqZ9rGw",
                      'Marc Vidal': "https://www.youtube.com/feeds/videos.xml?channel_id=UCKC77AR_zWXRTE2GOD_2Uag",
                      'Huberman Lab': "https://www.youtube.com/feeds/videos.xml?channel_id=UC2D2CMWXMOVWx7giW1n3LIg",
                      'Crea y Transforma': "https://www.youtube.com/feeds/videos.xml?channel_id=UCs0QXHkbkBDM_NwnRiGFoTA",
                      'Niko Garnier': "https://www.youtube.com/feeds/videos.xml?channel_id=UClGDJkOcXBLA3qENf4CMcnw",
                      'The Rich Dad Channel': "https://www.youtube.com/feeds/videos.xml?channel_id=UCuifm5ns5SRG8LZJ6gCfKyw",
                      'TheDavidLin': "https://www.youtube.com/feeds/videos.xml?channel_id=UClBMLpP3UHXLmgEypMmXPuA",
                      'Bloomberg The Open': "https://www.youtube.com/feeds/videos.xml?channel_id=UCIALMKvObZNtJ6AmdCLP7Lg",
                      'Balance of Power': "https://www.youtube.com/feeds/videos.xml?channel_id=UCIALMKvObZNtJ6AmdCLP7Lg",
                      'Bloomberg Real Yield': "https://www.youtube.com/feeds/videos.xml?channel_id=UCIALMKvObZNtJ6AmdCLP7Lg",
                      'Bloomberg Surveillance': "https://www.youtube.com/feeds/videos.xml?channel_id=UCIALMKvObZNtJ6AmdCLP7Lg",
                      }
# fill if you only want to download some programs of the channel
name_to_program = {'Despegamos': 'Despegamos',
                   'Bloomberg The Open': 'Bloomberg The Open',
                   'Bloomberg Real Yield': 'Bloomberg Real Yield',
                   'Balance of Power': 'Balance of Power',
                   'Bloomberg Surveillance': 'Bloomberg Surveillance'}

# YouTube feeds without an explicit program get an empty program name
# (presumably "no filtering" -- confirm in tools.get_recent_podcasts).
for p, rss_url in program_to_rss_url.items():
    if 'youtube' in rss_url:
        name_to_program.setdefault(p, '')

# Telegram settings do not change between channels: read them once, up front,
# so a missing setting fails before any summary has been requested.
channelId = config['telegram']['channelId']
telegramApiKey = config['telegram']['api_key']

is_link = False
for name in program_to_rss_url:
    if is_link:
        link = 'https://www.youtube.com/watch?v=OJh8OoAVe9Q'
        programs = get_recent_podcasts(link, is_link=True)
    else:
        programs = get_recent_podcasts(program_to_rss_url[name], program_name=name_to_program[name])

    ## Youtube Loader

    print(len(programs))

    # Try the first entry and fall back to the second one if it cannot be
    # loaded.  A feed with no usable entry is skipped instead of aborting the
    # remaining channels.
    for idx in range(min(2, len(programs))):
        transcript = programs[idx]
        try:
            docs, no_estaba = transcript_to_docs(transcript)
        except Exception as err:
            print(f'Could not load transcript #{idx} of {name}: {err!r}')
        else:
            break
    else:
        # Either the feed was empty or every attempt failed.
        continue

    # Only transcripts saved for the first time are summarized and sent.
    if not no_estaba:
        continue

    print(f'Summarizing {transcript["title"]} {transcript["published"]}')
    output_summary = summarize_docs(docs, output_language=output_language)

    print(output_summary['output_text'])

    # Send by telegram

    messageText = f'*{name}* : {transcript["title"]} {transcript["published"]}  \n' + output_summary['output_text']

    # The message is sent in the parts produced by divide_text.
    for part in divide_text(messageText):
        telegramResult = requests.get(
            f"https://api.telegram.org/bot{telegramApiKey}/sendMessage",
            params={"chat_id": channelId, "text": part},
            timeout=30,  # without a timeout requests can block indefinitely
        )
        print(telegramResult)




2
File ('temp/notebook/AMA #7: Cold Exposure, Maximizing REM Sleep & My Next Scientific Studies.txt',) saved
at's
hubermanlab.com/premium. And as always, thank you for
your interest in science. [MUSIC PLAYING]
Summarizing AMA #7: Cold Exposure, Maximizing REM Sleep & My Next Scientific Studies 2023-05-31 12:00:26
- Andrew Huberman introduces the Huberman Lab podcast and its premium subscriber channel, which supports research on mental and physical health and performance. The premium channel costs $10 per month or $100 for the year and provides access to AMAs, transcripts, and exclusive content. The Tiny Foundation will match all funds raised for research through the premium channel.
- Deliberate cold exposure can increase the release and production of immune molecules and cells. A study showed that repeated deliberate cold exposure over six weeks can lead to trends towards increased immune system markers. The molecules epinephrine and norepinephrine released during deliberate cold expo