# MP3 and Video to Summary

In [91]:
import requests
import feedparser
import os
import datetime as dt
import pydub
import openai
import configparser
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from langchain.chains.summarize import load_summarize_chain
from transformers import GPT2TokenizerFast
from langchain.text_splitter import NLTKTextSplitter
# NLTK-based splitter used for the first chunking pass further down.
text_splitter = NLTKTextSplitter(chunk_size=1000)

# GPT-2 tokenizer; later handed to CharacterTextSplitter.from_huggingface_tokenizer.
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")

# Credentials are read from config.ini, section [openai], option api_key.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means config.ini is missing.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini not found or unreadable; it must define [openai] api_key"
    )

# Look the key up once and hand it to both the openai module and the
# environment variable.
openai_api_key = config['openai']['api_key']
openai.api_key = openai_api_key
os.environ["OPENAI_API_KEY"] = openai_api_key

# Working directory for the downloaded audio and its segments.
dir_save = 'temp'
# exist_ok=True creates the directory only when needed, without a separate
# (race-prone) existence check.
os.makedirs(dir_save, exist_ok=True)

# Chat model used for summarization.
llm = ChatOpenAI(temperature=0)

In [138]:
# Set the ffprobe attribute on pydub's AudioSegment to the Homebrew binary path.
ffprobe_path = '/opt/homebrew/bin/ffprobe'
pydub.AudioSegment.ffprobe = ffprobe_path
# RSS feed of the podcast to process.
rss_url = "https://feeds.buzzsprout.com/2022276.rss"

In [162]:

def get_recent_podcasts(feed_url, program_name=None, max_age_days=7):
    """Return the feed entries published within the last ``max_age_days`` days.

    Args:
        feed_url: Feed location passed straight to ``feedparser.parse``.
        program_name: Optional case-insensitive substring an entry's title
            must contain to be kept. ``None`` or ``""`` keeps every recent
            entry.
        max_age_days: Maximum age, in whole days, for an entry to count as
            recent. Defaults to 7, the window that used to be hard-coded.

    Returns:
        A list of feedparser entries, in feed order.
    """
    feed = feedparser.parse(feed_url)
    # Timezone-aware "now" so the comparison below is UTC against UTC.
    now = dt.datetime.now(dt.timezone.utc)
    wanted = program_name.lower() if program_name else None

    recent_podcasts = []
    for entry in feed.entries:
        published = entry.get('published_parsed')
        if not published:
            # No usable publication date: the age is unknown, so skip the
            # entry instead of crashing on it.
            continue
        # feedparser reports parsed dates as UTC time tuples; attach UTC so
        # the subtraction is not skewed by the local timezone offset.
        published_date = dt.datetime(*published[:6], tzinfo=dt.timezone.utc)
        if (now - published_date).days > max_age_days:
            continue
        if wanted and wanted not in entry.get('title', '').lower():
            continue
        recent_podcasts.append(entry)

    return recent_podcasts

def download_mp3(url, filename, timeout=60, chunk_size=1 << 16):
    """Download ``url`` and write the response body to ``filename``.

    Args:
        url: Address of the audio file; redirects are followed.
        filename: Destination path, opened in binary write mode.
        timeout: Seconds passed to ``requests.get`` as its timeout.
        chunk_size: Number of bytes requested per streamed chunk.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved as if it were audio.
    """
    # stream=True avoids holding the whole file in memory; the context manager
    # releases the connection when the download finishes or fails.
    with requests.get(url, allow_redirects=True, stream=True,
                      timeout=timeout) as response:
        response.raise_for_status()
        # "with" guarantees the file handle is flushed and closed.
        with open(filename, 'wb') as mp3_file:
            for chunk in response.iter_content(chunk_size=chunk_size):
                mp3_file.write(chunk)


In [158]:
# Recent feed entries whose title mentions the "Despegamos" program.
programs = get_recent_podcasts(feed_url=rss_url, program_name='Despegamos')

In [166]:
# Take the first matching episode; fail with a clear message when the feed had
# no recent match instead of an opaque IndexError.
if not programs:
    raise RuntimeError("No recent podcast episodes matched; nothing to download")
program = programs[0]
# Save into dir_save: the splitting step loads the audio from
# os.path.join(dir_save, 'test.mp3'), not from the working directory.
download_mp3(program.enclosures[0].href, os.path.join(dir_save, 'test.mp3'))

In [169]:
# Use the OpenAI Whisper API to convert the mp3 to text. The audio is first cut
# into 10-minute segments (presumably to keep each upload small -- confirm
# against the API's file size limit).
name_audio = 'test.mp3'
file_save = os.path.join(dir_save, name_audio)
load_program = pydub.AudioSegment.from_mp3(file_save)

# Segment length: ten minutes expressed in milliseconds.
ten_minutes = 10 * 60 * 1000

# Export format is the file extension ("mp3"). Named audio_format rather than
# format so the builtin format() is not shadowed.
audio_format = name_audio.split('.')[-1]

# Walk the audio in 10-minute steps and write each segment to its own file,
# numbered from 1.
names_div = []
for counter, start in enumerate(range(0, len(load_program), ten_minutes), start=1):
    # The final slice may reach past the end of the audio, so the last segment
    # can be shorter than ten minutes.
    segment = load_program[start:start + ten_minutes]

    name_div = f"{counter}_{name_audio}"
    names_div.append(name_div)
    segment.export(os.path.join(dir_save, name_div), format=audio_format)



In [170]:
# Transcribe every segment with Whisper, keeping both the raw API responses
# and one langchain Document per segment for the summarization chain.
transcripts = []
docs = []
for name_div in names_div:
    file_save = os.path.join(dir_save, name_div)
    # The context manager closes each segment file once the request returns,
    # instead of leaking one open handle per segment.
    with open(file_save, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    transcripts.append(transcript)
    # page_content is passed by keyword: Document is a pydantic model and the
    # keyword form is accepted regardless of the installed langchain version.
    docs.append(Document(page_content=transcript.text,
                         metadata={'name': name_div}))



In [171]:
# Stitch the per-segment transcripts into one string, one segment per line.
segment_texts = (t.text for t in transcripts)
text = ' \n'.join(segment_texts)
text

In [178]:
# save text
with open('text.txt', 'w') as f:
    f.write(text)

In [181]:
# read text
with open('text.txt', 'r') as f:
    text = f.read()

text

# Summarize text

In [100]:
# Chunk the transcript with the current splitter and preview the first chunk.
texts = text_splitter.split_text(text)
first_chunk = texts[0]
print(first_chunk)

In [101]:
# Rebuild the splitter from the GPT-2 tokenizer loaded earlier, then re-chunk
# the transcript with it.
text_splitter = CharacterTextSplitter.from_huggingface_tokenizer(
    tokenizer,
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_text(text)

In [124]:
# Size of the first chunk. Assuming the chunks are str, len() counts
# characters here, not tokens.
len(texts[0])

In [129]:
# Build a summarization chain of type "map_reduce" on the chat model and run it
# over the per-segment Documents.
chain = load_summarize_chain(llm=llm, chain_type="map_reduce")
chain.run(docs)

In [134]:
# Rebuild the splitter once more, this time from the tiktoken encoder, and
# re-chunk the transcript.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_text(text)

In [135]:
# Number of chunks produced by the most recent split_text call.
len(texts)

'Tema 1: Economía y política en Europa\n- Se discute el tema del gasto social y su beneficio para los empresarios durante la crisis económica.\n- Se habla de la escasez de gas en Europa y la necesidad de reducir la demanda para el próximo invierno.\n- Se cuestiona si el mercado del gas está intervenido y si las empresas están comprando gas ruso a pesar de las sanciones.\n- Se menciona la fuerte inflación esperada y el plan de infraestructuras y gasto público presentado por Joe Biden.\n- Se detecta un aumento en el uso de tarjetas de crédito en Estados Unidos y se menciona la posible crisis financiera.\n- Se descubre que Rusia sigue realizando transferencias en alta mar de combustible en las costas de Ceuta, a pesar de la carta enviada por las autoridades españolas para detener esta práctica.\n\nTema 2: Economía global\n- Se menciona la falta de cumplimiento de algunas empresas occidentales en cuanto a las sanciones impuestas a Rusia en el comercio de petróleo y productos derivados.\n- 