In [1]:
import os
from dotenv import load_dotenv
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
import tempfile
import whisper
from pytubefix import YouTube
from langchain.prompts import ChatPromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate


In [2]:
# Load variables from a local .env file (if one exists) into the process
# environment first; without this call the imported load_dotenv is unused and
# a key stored only in .env is never seen by os.getenv below. Variables that
# are already set in the environment are left untouched.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

# Video whose audio is transcribed and summarized in the cells below.
YOUTUBE_VIDEO = "https://www.youtube.com/watch?v=qSWvn5G1cdg"

# Chat model shared by the rest of the notebook.
model = ChatOpenAI(openai_api_key=api_key, model="gpt-4o-mini")

In [3]:
#parser = StrOutputParser()

#chain = model | parser
#chain.invoke("What MLB team won the World Series during the COVID-19 pandemic?")

In [5]:
# Transcribe the video only if the transcription file has not been created
# yet; the download and the Whisper run are skipped when it already exists.
if not os.path.exists("transcription.txt"):
    youtube = YouTube(YOUTUBE_VIDEO)
    audio = youtube.streams.filter(only_audio=True).first()
    if audio is None:
        # Fail with a clear message instead of an AttributeError on
        # `None.download` when the filter matches no stream.
        raise RuntimeError(f"No audio-only stream found for {YOUTUBE_VIDEO}")

    # Let's load the base model. This is not the most accurate
    # model but it's fast.
    whisper_model = whisper.load_model("base")

    # The downloaded audio only needs to live for the duration of the
    # transcription, so it goes into a temporary directory.
    with tempfile.TemporaryDirectory() as tmpdir:
        audio_path = audio.download(output_path=tmpdir)
        transcription = whisper_model.transcribe(audio_path, fp16=False)["text"].strip()

    # NOTE(review): no explicit encoding is passed here because the file is
    # read back with the platform default as well; change both sides together.
    with open("transcription.txt", "w") as transcript_file:
        transcript_file.write(transcription)

In [6]:
# Read the cached transcription back from disk so this cell works whether or
# not the transcription step above actually ran in this session.
with open("transcription.txt") as transcript_file:
    transcription = transcript_file.read()

# Preview the first 100 characters as a quick sanity check.
transcription[:100]

"Good morning, everyone. There we are. Can you hear me? Good morning. The Saw's Board meeting of Nove"

In [7]:
# Wrap the transcription file in a document loader and load it.
loader = TextLoader(file_path="transcription.txt")
text_documents = loader.load()

# Display the loaded documents.
text_documents

[Document(metadata={'source': 'transcription.txt'}, page_content="Good morning, everyone. There we are. Can you hear me? Good morning. The Saw's Board meeting of November 2nd, 2021 is called to order. The San Antonio Water System Board of Trustees will, during the meeting, close the meeting and hold an executive session pursuant to and in accordance with chapter 551 of the Texas Open Meeting's Act. The Board of Trustees may at any time during the meeting close the meeting and hold an executive session for consultation with its attorneys concerning any of the matters to be considered during the meeting pursuant to chapter 551 of the Texas Open Meeting's Act. So before I call for a public comment, I want to welcome our newest Board of Trustees member, Miss Mary Lorraine, so welcome. Okay, next we have our third and final briefing item number C, Tracy. Hello, good morning again. If you'll indulge me, I just wanted to give a brief intro to Tracy because we're doing this one a little bit di

In [11]:
# Term(s) the summary is asked to call out explicitly.
extra_terms = "Lotte"
prompt_template = """Summarize the following text: {transcription}.  Please also call out any reference to {extra_terms}."""
llm = model
prompt = PromptTemplate(template=prompt_template, input_variables=["transcription", "extra_terms"])

# LLMChain is deprecated; compose the same prompt -> model -> string pipeline
# with the runnable pipe operator and the already-imported StrOutputParser.
summary_chain = prompt | llm | StrOutputParser()

chain_inputs = {'transcription': transcription, 'extra_terms': extra_terms}

# Keep `output` shaped like the previous LLMChain result (the inputs plus the
# generated string under 'text') so code reading output['text'] keeps working.
output = {**chain_inputs, 'text': summary_chain.invoke(chain_inputs)}


In [12]:
# Specify the path where the summary should be saved. Defined unconditionally
# so it always exists for the cell that writes the file.
summary_file_path = 'summary_output.txt'

# Fail here with a clear message rather than letting a later cell hit a
# NameError because summary_text was never assigned.
if 'text' not in output:
    raise KeyError("Chain output has no 'text' entry; nothing to summarize or save.")

summary_text = output['text']
print("Summary:", summary_text)

Summary: During the San Antonio Water System Board meeting on November 2, 2021, the Board discussed the Lotte Ranch track development, located in the northwest service area. The developer is seeking utility service for a 1,160-acre site, requesting 3,000 water equivalent dwelling units (EDUs) while proposing to build its own wastewater treatment plant, which raised environmental concerns. The site is within the city's extraterritorial jurisdiction (ETJ) but outside the existing wastewater service area.

Tracy presented various options for wastewater service, including constructing multiple lift stations, a gravity sewer main, or the developer's preferred option of an onsite wastewater treatment plant. Each option posed challenges, particularly regarding environmental impacts and the need for easements. The Board emphasized the importance of careful consideration given the area's delicate ecosystem and existing conservation easements.

Trustees raised concerns about the implications of 

In [None]:
# Persist the generated summary as UTF-8 text, then confirm where it went.
with open(summary_file_path, 'w', encoding='utf-8') as out_handle:
    out_handle.write(summary_text)
    print(f"Summary has been written to {summary_file_path}.")
    

Summary has been written to summary_output.txt.
