In [2]:
import validators
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import YoutubeLoader,UnstructuredURLLoader
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.schema import Document


In [3]:
import os
from dotenv import load_dotenv
# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with an actionable message. The previous
# `os.environ[...] = os.getenv(...)` round-trip was a no-op when the key was
# set and raised an opaque TypeError (environment values must be str) when it
# was missing.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it or add it to a .env file before running this notebook."
    )

# Chat model shared by every chain built later in the notebook.
llm = ChatGroq(model="Gemma2-9b-It")


In [4]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate


# Parser whose format instructions are baked into the prompt below.
output_parser = JsonOutputParser()

# Prompt asking for a summary plus a clickable index of the content.
# NOTE(review): `prompt` is rebound by a later cell before any chain is built
# from it, so this template looks unused as the notebook stands -- confirm.
prompt = PromptTemplate(
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
    input_variables=["text"],
    template="""
    Please analyze the video, create a summary, and provide a clickable index to its major contents.
    \n{format_instructions}\n. Content :{text}\n""",
)


In [5]:
def extract_video_id(url):
    """Return the YouTube video ID embedded in ``url``, or ``None``.

    The URL is parsed structurally rather than by substring matching, so the
    ``v`` parameter is found wherever it sits in the query string and any
    ``#fragment`` is ignored.

    Recognized shapes:
      * ``youtube.com/watch?v=<id>`` (also ``www.``, ``m.`` and ``music.`` hosts)
      * ``youtu.be/<id>``
      * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Args:
        url: The URL text to inspect. A missing scheme is tolerated.

    Returns:
        The video ID as a string, or ``None`` when ``url`` is not a string,
        is not a recognized YouTube URL, or carries no ID.
    """
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        return None

    candidate = url.strip()
    try:
        parsed = urlparse(candidate)
        if not parsed.netloc:
            # Scheme-less input such as "youtu.be/<id>": urlparse only fills in
            # the host when the text contains a "//" authority marker.
            parsed = urlparse("https://" + candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # urlparse rejects malformed authorities (e.g. unbalanced IPv6 brackets).
        return None

    if host.startswith("www."):
        host = host[4:]
    segments = [part for part in parsed.path.split("/") if part]

    # Short links carry the ID as the first path segment.
    if host == "youtu.be":
        return segments[0] if segments else None

    if host in ("youtube.com", "m.youtube.com", "music.youtube.com", "youtube-nocookie.com"):
        if segments[:1] == ["watch"]:
            # parse_qs drops blank values, so an empty "v=" yields no entry.
            ids = parse_qs(parsed.query).get("v")
            return ids[0] if ids else None
        if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            return segments[1]

    return None

# Input from user (strip stray whitespace that often comes with a pasted URL)
url = input("Enter a YouTube URL: ").strip()

# Reset on every run: if validation or the fetch fails, a transcript left in
# the kernel by an earlier run must not be picked up by the cells below.
document = None

# Validate and process
if validators.url(url):
    video_id = extract_video_id(url)
    if video_id:
        try:
            # Each transcript entry is a dict whose 'text' field holds one caption line.
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            transcript_text = " ".join(entry['text'] for entry in transcript_list)
            document = Document(page_content=transcript_text)
            print("\n--- Transcript ---\n")
            print(document.page_content)
        except Exception as e:
            # Best-effort: report the failure and leave `document` as None.
            print("Error fetching transcript:", e)
    else:
        print("Invalid YouTube URL format.")
else:
    print("Not a valid URL.")



--- Transcript ---

at this neighborhood in Dulan Mushidabad residents share harrowing tales of what they were subjected to rioting looting and arson as you can see telltale signs of what happened yesterday still fresh this morning there are residents who have shared their tales of their businesses being completely uh you know gutted attacked looted and then set on fire there are houses here uh residential houses which have been attacked broken into and men barged into these houses residents here tell us that this locality is located just a few meters away from the local police station at least half a kilometer is where the police station exists and yet despite repeated SOS calls being made to the police to rescue them to help them no help came because the police themselves were under attack with camera person Tapus Bi in Rjit for India today in Dulan Mushidabad


In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Cut the transcript into small overlapping pieces (size 100, overlap 20) so
# that each piece can be summarized on its own further down.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=20,
    chunk_size=100,
)
documents = text_splitter.split_documents([document])
# Bare expression: display the resulting chunk list as the cell output.
documents

[Document(metadata={}, page_content='at this neighborhood in Dulan Mushidabad residents share harrowing tales of what they were subjected'),
 Document(metadata={}, page_content='they were subjected to rioting looting and arson as you can see telltale signs of what happened'),
 Document(metadata={}, page_content='of what happened yesterday still fresh this morning there are residents who have shared their tales'),
 Document(metadata={}, page_content='shared their tales of their businesses being completely uh you know gutted attacked looted and then'),
 Document(metadata={}, page_content='looted and then set on fire there are houses here uh residential houses which have been attacked'),
 Document(metadata={}, page_content='have been attacked broken into and men barged into these houses residents here tell us that this'),
 Document(metadata={}, page_content='tell us that this locality is located just a few meters away from the local police station at least'),
 Document(metadata={}, page_c

In [7]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

# Per-chunk summarization prompt. It replaces the earlier `prompt` binding and
# takes two inputs: the chunk text (`documents`) and the instruction (`query`).
prompt = PromptTemplate(
    input_variables=["query", "documents"],
    template="""
    Provide the summary of the provided chunk. Think  step by step before providing a detailed answer.Summarize the following chunk in a clear, concise, and informative way. Maintain important facts and avoid repetition.
    \n{documents}\n{query}\n""",
)

In [8]:
# Per-chunk pipeline: fill the summarization prompt, then call the chat model.
chain = prompt | llm


def summarize_chunk(chunk):
    """Summarize a single transcript chunk with the per-chunk chain.

    Args:
        chunk: The chunk text to summarize.

    Returns:
        Whatever ``chain.invoke`` returns; later code reads its ``.content``.
    """
    # Pass the text itself. The previous `{chunk}` built a one-element set, so
    # the prompt was rendered with the set's repr (braces and quotes around
    # the text) instead of the raw chunk.
    return chain.invoke({"query": "give the summary", "documents": chunk})
# Summarize each chunk on its own, in order.
partial_summaries = list(
    map(summarize_chunk, (piece.page_content for piece in documents))
)
# Stitch the individual replies into one space-separated text.
final_combined_summary = " ".join([reply.content for reply in partial_summaries])
from langchain_core.prompts import ChatPromptTemplate
# Second-stage prompt: condense the stitched chunk summaries into one
# HTML-formatted summary.
prompt2 = ChatPromptTemplate.from_messages([
    (
        "system",
        "give the summary of the following text. Use proper headings, subheadings, bullet points , paragraphs where appropriate.Use HTML tags to format the text. Use a clear and concise writing style.For new line use <br> tag. ",
    ),
    ("user", "{input}"),
])
# Same model as the per-chunk chain, driven by the second-stage prompt.
chain2 = prompt2 | llm
def summarize_combined_summary(combined_summary):
    """Produce the final summary from the concatenated chunk summaries.

    Args:
        combined_summary: The joined text of all per-chunk summaries.

    Returns:
        Whatever ``chain2.invoke`` returns; later code reads its ``.content``.
    """
    # Pass the text itself. The previous `{combined_summary}` built a
    # one-element set, so the model received the set's repr (braces, quotes
    # and escape sequences) rather than the raw text.
    return chain2.invoke({"input": combined_summary})


final_summary = summarize_combined_summary(final_combined_summary)

In [9]:
# Show the text of the final summary as the cell output; the second-stage
# prompt asks the model to format it with HTML tags.
final_summary.content

"<h1>Dulan Mushidabad Under Attack: Residents Fear and Share Stories</h1>\n\n<p>Residents of Dulan Mushidabad are sharing harrowing tales of violence and chaos that erupted in their community. </p>\n\n<h2>Evidence of Recent Violence</h2>\n\n<ul>\n  <li>The area bears visible scars of rioting, looting, and arson.</li>\n  <li>Residents continue to discuss and recount their experiences from the previous day.</li>\n</ul>\n\n<h2>Targeted Attacks and Destruction</h2>\n\n<p>The violence has profoundly impacted residents' lives:</p>\n\n<ul>\n  <li>Businesses have been severely damaged by attacks, looting, and destruction.</li>\n  <li>Residential houses have been looted and burned.</li>\n  <li>Homes have been attacked, broken into, and residents have reported men forcing their way into their houses.</li>\n</ul>\n\n<h2>Lack of Police Response Despite SOS Calls</h2>\n\n<p>Adding to the residents' fear and frustration, despite the close proximity of a police station (located at least half a kilome