In [1]:
import validators
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import YoutubeLoader,UnstructuredURLLoader
from youtube_transcript_api import YouTubeTranscriptApi
from langchain.schema import Document


In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
os.environ["GROQ_API_KEY"]=os.getenv("GROQ_API_KEY")

llm = ChatGroq(model="Gemma2-9b-It")


In [3]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate


output_parser=JsonOutputParser()
prompt = PromptTemplate(
    template="""
    Please analyze the video, create a summary, and provide a clickable index to its major contents.
    \n{format_instructions}\n. Content :{text}\n""",
    input_variables=["text"],
    partial_variables={"format_instructions": output_parser.get_format_instructions()},
)


In [4]:
def extract_video_id(url):
    if "youtube.com/watch?v=" in url:
        return url.split("v=")[1].split("&")[0]
    elif "youtu.be/" in url:
        return url.split("youtu.be/")[1].split("?")[0]
    else:
        return None

# Input from user
url = input("Enter a YouTube URL: ")

# Validate and process
if validators.url(url):
    video_id = extract_video_id(url)
    if video_id:
        try:
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
            transcript_text = " ".join([t['text'] for t in transcript_list])
            document = Document(page_content=transcript_text)
            print("\n--- Transcript ---\n")
            print(document.page_content)
        except Exception as e:
            print("Error fetching transcript:", e)
    else:
        print("Invalid YouTube URL format.")
else:
    print("Not a valid URL.")



--- Transcript ---

Every single time someone watches
this video, it helps feed someone in need. And since we get a lot of views, I brought a lot of people! Here you go. Thank you, friend. Yeah, no problem. If you're wondering how this works, every time someone
watches this whole video, including you right
now, money is generated. And as this video grows, that
increasing amount of money will feed an astonishing amount of people. Everybody eats. But that's not all it's going to do. Mr. Beast! You watching this video
will supply farmland, build massive kitchens, fill
schools with food for their students, and save children from child labor. So, honestly, I'm glad you're here. There you go. 1500
people have now been fed. I'm very excited for the future. The meals you saw us
giving away are cost effective and jam-packed with nutrients and honestly, way healthier
than most meals in America. Jimmy, why does it feel like
I'm... carrying the whole thing? Let's get to feedin’! There are over 50

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=100 , chunk_overlap=20)
documents =text_splitter.split_documents(documents=[document])
documents

[Document(metadata={}, page_content='Every single time someone watches'),
 Document(metadata={}, page_content='this video, it helps feed someone in need. And since we get a lot of views, I brought a lot of'),
 Document(metadata={}, page_content="I brought a lot of people! Here you go. Thank you, friend. Yeah, no problem. If you're wondering"),
 Document(metadata={}, page_content="If you're wondering how this works, every time someone"),
 Document(metadata={}, page_content='watches this whole video, including you right'),
 Document(metadata={}, page_content='now, money is generated. And as this video grows, that'),
 Document(metadata={}, page_content="increasing amount of money will feed an astonishing amount of people. Everybody eats. But that's"),
 Document(metadata={}, page_content="eats. But that's not all it's going to do. Mr. Beast! You watching this video"),
 Document(metadata={}, page_content='will supply farmland, build massive kitchens, fill'),
 Document(metadata={}, page_cont

In [6]:
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(
    template="""
    Provide the summary of the provided chunk. Think  step by step before providing a detailed answer.Summarize the following chunk in a clear, concise, and informative way. Maintain important facts and avoid repetition.
    \n{documents}\n{query}\n""",
    input_variables=["query","documents"],
)

In [7]:
chain =prompt|llm
def summarize_chunk(chunk):
    text=chain.invoke({"query":"give the summary","documents":{chunk}})
    return text
partial_summaries = [summarize_chunk(c.page_content) for c in documents]
final_combined_summary = " ".join(item.content for item in partial_summaries)
from langchain_core.prompts import ChatPromptTemplate
prompt2=ChatPromptTemplate.from_messages(
    [
        ("system","give the summary of the following text. Use proper headings, subheadings, bullet points , paragraphs where appropriate.Use HTML tags to format the text. Use a clear and concise writing style.For new line use <br> tag. "),
        ("user","{input}")
    ]
)
chain2=prompt2|llm 
def summarize_combined_summary(combined_summary):
    text=chain2.invoke({"input":{combined_summary}})
    return text
final_summary = summarize_combined_summary(final_combined_summary) 

: 

In [None]:
final_summary.content



: 