### Connect All API Keys

In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()

True

In [3]:
## Langsmith Tracking

os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [4]:
# Load the GROQ API Key

os.environ['GROQ_API_KEY'] = os.getenv("GROQ_API_KEY")
groq_api_key = os.getenv("GROQ_API_KEY")

### Video Link Summarization using Map Reduce Method

In [5]:
# Import required libraries
import numpy as np
from langchain.document_loaders import YoutubeLoader
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.chains import load_summarize_chain
from langchain.schema import Document

In [6]:
url = "https://www.youtube.com/watch?v=oNAd-Uyk0S8"

In [7]:
#To check the function for in which language transcript is avilable

def get_transcript_languages(youtube_video_url):
    try:
        video_id = youtube_video_url.split("v=")[-1].split("&")[0]
        languages = YouTubeTranscriptApi.list_transcripts(video_id)
        return [lang.language for lang in languages]
    except TranscriptsDisabled:
        return "Subtitles are disabled for this video."
    except Exception as e:
        return f"Error fetching transcript languages: {e}"

In [8]:
print(get_transcript_languages(url))

['English (auto-generated)']


In [9]:
# Language selection

select_lang = get_transcript_languages(url)[0][:2].lower()
print(select_lang)

en


In [10]:
#Load the youtube URL

loader = YoutubeLoader.from_youtube_url(url,language=[select_lang],translation=select_lang)
docs = loader.load()
docs

[Document(metadata={'source': 'oNAd-Uyk0S8'}, page_content="this is amazing this might be the  biggest moment for generative AI  Community especially for open source AI  meta has just released Lama 3.1 which  has 405 billion parameters this is the  most sophisticated open source model  ever released it is already  outperforming GPT 4.0 on several  benchmarks and other state-ofthe-art  models in this video I am specially  going to show you how you can use meta  Lama 3.1 model using hugging fist in  just four simple steps with python  coding language so before starting let  me give a short introduction about  myself welcome to ad Academy the main  Moto of this channel is AI for arenta my  name is Dr Ayan dnat I am an i Delhi  alumni and fber research scholar at  Harvard University I have total 9 plus  years of experience in the field of  artificial intellig deep learning  machine learning NLP generative AI let's  watch this video Welcome to My metav ver  I am using Google cab you can use

In [11]:
#Text details avilable in video link

transcript = docs[0].page_content
print(transcript)

this is amazing this might be the  biggest moment for generative AI  Community especially for open source AI  meta has just released Lama 3.1 which  has 405 billion parameters this is the  most sophisticated open source model  ever released it is already  outperforming GPT 4.0 on several  benchmarks and other state-ofthe-art  models in this video I am specially  going to show you how you can use meta  Lama 3.1 model using hugging fist in  just four simple steps with python  coding language so before starting let  me give a short introduction about  myself welcome to ad Academy the main  Moto of this channel is AI for arenta my  name is Dr Ayan dnat I am an i Delhi  alumni and fber research scholar at  Harvard University I have total 9 plus  years of experience in the field of  artificial intellig deep learning  machine learning NLP generative AI let's  watch this video Welcome to My metav ver  I am using Google cab you can use any ID  where you want to work just like jupon  notebook Pa

In [12]:
#Show the information regarding video metadata

for key, value in docs[0].metadata.items():
    print(f"{key}: {value}")

source: oNAd-Uyk0S8


In [13]:
#Total number of characters avilable in text

length = len(transcript)
print(length)

10159


In [14]:
# Split the transcript into chunks

text_splitter = RecursiveCharacterTextSplitter(chunk_size=len(transcript)/5, chunk_overlap=50)
text_chunks = text_splitter.split_text(transcript)

In [15]:
# Convert text chunks into Document objects

final_documents = [Document(page_content=chunk) for chunk in text_chunks]

In [16]:
# Define the map prompt template

chunks_prompt = """
You are a summarization model using a map-reduce approach. 
Task is to summarize the text provided below. 
You do not change the language of the text. 
Focus only on creating a clear and concise summary while maintaining the original meaning.
<text>
{text}
<text>
Summary:
"""
map_prompt_template = PromptTemplate(input_variables=['text'], template=chunks_prompt)

In [17]:
# Define the final prompt template
final_prompt = """
You are a summarization model using the map-reduce approach. 
Your task is to create a final summary from the text provided below. 

Make sure the summary:
- Is clear and easy to understand.
- Focuses on the main ideas and leaves out unnecessary details.
- Is between 10 to 300 words, depending on the length of the original content.
- Does not change the language of the text.

<text>
{text}
<text>

Final summary:
"""

final_prompt_template = PromptTemplate(input_variables=['text'], template=final_prompt)

In [18]:
# Initialize the LLM

llm = ChatGroq(model="llama-3.3-70b-versatile", groq_api_key=groq_api_key)

In [19]:
# Load the summarization chain

summary_chain = load_summarize_chain(
    llm=llm,
    chain_type="map_reduce",
    map_prompt=map_prompt_template,
    combine_prompt=final_prompt_template,
    verbose=False)

In [20]:
# Show the summarization chain on the final documents

output = summary_chain.run(final_documents)

  output = summary_chain.run(final_documents)
  from .autonotebook import tqdm as notebook_tqdm


In [21]:
print(output)

The Meta AI community has released Lama 3.1, a sophisticated open-source model with 405 billion parameters. To use this model, users can follow a four-step process using Python and the Hugging Face library. First, they need to obtain an access token by creating a new token on the Hugging Face website and requesting permission from the repository author. Once access is granted, users can load the model, customize its configuration, and use it for text generation by providing a prompt.

The process involves downloading the model from the Hugging Face repository, modifying its JSON configuration file, and loading the model with the modified configuration. Users can also fine-tune the model parameters according to their specific needs. The model can be used for various applications, including text generation, and can be downloaded to a local system using a command line.

The video guide, presented by Dr. Ayan, provides a step-by-step implementation using Python packages like Transformers a

### Summray Text to Speech using gTTS

In [22]:
#Import All Required Librearies

from gtts import gTTS
import io
from IPython.display import Audio

In [23]:
# Selection of language which is our output language

language = select_lang

In [24]:
myobj = gTTS(text=output, lang=language, slow=False)

In [25]:
# Save the audio in memory

audio_fp = io.BytesIO()
myobj.write_to_fp(audio_fp)
audio_fp.seek(0)


0

In [26]:
#Play the Audio

Audio(audio_fp.read(), autoplay=True)

In [None]:
# Saving the audio file

# myobj.save("summary_speech.mp3")

In [None]:
#Play the audio

# Audio("welcome.mp3")