In [34]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv
from langchain.document_loaders import TextLoader, PyPDFLoader, Docx2txtLoader
from typing import List, Union
from gtts import gTTS
from langchain.schema import Document
# Pull settings from a local .env file into the process environment before any
# client is created (presumably this is where the Google API key lives -- confirm).
load_dotenv()

True

In [7]:
def load_text_file(file_path: str) -> List[Document]:
    """
    Read a plain-text file into LangChain documents via TextLoader.

    The file is decoded as UTF-8.

    Args:
        file_path (str): Location of the text file

    Returns:
        List[Document]: Whatever documents the loader produces
    """
    return TextLoader(file_path, encoding='utf-8').load()

In [None]:
def load_pdf_file(file_path: str) -> List[Document]:
    """
    Read a PDF into LangChain documents via PyPDFLoader.

    Args:
        file_path (str): Location of the PDF file

    Returns:
        List[Document]: Whatever documents the loader produces
    """
    return PyPDFLoader(file_path).load()

In [None]:
def load_word_file(file_path: str) -> List[Document]:
    """
    Read a Word (.docx) file into LangChain documents via Docx2txtLoader.

    Args:
        file_path (str): Location of the Word document

    Returns:
        List[Document]: Whatever documents the loader produces
    """
    return Docx2txtLoader(file_path).load()

In [8]:
def load_document(file_path: str) -> List[Document]:
    """
    Load a document, choosing the loader from the file extension.

    The extension is matched case-insensitively, so "report.PDF" and
    "Notes.Txt" are handled the same as their lower-case spellings.

    Args:
        file_path (str): Path to the document (.txt, .pdf or .docx)

    Returns:
        List[Document]: List of LangChain Document objects

    Raises:
        ValueError: If the extension is not one of the supported types
    """
    # Match on a lower-cased copy only; the loader still receives the path
    # exactly as the caller wrote it.
    lowered = file_path.lower()

    # Keep these as separate branches (not a prebuilt dispatch table) so a
    # loader function is only looked up when its file type is requested.
    if lowered.endswith('.txt'):
        return load_text_file(file_path)
    if lowered.endswith('.pdf'):
        return load_pdf_file(file_path)
    if lowered.endswith('.docx'):
        return load_word_file(file_path)

    raise ValueError(f"Unsupported file type: {file_path}")

In [26]:
# Load the speech transcript; only the first returned document is used below.
text_docs = load_document("state_of_the_union.txt")
text_with_next_lines = text_docs[0].page_content

# Flatten the text into one string by turning each blank-line break into a comma.
text = text_with_next_lines.replace('\n\n', ',')
# Load a PDF file
# pdf_docs = load_document("sample.pdf")

# Load a Word document
# word_docs = load_document("sample.docx")

In [29]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Model identifier kept in one place so it is easy to swap.
GEMINI_MODEL = "gemini-1.5-flash"
llm = ChatGoogleGenerativeAI(model=GEMINI_MODEL)

In [37]:
# Build the prompt first so it can be inspected separately from the model call.
summary_prompt = f'here is some data {text}. Can you generate summary from this data with focus on every detail. And generate summary directly.'
response = llm.invoke(summary_prompt)
response.content

'President Biden\'s State of the Union address focused heavily on the war in Ukraine, economic recovery, and combating inflation.  He lauded the Ukrainian people\'s resistance to Russian aggression, highlighting the international coalition formed to impose sanctions on Russia, including freezing assets of oligarchs and closing American airspace to Russian flights.  He emphasized that while US forces are not engaged in Ukraine, they are deployed to protect NATO allies.\n\nEconomically, Biden touted the creation of over 6.5 million jobs last year and the passage of the Bipartisan Infrastructure Law, a massive investment in roads, bridges, airports, and electric vehicle charging stations. He also stressed the need to pass the Bipartisan Innovation Act to boost American manufacturing and compete with China, citing Intel\'s planned $20 billion (potentially $100 billion) investment in Ohio as an example.  He acknowledged inflation\'s impact on families and outlined a plan to combat it by low

In [35]:
def text_to_speech(text, output_file, language='en'):
    """Convert text to speech with gTTS and save the audio as MP3.

    Args:
        text: The text to be spoken
        output_file: Destination path for the generated audio
        language: Language code handed to gTTS (defaults to 'en')
    """
    gTTS(text=text, lang=language).save(output_file)

In [38]:
# text_to_speech saves MP3 audio, so the file name must carry an .mp3
# extension; a .wav name would mislabel the contents for players and tools.
output_path = 'summary_audio.mp3'
text_to_speech(response.content, output_path)