In [42]:
from langchain_community.document_loaders import PyPDFLoader,PyPDFDirectoryLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders.csv_loader import CSVLoader
from dotenv import load_dotenv
import os


In [11]:
# Load environment variables from a local .env file, if one is present.
load_dotenv()

# Read the Groq API key once and fail fast with a clear message when it is
# missing. Assigning None into os.environ (as the old round trip did) raises an
# opaque "str expected, not NoneType" TypeError instead.
groq_api_key = os.getenv("GROQ_API_KEY")
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your environment or to a .env file."
    )

# Mirror the key under the lowercase name this cell has always exported, in
# case other code reads it from there.
os.environ["groq_api_key"] = groq_api_key

# Chat model served by Groq; the summarization chains below are built on it.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model_name="Llama3-8b-8192",
)

# Embedding model used when building the FAISS index.
# NOTE(review): HuggingFaceBgeEmbeddings is the BGE-specific wrapper, but the
# model named here is a MiniLM sentence-transformer -- confirm whether the
# generic HuggingFaceEmbeddings class was intended.
embeddings = HuggingFaceBgeEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from tqdm.autonotebook import tqdm, trange


In [32]:
# Load the SAMSum CSV. `source_column="summary"` makes each document's
# metadata["source"] the text of that row's summary column.
loader = CSVLoader(
    file_path='./sumsum/samsum-train - Copy.csv',
    source_column="summary",
    encoding="utf-8"
)

# Parse the file once. This cell used to call loader.load() twice, reading the
# whole CSV a second time only to fill a variable nothing here uses.
documents = loader.load()
data = documents  # alias kept in case other cells still refer to `data`

# NOTE(review): each page_content holds the id, the dialogue AND the reference
# summary, so anything that summarizes these documents also sees the expected
# answer. Consider restricting the content to the dialogue column -- confirm
# intent.

# Split into overlapping chunks of at most 1000 characters for embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
final_documents = text_splitter.split_documents(documents)

# Show the first chunk as a sanity check.
final_documents[0]


Document(metadata={'source': 'Amanda baked cookies and will bring Jerry some tomorrow.', 'row': 0}, page_content="id: 13818513\ndialogue: Amanda: I baked  cookies. Do you want some?\r\nJerry: Sure!\r\nAmanda: I'll bring you tomorrow :-)\nsummary: Amanda baked cookies and will bring Jerry some tomorrow.")

In [16]:
# Build a FAISS vector store from the split documents using the embedding model.
vectors = FAISS.from_documents(final_documents, embeddings)

In [33]:
# Summarization chain that uses the "map_reduce" strategy with the Groq chat model.
chain = load_summarize_chain(
    llm=llm,
    chain_type="map_reduce",
)

In [34]:
# Summarize all loaded rows in a single map-reduce pass.
# `Chain.run` is deprecated in current LangChain releases. `invoke` takes the
# inputs as a dict and returns a dict, so only the summary text is kept and
# `out` remains a plain string as before.
out = chain.invoke({"input_documents": documents})["output_text"]

In [38]:
# Use print() rather than a bare expression so that line breaks inside the
# summary text are rendered instead of being shown as escaped "\n" in a repr.
print(out)

Here is a concise summary of the text:

The text describes a collection of casual conversations and updates among friends, covering various topics such as plans, personal issues, recommendations, reminders, and everyday life situations. The tone is lighthearted and casual, with friends sharing their thoughts, plans, and experiences with each other.


In [41]:


# Load data from CSV file
loader = CSVLoader(file_path='./sumsum/samsum-train - Copy.csv', source_column="summary", encoding="utf-8")
documents = loader.load()  # Load all rows as documents


def summarize_csv_line(row_index: int, docs=None, chain_type: str = "map_reduce") -> str:
    """Summarize a single CSV row with the LLM.

    Args:
        row_index: Zero-based position of the row in the document list.
        docs: Documents to index into. Defaults to the module-level
            ``documents`` list loaded above, which keeps existing
            ``summarize_csv_line(i)`` calls working unchanged.
        chain_type: Strategy passed to ``load_summarize_chain``. Defaults to
            "map_reduce", the value this function has always used.

    Returns:
        The summary text produced by the chain for the selected row.

    Raises:
        ValueError: If there are no documents, or ``row_index`` is out of range.
    """
    rows = documents if docs is None else docs

    # Report an empty list on its own; the range message would otherwise read
    # "between 0 and -1".
    if len(rows) == 0:
        raise ValueError("No documents are loaded; there is nothing to summarize.")
    if row_index < 0 or row_index >= len(rows):
        raise ValueError(f"Invalid row_index {row_index}. Must be between 0 and {len(rows) - 1}.")

    # The chain expects a list of documents, even when there is only one.
    selected_document = [rows[row_index]]

    chain = load_summarize_chain(llm=llm, chain_type=chain_type)

    # `Chain.run` is deprecated; `invoke` returns a dict whose "output_text"
    # entry is the summary string.
    result = chain.invoke({"input_documents": selected_document})
    return result["output_text"]


# Example: summarize a single line from the CSV (here, row 9)
row_index = 9  # Adjust this to the specific row you want to summarize
summary = summarize_csv_line(row_index)

print(f"Summary for row {row_index}:", summary)


Summary for row 9: Matt asks Agnes out on a date to the Georgian restaurant in Kazimierz on Saturday at 6 pm, and she agrees to give it a try.
