In [None]:
#imports
import langchain 
from torch import cuda, bfloat16
from fpdf import FPDF
import torch
import transformers
from transformers import AutoTokenizer
from time import time
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import TextLoader,PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain,ConversationalRetrievalChain,StuffDocumentsChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import CTransformers
from langchain.vectorstores import FAISS
from langchain.memory import ConversationBufferMemory
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain import PromptTemplate, LLMChain

In [None]:
# Local LLM: the Llama-2 7B chat GGML weights on disk, run through CTransformers.
# Tokens are streamed to stdout while they are generated; a temperature of 0.01
# keeps the randomness of the answers very low.
# NOTE(review): max_new_tokens equals context_length, so a prompt plus a
# full-length completion cannot both fit in the window — confirm this is intended.
llm = CTransformers(
    model=r"C:\Users\Medha\miniconda3\m3_topic_summ\models\llama-2-7b-chat.ggmlv3.q2_K.bin",
    model_type="llama",
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],
    config={
        'max_new_tokens': 4096,
        'temperature': 0.01,
        'context_length': 4096,
    },
)



In [None]:
# Read the lecture-4 transcript PDF from disk.
loader = PyPDFLoader(
    r'C:\Users\Medha\miniconda3\m3_topic_summ\data\lec4\lec4_transcript.pdf'
)
docs = loader.load()

# Disabled alternative: load every PDF in the extra-materials folder instead.
# loader = DirectoryLoader(
#     r'C:\Users\Medha\miniconda3\m3_topic_summ\data\textbooks_extra_materials',
#     glob="*.pdf",
#     loader_cls=PyPDFLoader,
# )
# docs = loader.load()


In [None]:

def summary_generation(file_path, model=None, chunk_size=500, chunk_overlap=50, token_max=4000):
    """Summarise a PDF with a map-reduce LLM pipeline and return the summary.

    The PDF is loaded, split into token-bounded chunks, each chunk is
    summarised on its own (map step), and the partial summaries are then
    merged into one consolidated summary (reduce step). The final summary is
    also printed.

    Args:
        file_path: Path of the PDF to summarise.
        model: LangChain LLM used for both prompts. When omitted, the
            notebook-level ``llm`` is used, so one-argument calls behave as
            before.
        chunk_size: Upper bound, in tiktoken tokens, for each chunk handed to
            the map prompt.
        chunk_overlap: Number of tokens shared by consecutive chunks.
        token_max: Token budget the reduce step uses when grouping summaries.
            NOTE(review): 4000 leaves very little headroom if the model's
            context window is 4096 tokens — confirm against the LLM config.

    Returns:
        The consolidated summary produced by the map-reduce chain.

    Raises:
        ValueError: If no text chunks could be extracted from the PDF.
    """
    active_llm = llm if model is None else model

    # Load and split first so unreadable input fails fast, before any slow
    # LLM work starts.
    pages = PyPDFLoader(file_path).load()
    # The recursive splitter keeps re-splitting until every chunk fits in
    # chunk_size. The plain CharacterTextSplitter only cuts on its separator,
    # so its chunks can exceed the limit when the text has no blank lines.
    text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    split_docs = text_splitter.split_documents(pages)
    if not split_docs:
        # Without this guard the reduce prompt would run on an empty document
        # list and the model would invent a summary.
        raise ValueError(f"No extractable text found in {file_path!r}")

    # Map: summarise each chunk independently.
    map_template = """The following is a set of documents
    {docs}
    Based on this list of docs, please identify the main themes and concepts
    Expand the description of each topic and concept for 2-3 lines that should include its basic descriptions,key points and formulas if any.
    Helpful Answer:"""
    map_chain = LLMChain(llm=active_llm, prompt=PromptTemplate.from_template(map_template))

    # Reduce: merge the per-chunk summaries into one.
    reduce_template = """The following is set of summaries:
    {docs}
    Take these and distill it into a final, consolidated summary of the main topics and concepts that should include definitions and formulas of the concepts.Mention all the key points and formulas related to a concept. 
    Expand the description of each topic and concept for 2-3 lines.
    Helpful Answer:"""
    reduce_chain = LLMChain(llm=active_llm, prompt=PromptTemplate.from_template(reduce_template))

    # Joins a list of documents into a single string and feeds it to reduce_chain.
    combine_documents_chain = StuffDocumentsChain(
        llm_chain=reduce_chain, document_variable_name="docs"
    )

    # Iteratively collapses the mapped summaries until they fit in token_max,
    # then runs the final combine step. The same chain is used for both roles.
    reduce_documents_chain = ReduceDocumentsChain(
        combine_documents_chain=combine_documents_chain,
        collapse_documents_chain=combine_documents_chain,
        token_max=token_max,
    )

    # Full pipeline: map_chain over every chunk, then the reduce chain above.
    map_reduce_chain = MapReduceDocumentsChain(
        llm_chain=map_chain,
        reduce_documents_chain=reduce_documents_chain,
        # Name of the prompt variable that receives the documents.
        document_variable_name="docs",
        # Only the final summary is returned, not the per-chunk results.
        return_intermediate_steps=False,
    )

    all_summaries = map_reduce_chain.run(split_docs)
    print(all_summaries)

    return all_summaries

In [None]:
def convert_paragraph_to_pdf(para, folder_name, file_name):
    """Render a block of text as a one-font PDF and save it to disk.

    Args:
        para: Text to write into the PDF.
        folder_name: Directory that receives the PDF; it is created when it
            does not exist. An empty value means the current directory.
        file_name: Output file name without the ``.pdf`` extension.

    Returns:
        The path of the PDF that was written.
    """
    import os  # local import: the notebook's import cell does not bring in os

    # The built-in Arial font only covers Latin-1. Replace anything else
    # (curly quotes, Greek letters, ...) with "?" so FPDF does not raise a
    # Unicode error part-way through writing.
    printable_text = para.encode("latin-1", "replace").decode("latin-1")

    # makedirs("") would raise, so only create a folder when one was named.
    if folder_name:
        os.makedirs(folder_name, exist_ok=True)
    pdf_path = os.path.join(folder_name, file_name + ".pdf")

    pdf = FPDF()
    pdf.add_page()
    pdf.set_font("Arial", size=12)
    # 5 is the line height, in the document's unit.
    pdf.write(5, printable_text)
    # "F" saves the document to a local file.
    pdf.output(pdf_path, "F")
    return pdf_path

In [None]:
# Summarise the lecture-4 transcript.
lec4_summary = summary_generation(r'C:\Users\Medha\miniconda3\m3_topic_summ\data\lec4\lec4_transcript.pdf')
# NOTE(review): `all` shadows the built-in all(). It is kept only as an alias in
# case later cells still read it; switch them to `lec4_summary` and delete this line.
all = lec4_summary

In [None]:

def convert_paragraph_text(text, file_path="summary.txt"):
    """Write the model output to a UTF-8 text file, replacing any existing file.

    Args:
        text: Text to save.
        file_path: Destination file; defaults to ``summary.txt`` in the
            current working directory.

    Returns:
        The path that was written.
    """
    # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) cannot
    # encode every character a language model may produce.
    with open(file_path, "w", encoding="utf-8") as file:
        file.write(text)
    return file_path
