<a href="https://colab.research.google.com/github/NinadShegokar/Langchain/blob/main/Summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from IPython.display import clear_output
# Clear whatever output this notebook cell has produced so far.
clear_output()

In [None]:
import logging
import sys
# Route INFO-and-above log records to stdout through exactly ONE root handler.
# The previous setup called basicConfig() and then attached a second
# StreamHandler on the same stream, so every record could be printed twice and
# each re-run of the cell stacked yet another handler.
# `force=True` removes any handlers already attached to the root logger before
# installing the new one, which keeps the cell idempotent and makes the
# configuration take effect even if the root logger was configured earlier
# (basicConfig() is otherwise a no-op in that case).
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:

from llama_index.llms.huggingface import HuggingFaceInferenceAPI
from langchain.embeddings.huggingface import HuggingFaceInferenceAPIEmbeddings
from llama_index.embeddings.langchain import LangchainEmbedding

In [None]:
import os

from langchain.chains import MapReduceChain, MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.llms import HuggingFaceEndpoint
from langchain_core.documents import Document
from langchain_core.prompts import PromptTemplate
from langchain_text_splitters import CharacterTextSplitter
from transformers import AutoTokenizer, AutoModelForCausalLM

# SECURITY: API tokens must never be committed. The token that used to be
# hard-coded on this line is exposed in version control and should be revoked;
# provide a fresh one through the environment instead.
HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not HF_TOKEN:
    raise RuntimeError(
        "Set the HF_TOKEN (or HUGGINGFACEHUB_API_TOKEN) environment variable "
        "to a Hugging Face API token before running this cell."
    )

MODEL_NAME = "mistralai/Mistral-7B-v0.1"

# LlamaIndex-side model handles. They are not usable inside LangChain chains
# (see `lc_llm` below) but are kept under their original names in case other
# cells of the notebook rely on them.
llm = HuggingFaceInferenceAPI(model_name=MODEL_NAME, token=HF_TOKEN)
embed_model = LangchainEmbedding(
    HuggingFaceInferenceAPIEmbeddings(api_key=HF_TOKEN, model_name="thenlper/gte-large")
)

# LangChain's LLMChain only accepts LangChain language models, so the chains
# below talk to the same hosted model through LangChain's own wrapper.
lc_llm = HuggingFaceEndpoint(repo_id=MODEL_NAME, huggingfacehub_api_token=HF_TOKEN)

# Load PDF documents. load_and_split() already returns chunked Documents, so no
# additional text splitter is wired into the chain.
loader = PyPDFLoader("Schizophrenia.pdf")
docs = loader.load_and_split()

# --- Map step: extract the main themes from each chunk independently ---------
map_template = """The following is a set of documents
{docs}
Based on this list of docs, please identify the main themes
Helpful Answer:"""
map_prompt = PromptTemplate.from_template(map_template)
map_llm_chain = LLMChain(llm=lc_llm, prompt=map_prompt)

# --- Reduce step: merge the per-chunk themes into one consolidated answer ----
reduce_template = """The following is a set of summaries:
{docs}
Take these and distill them into a final, consolidated summary of the main themes.
Helpful Answer:"""
reduce_prompt = PromptTemplate.from_template(reduce_template)
combine_chain = StuffDocumentsChain(
    llm_chain=LLMChain(llm=lc_llm, prompt=reduce_prompt),
    document_variable_name="docs",
)
# ReduceDocumentsChain requires a combine_documents_chain; it collapses the
# mapped outputs in batches when they do not fit into a single prompt.
reduce_documents_chain = ReduceDocumentsChain(combine_documents_chain=combine_chain)

# MapReduce chain (name kept as `map_chain` for compatibility with later cells)
map_chain = MapReduceDocumentsChain(
    llm_chain=map_llm_chain,
    reduce_documents_chain=reduce_documents_chain,
    # Must match the placeholder used in `map_template`.
    document_variable_name="docs",
    return_intermediate_steps=False,
)

# Define prompt template for StuffDocumentsChain
stuff_prompt_template = """Write a concise summary of the following in bullet points:
```{text}```
CONCISE SUMMARY:"""
stuff_prompt = PromptTemplate.from_template(stuff_prompt_template)

# Stuff Documents chain: a text-generation model is required here; the
# embedding model that was previously passed cannot generate text.
stuff_chain = StuffDocumentsChain(
    llm_chain=LLMChain(llm=lc_llm, prompt=stuff_prompt),
    document_variable_name="text",
)

# Run the map-reduce pass over all PDF chunks; the result is a single string.
mapped_reduced_docs = map_chain.invoke({"input_documents": docs})["output_text"]

# StuffDocumentsChain consumes Document objects, so wrap the intermediate text
# before asking for the final bullet-point summary.
final_summary = stuff_chain.invoke(
    {"input_documents": [Document(page_content=mapped_reduced_docs)]}
)
print(final_summary["output_text"])
