In [41]:
import json
from langchain.chains import MapReduceDocumentsChain, ReduceDocumentsChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain_text_splitters import CharacterTextSplitter
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.llm import LLMChain
from langchain_google_genai import ChatGoogleGenerativeAI
from pprint import pprint

In [42]:
api_path: str = "/home/johan/Code/secrets/google_api.json"
with open(api_path, "r") as file:
    google_api = json.load(file)
GOOGLE_API_KEY = google_api["api_key"]

In [43]:
loader = PyPDFLoader("/home/johan/Code/llm_rag_test/bin/data/duedil/EADF PPM Updated - June 2023.pdf")
docs = loader.load()

# Split into smaller chunks
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# texts = text_splitter.split_documents(documents)

In [44]:
# llm = ChatGroq(temperature=0, model="llama3-70b-8192", api_key=GROQ_API_KEY)
llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-flash-latest", google_api_key=GOOGLE_API_KEY, temperature=0)

# Map
map_template = """The following is a set of documents
{docs}
Based on this list of docs, please identify the main themes 
Helpful Answer:"""

map_prompt = PromptTemplate.from_template(map_template)
map_chain = LLMChain(llm=llm, prompt=map_prompt)

In [45]:
output = map_chain.run(docs)

In [46]:
pprint(output)

('The main themes of these documents are:\n'
 '\n'
 '**1. Enko Africa Debt Fund Offering:** This is a private placement '
 'memorandum (PPM) for a debt fund focused on investing in pan-African private '
 "and sovereign debt instruments. The document outlines the fund's investment "
 'objective, strategy, management structure, fees, and expenses.\n'
 '\n'
 '**2. Investment Opportunity in Africa:** The PPM highlights the market '
 "opportunity in Africa, emphasizing the continent's growing economy, emerging "
 'middle class, and increasing infrastructure investment. It positions the '
 'fund as a way for investors to capitalize on this growth and diversify their '
 'portfolios.\n'
 '\n'
 '**3. Regulatory Compliance:** The document extensively covers regulatory '
 'compliance in both Mauritius (where the fund is incorporated) and the United '
 'States. It addresses issues like anti-money laundering, FATCA, CRS, and the '
 'AIFMD.\n'
 '\n'
 '**4. Risk Factors:** The PPM provides a comprehe

In [47]:
reduce_template = """The following is set of summaries:
{docs}
Take these and distill it into a final, consolidated summary of the main themes. 
Helpful Answer:"""
reduce_prompt = PromptTemplate.from_template(reduce_template)

In [48]:
# Run chain
reduce_chain = LLMChain(llm=llm, prompt=reduce_prompt)

# Takes a list of documents, combines them into a single string, and passes this to an LLMChain
combine_documents_chain = StuffDocumentsChain(
    llm_chain=reduce_chain, document_variable_name="docs"
)

# Combines and iteratively reduces the mapped documents
reduce_documents_chain = ReduceDocumentsChain(
    # This is final chain that is called.
    combine_documents_chain=combine_documents_chain,
    # If documents exceed context for `StuffDocumentsChain`
    collapse_documents_chain=combine_documents_chain,
    # The maximum number of tokens to group documents into.
    token_max=4000,
)

In [49]:
# Combining documents by mapping a chain over them, then combining results
map_reduce_chain = MapReduceDocumentsChain(
    # Map chain
    llm_chain=map_chain,
    # Reduce chain
    reduce_documents_chain=reduce_documents_chain,
    # The variable name in the llm_chain to put the documents in
    document_variable_name="docs",
    # Return the results of the map steps in the output
    return_intermediate_steps=True,
)

text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=1000, chunk_overlap=0
)
split_docs = text_splitter.split_documents(docs)

In [51]:
split_docs

[Document(metadata={'source': '/home/johan/Code/llm_rag_test/bin/data/duedil/EADF PPM Updated - June 2023.pdf', 'page': 0}, page_content='CONFIDENTIAL    \nEnko Africa Debt Fund  \nA public company, limited by shares, incorporated under the laws of Mauritius  \nOffering of Class A and Class B Redeemable Shares  \nPRIVATE PLACEMENT MEMORANDUM  \n22 June 2023  \nMinimum Initial Subscription: US$250,000 for the Class A Shares and US$250,000 for the Class  \nB Shares  \nInvestment Manager:  Enko Fund Managers Limited  \nAdministrator:  IQ EQ Fund Services (Mauritius) Ltd  \nNo person has been authorized in connection with this offering to give any information or make \nany representations other than as contained in this Private Placement Memorandum.  \nTHE SECURITIES OFFERED HEREBY HAVE NOT BEEN FILED WITH OR APPROVED OR DISAPPROVED BY ANY REGULATORY \nAUTHORITY OF ANY COUNTRY, NOR HAS ANY SUCH RE GULATORY AUTHORITY PASSED UPON OR ENDORSED THE MERITS \nOF THIS OFFERING OR THE ACCURACY OR A

In [53]:
split_docs[0:3]

[Document(metadata={'source': '/home/johan/Code/llm_rag_test/bin/data/duedil/EADF PPM Updated - June 2023.pdf', 'page': 0}, page_content='CONFIDENTIAL    \nEnko Africa Debt Fund  \nA public company, limited by shares, incorporated under the laws of Mauritius  \nOffering of Class A and Class B Redeemable Shares  \nPRIVATE PLACEMENT MEMORANDUM  \n22 June 2023  \nMinimum Initial Subscription: US$250,000 for the Class A Shares and US$250,000 for the Class  \nB Shares  \nInvestment Manager:  Enko Fund Managers Limited  \nAdministrator:  IQ EQ Fund Services (Mauritius) Ltd  \nNo person has been authorized in connection with this offering to give any information or make \nany representations other than as contained in this Private Placement Memorandum.  \nTHE SECURITIES OFFERED HEREBY HAVE NOT BEEN FILED WITH OR APPROVED OR DISAPPROVED BY ANY REGULATORY \nAUTHORITY OF ANY COUNTRY, NOR HAS ANY SUCH RE GULATORY AUTHORITY PASSED UPON OR ENDORSED THE MERITS \nOF THIS OFFERING OR THE ACCURACY OR A

In [54]:
result = map_reduce_chain.invoke(split_docs[1:20])

print(result["output_text"])

Token indices sequence length is longer than the specified maximum sequence length for this model (4560 > 1024). Running this sequence through the model will result in indexing errors


These documents provide a comprehensive overview of a private investment fund, likely a closed-end fund or a private equity fund, focusing on its structure, operations, investment strategy, and investor considerations. 

**Key Themes:**

* **Fund Structure and Operations:** The fund is incorporated in Mauritius, regulated by the FSC, and offers various share classes with different features. It has a dedicated Investment Manager and Investment Adviser, and contracts with third-party providers for administration, custody, and other services.
* **Investment Strategy:** The fund aims for long-term capital appreciation through investments in pan-African private and sovereign debt instruments, following a specific investment policy focused on strategic investments in the African capital markets.
* **Investor Considerations:** The documents outline subscription and redemption procedures, including minimum investment amounts, lock-in periods, and NAV calculations. They also detail the fee stru