# Generate a dynamic PDF from an HTML template

In [4]:
from jinja2 import Environment, FileSystemLoader
from weasyprint import HTML

# Prepare data
# Template context for the bank-statement templates. It is not used by the loan
# render that runs at the bottom of this cell (that one passes `loan`).
# All monetary values are pre-formatted strings, not numbers.
# NOTE(review): "Number_Transactions" says "10" but only four rows are listed in
# "transactions" — presumably placeholder sample data; confirm.
data = {
    "Account_Number": "123-456-789",
    "Statement_Date": "2024-03-01",
    "Period_Covered": "2024-02-01 to 2024-02-29",
    "name": "John Doe",
    "address_line1": "2450 Courage St, STE 108",
    "address_line2": "Brownsville, TX 78521",
    "Opening_Balance": "175,800.00",
    "Total_Credit_Amount": "510,000.00",
    "Total_Debit_Amount": "94,000.00",
    "Closing_Balance": "591,800.00",
    "Account_Type": "Savings",
    "Number_Transactions": "10",
    # One dict per statement row; "-" marks an empty credit/debit column.
    "transactions": [
        {"Date": "2024-03-01", "Description": "Coffee Shop", "Credit": "$50.00", "Debit": "-$5.00", "Balance": "$995.00"},
        {"Date": "2024-03-01", "Description": "Online Purchase", "Credit": "$121.51", "Debit": "-", "Balance": "$1,116.51"}, 
        {"Date": "2024-03-02", "Description": "Coffee Shop", "Credit": "$143.06", "Debit": "-", "Balance": "$1,259.57"}, 
        {"Date": "2024-03-03", "Description": "Utility Bill", "Credit": "-", "Debit": "-$60.72", "Balance": "$1,198.85"}, 
    ]
}
# Template context for the loan-application template. It is unpacked with
# render(**loan) later in this cell, so each top-level key becomes a template variable.
loan = {
    "title": "Personal Loan Application",
    "form_title": "Apply for a Personal Loan",
    "form_action": "/submit-loan-application",
    # Sample applicant; the income/expense figures are plain numbers.
    "applicant": {
        "first_name": "John",
        "last_name": "Doe",
        "dob": "1980-01-01",
        "email": "johndoe@example.com",
        "phone": "123-456-7890",
        "address": "123 Main St, Anytown, AN",
        "annual_income": 50000,
        "other_income": 5000,
        "monthly_expenses": 2000
    },
    "employment_statuses": ["Employed", "Self-employed", "Unemployed", "Retired", "Student"],
    # Maps a purpose identifier to its display label.
    "loan_purposes": {
        "debtConsolidation": "Debt Consolidation",
        "homeImprovement": "Home Improvement",
        "vehiclePurchase": "Vehicle Purchase",
        "medicalExpenses": "Medical Expenses",
        "other": "Other"
    },
    "loan_details": {
        "amount": 15000,
        # Matches one of the keys of "loan_purposes" above.
        "purpose": "debtConsolidation",
        # NOTE(review): unit is not stated here (presumably years) — confirm against the template.
        "term": 5,
        "interest_rate": "5.5%"
    }
}


# Load template
# The empty search path makes template names resolve relative to the current
# working directory. Autoescaping is switched on because the output is HTML:
# without it any value containing &, <, > or quotes is emitted verbatim and can
# corrupt the markup. The sample values in this cell contain none of those
# characters, so the rendered output for them is unchanged.
env = Environment(loader=FileSystemLoader(''), autoescape=True)
# To produce the bank statement instead, load 'html/statement-template-2.html'
# here and render it with template.render(**data).
template = env.get_template('html/loan-template-1.html')

# Render HTML content: each top-level key of `loan` becomes a template variable.
html_content = template.render(**loan)

# Generate PDF from the rendered HTML string.
HTML(string=html_content).write_pdf('pdf/loan_statement.pdf')

: 

# Convert PDF to image

In [32]:
import pdf2image
import numpy as np
import cv2
def pdftopages(path):
    """Rasterize a PDF and return its pages as OpenCV-style images.

    Args:
        path: Filesystem path of the PDF to convert.

    Returns:
        A list of numpy arrays, one per image returned by
        ``pdf2image.convert_from_path``, each converted from RGB to BGR
        channel order.
    """
    bgr_pages = []
    for pil_page in pdf2image.convert_from_path(path):
        rgb_array = np.asarray(pil_page)
        # The rendered page is treated as RGB and swapped to BGR for OpenCV.
        bgr_pages.append(cv2.cvtColor(rgb_array, cv2.COLOR_RGB2BGR))
    return bgr_pages
# Rasterize the PDF and save its first page as a PNG.
# NOTE(review): the PDF-generation cell writes 'pdf/loan_statement.pdf', while this
# reads 'pdf/bank_statement.pdf' — confirm that file exists and is the intended input.
imgs = pdftopages('pdf/bank_statement.pdf')
# imgs[0] is the first page; cv2.imwrite returns a bool success flag, which is
# the value this cell displays.
cv2.imwrite('img/bk.png', imgs[0])

True

In [6]:
# generate multiple pdfs
import json
from agents.pdf_from_html import Gen_pdf
# Gen_pdf is a project helper; it is called below with history/pdf_path/img_path/
# html_path keyword arguments. Its return value is concatenated to a string and
# JSON-serialised, so it is presumably the text describing the generated
# statement — confirm against agents/pdf_from_html.
gen_pdf_tool = Gen_pdf()

# Maps each generated image path to the value returned for that statement.
bank_info = {}
htmls = ['html/statement-template-0.html','html/statement-template-1.html','html/statement-template-2.html']
# NOTE(review): this initial value is overwritten immediately by the for-loop below.
idx = 1
# Accumulates every previous result (one per line) and is fed back into the next call.
history = ''
# Produces statements 1..99 (the range upper bound is exclusive).
for idx in range(1,100):
    pdf_path = f'pdf/bs{idx}.pdf'
    img_path=f'img/bk{idx}.png'
    # Cycle through the three templates.
    html_path = htmls[idx%3]
    # NOTE(review): `data` is also the name of the statement dict defined in the
    # first cell; this assignment rebinds it.
    data = gen_pdf_tool(history=history, pdf_path=pdf_path, img_path=img_path, html_path=html_path)
    bank_info[img_path] = data
    history = history + data + '\n'

# Persist the image-path -> result mapping.
with open('bank_info.json', 'w') as f:
    json.dump(bank_info, f)

# Convert image to LaTeX using Mathpix

In [9]:
from dotenv import load_dotenv
import os
from agents.mathpix import MathpixProcessor
# Make variables defined in the .env file visible through the process environment,
# then read the two Mathpix credentials from it and build the processor.
load_dotenv()
app_id = os.environ.get('mathpix_app_id')
app_key = os.environ.get('mathpix_app_key')
mathpix_processor = MathpixProcessor(app_id=app_id, app_key=app_key)

In [15]:
# Same path the PDF-to-image cell writes its first page to.
image_path = 'img/bk.png'
# The processor result is unpacked into two values; from the names, the first is
# presumably the recognised text and the second any extracted images — confirm
# against agents/mathpix.
text, images = mathpix_processor(image_path=image_path)

In [1]:
# Hard-coded statement text containing LaTeX markup (\( ... \), tabular).
# Presumably a saved copy of a Mathpix result so the extraction cells can run
# without calling the API — confirm.
# NOTE(review): running this cell overwrites the `text` produced by the Mathpix
# call cell, and its execution count is out of sequence with its neighbours.
text = 'Overview\nBank Statement Template 1\n(C)\n\nSheet 1: Bank Statement Template 1\n231 Valley Farms Street\nSanta Monica, CA 90403\nfirstcitizensbank@domain.com\n\nAccount Number: 111-234-567-890\nStatement Date: \\( \\mathrm{mm} / \\mathrm{dd} / \\mathrm{yyyy} \\)\nPeriod Covered: \\( \\mathrm{mm} / \\mathrm{dd} / \\) yyyy to \\( \\mathrm{mm} / \\mathrm{dd} / \\) yyyy\n\nJohn Smith\n2450 Courage St, STE 108\nBrownsville, TX 78521\n<Branch Name>\nSTATEMENT OF ACCOUNT\nOpening Balance:\nTotal Credit Amount:\nTotal Debit Amount:\nClosing Balance:\nAccount Type:\nNumber of Transactions:\nPage 1 of 1\n\\( 175,800.00 \\)\n\\( 510,000.00 \\)\n\\( 94,000.00 \\)\n\\( 591,800.00 \\)\nCurrent Account\n8\n\nTransactions\n\\begin{tabular}{lllll}\n\\hline Date & Description & Credit & Debit & Balance \\\\\n\\( 2024-03-01 \\) & Coffee Shopd & \\( \\$ 50.00 \\) & \\( -\\$ 5.00 \\) & \\( \\$ 995.00 \\) \\\\\nmm/dd/yyyy & Payment - Insurance & & \\( 3,000.00 \\) & \\( 167,400.00 \\) \\\\\nmm/dd/yyyy & Account Transfer In & \\( 500,000.00 \\) & \\( 667,400.00 \\) \\\\\nmm/dd/yyyy & Cheque Deposite & \\( 10,000.00 \\) & \\( 677,400.00 \\) \\\\\nmm/dd/yyyy & Payment - Electricity & & \\( 1,500.00 \\) & \\( 675,900.00 \\) \\\\\nmm/dd/yyyy & Payment - Water Utility & & 600.00 & \\( 675,300.00 \\) \\\\\nmm/dd/yyyy & Payment - Car Loan & & \\( 3,500.00 \\) & \\( 671,800.00 \\) \\\\\nmm/dd/yyyy & Account Transfer Out & & \\( 80,000.00591,800.00 \\)\n\\end{tabular}'

# Extract information with an LLM

In [5]:
import sys
import os
from dotenv import load_dotenv
# Load the environment variables from the .env file
load_dotenv()
from xyz.node.agent import Agent
from xyz.node.basic.llm_agent import LLMAgent
from xyz.utils.llm.openai_client import OpenAIClient
# Read the OpenAI key from the environment; load_dotenv() earlier in this cell
# makes a value stored in the .env file visible here.
# SECURITY: a literal key used to be embedded on this line. Anything committed
# with it must be treated as leaked — revoke that key and issue a new one.
# The value is None when the variable is not set.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [6]:
class Audit(Agent):
    """Agent that sends a bank statement to an OpenAI model for field extraction.

    The statement is passed in as text (the parameter spec describes it as the
    LaTeX version of the PDF) and forwarded to an ``LLMAgent`` built from the
    module-level ``AUDIT`` prompt template.
    """

    def __init__(self):
        """Create the OpenAI client, register the agent metadata and build the LLM agent."""
        client_options = dict(
            api_key=OPENAI_API_KEY,
            model='gpt-4-0125-preview',
            temperature=0.,
            top_p=1.0,
            max_tokens=2096,
        )
        self.openai_agent = OpenAIClient(**client_options)
        # A language model from xyz.utils.llm is passed to the base class here;
        # currently only OpenAI is available. This prepares for later
        # self-driving use and is not strictly required — nothing has to be passed.
        super().__init__(self.openai_agent)

        self.set_name("auditSolver")
        self.set_description("This is a audit agent that extracting information from bank statement.")
        parameter_spec = {"file": {"type": "str", "description": "The latex version of the pdf file"}}
        self.set_parameters(parameter_spec)

        # AUDIT is defined later in this cell; it is looked up when an instance is created.
        self.llm_cot_agent = LLMAgent(AUDIT, self.openai_agent, inner_multi=False, stream=False)

    def flowing(self, file: str) -> str:
        """Forward the statement text to the LLM agent and return its response.

        Args:
            file: The statement text to analyse.

        Returns:
            Whatever the underlying LLM agent returns for this input.
        """
        return self.llm_cot_agent(file=file)


# Prompt template handed to LLMAgent by Audit.__init__.
# - "system" lists the fields to extract and the required JSON reply shape.
# - "user" carries the statement through the {file} placeholder.
# The doubled braces in the JSON example are presumably escapes so that a
# str.format-style fill leaves literal braces — confirm against LLMAgent.
# NOTE(review): the system text contains typos ("statemtent", "Coverd") and says
# "modify the draft", which does not match the extraction task. It is left
# untouched here because editing the prompt changes model behaviour.
AUDIT = {
    "system": """Now, you are a Audit assistant who can help user to extract information from bank statement.
    ## You must follow all the requirements to modify the draft:
        1. You must extract the name of the bank from this bank statement.
        2. You must extract the name of person from this bank statement .
        3. You must extract the statemtent date from this bank statement.
        4. You must extract the Period Coverd from this bank statement.
        5. You must extract the Opening Balance from this bank statement.
        6. You must extract the Closing Balance from this bank statement.
        7. You must extract all the transactions history from this bank statement.
    
    ## About the output:
        Your output must be a json file containing a python dictionary to store the extracted information in the format looks like this: 
        
        {{
            "bank_name": "xxx",
            "person_name": "xxx",
            "statement_date": "xxx",
            "period_covered": "xxx",
            "opening_balance": "xxx",
            "closing_balance": "xxx",
            "transactions": [
                {{"date": "xxx", "description": "xxx", "credit": "xxx", "debit": "xxx", "balance": "xxx"}},
                # Add more transactions
            ]
        }}
        You must follow all requirements listed above. 
        Your output must contain the json file quoted by "```json" and "```"

    """,
    "user": """
The bank statement is:

{file}
"""}


In [7]:
# Instantiate the agent and run it on the statement text held in `text`.
# Calling the instance presumably dispatches to Audit.flowing through the Agent
# base class — confirm.
audit_solver = Audit()
info = audit_solver(file=text)

In [9]:
from agents.extract_info import Extract_info
# Alternative path using the project's Extract_info agent, which is called with
# the image path directly rather than with pre-extracted text.
extract_agent = Extract_info()
# Same value that the Mathpix cell assigns to image_path.
image_path = 'img/bk.png'
response = extract_agent(image_path=image_path)

['/home/ubuntu/xiangpeng/NetMind_AI_XYZ/example/audit', '/home/ubuntu/miniconda3/envs/agent/lib/python310.zip', '/home/ubuntu/miniconda3/envs/agent/lib/python3.10', '/home/ubuntu/miniconda3/envs/agent/lib/python3.10/lib-dynload', '', '/home/ubuntu/miniconda3/envs/agent/lib/python3.10/site-packages', '/home/ubuntu/xiangpeng/NetMind_AI_XYZ', '/home/ubuntu/xiangpeng/NetMind_AI_XYZ']
['/home/ubuntu/xiangpeng/NetMind_AI_XYZ/example/audit', '/home/ubuntu/miniconda3/envs/agent/lib/python310.zip', '/home/ubuntu/miniconda3/envs/agent/lib/python3.10', '/home/ubuntu/miniconda3/envs/agent/lib/python3.10/lib-dynload', '', '/home/ubuntu/miniconda3/envs/agent/lib/python3.10/site-packages', '/home/ubuntu/xiangpeng/NetMind_AI_XYZ', '/home/ubuntu/xiangpeng/NetMind_AI_XYZ', '/home/ubuntu/xiangpeng/NetMind_AI_XYZ']


In [10]:
# Show the result returned by the Extract_info agent.
print(response)

{
    "bank_name": "First Citizens Bank",
    "person_name": "John Smith",
    "statement_date": "mm/dd/yyyy",
    "period_covered": "mm/dd/yyyy to mm/dd/yyyy",
    "opening_balance": "$175,800.00",
    "closing_balance": "$591,800.00",
    "transactions": [
        {"date": "2024-03-01", "description": "Coffee Shop", "credit": "$50.00", "debit": "-$5.00", "balance": "$995.00"},
        {"date": "mm/dd/yyyy", "description": "Payment - Insurance", "credit": "", "debit": "$3,000.00", "balance": "$167,400.00"},
        {"date": "mm/dd/yyyy", "description": "Account Transfer In", "credit": "$500,000.00", "debit": "", "balance": "$667,400.00"},
        {"date": "mm/dd/yyyy", "description": "Cheque Deposit", "credit": "$10,000.00", "debit": "", "balance": "$677,400.00"},
        {"date": "mm/dd/yyyy", "description": "Payment - Electricity", "credit": "", "debit": "$1,500.00", "balance": "$675,900.00"},
        {"date": "mm/dd/yyyy", "description": "Payment - Water Utility", "credit": "", "de