In [None]:
%pip install pypdf

In [None]:
from pypdf import PdfReader

# Open the statement PDF and print the default text extraction of the page at
# index 1, as a first look at what the raw content looks like.
statement_path = "statements/bbva_signature_eecc_24_01.pdf"
reader = PdfReader(statement_path)
page = reader.pages[1]
print(page.extract_text())

In [None]:
# Re-extract the same page using pypdf's "layout" extraction mode. The next
# cell relies on columns being separated by runs of two or more spaces.
# NOTE(review): layout_mode_space_vertically=False presumably stops pypdf from
# inserting blank lines for vertical gaps — confirm against the pypdf docs.
text = page.extract_text(extraction_mode="layout", layout_mode_space_vertically=False)
lines = text.splitlines()
# Show one sample line (index 15) to inspect the spacing between columns.
test_line = lines[15]
test_line

In [None]:
import re

# Split every extracted line into columns wherever two or more consecutive
# spaces occur. re.split cuts directly on those gaps, so a literal "|" inside
# a field stays part of that field instead of being mistaken for a separator.
parsed_lines = []
for line in lines:
    parsed_line = re.split(r' {2,}', line)
    parsed_lines.append(parsed_line)
    print(parsed_line)

In [None]:
from datetime import date
from enum import Enum
from dataclasses import dataclass

class Currency(Enum):
    """Currency tag attached to a parsed operation amount."""
    # Used when the amount comes from the PEN column of a statement row
    # (presumably Peruvian soles — confirm).
    PEN = "PEN"
    # Used when the PEN column is "---" and the amount comes from the USD
    # column (presumably US dollars — confirm).
    USD = "USD"

@dataclass
class Operation:
    """One statement row parsed into typed fields."""
    # Operation date. Rows only carry "DD-MM"; the year is supplied by the parser.
    date: date
    # Free-text description column of the row.
    description: str
    # Country column; None for rows that do not have that column.
    country: str | None
    # Amount taken from whichever amount column of the row is filled in.
    amount: float
    # Currency matching the column the amount was read from.
    currency: Currency

@dataclass
class Holder:
    """A holder section of the statement together with the operations listed under it."""
    # Text before " - " in the second field of a "DETALLE DE..." header row.
    name: str
    # Text after " - " in that same header field
    # (presumably the last digits of the card — confirm).
    ending_card: str
    # Operations parsed from the rows that follow this holder's header.
    operations: list[Operation]

def parse_amount(amount: str) -> float | None:
    if amount == "---":
        return None
    return float(amount.replace(",", ""))

def parse_operation_line(line: list[str], year: int = 2021) -> Operation:
    """Convert one split statement row into an ``Operation``.

    Two row shapes are accepted; field 0 is never read in either:

    * 6 fields: ``[_, "DD-MM", description, country, PEN amount, USD amount]``
    * 5 fields: ``[_, "DD-MM", description, PEN amount, USD amount]``

    An amount column holding ``"---"`` counts as empty. The PEN column is used
    when it is filled in; otherwise the USD column is used.

    Args:
        line: Fields of one row, as produced by splitting a text line on runs
            of spaces.
        year: Year to combine with the row's day and month, because the row
            itself carries no year. Defaults to 2021, the value that was
            previously hard-coded, so existing one-argument calls behave as
            before. Pass the statement's real year to get correct dates (and
            to be able to parse ``29-02`` rows of a leap year).

    Returns:
        The parsed operation.

    Raises:
        ValueError: If the row does not have 5 or 6 fields, if both amount
            columns are ``"---"``, or if an amount or the date cannot be
            parsed.
    """
    if len(line) == 6:
        country = line[3]
    elif len(line) == 5:
        country = None
    else:
        # Carry the offending row in the error, like "Invalid amount" does.
        raise ValueError("Invalid line", line)

    # In both layouts the two amount columns are the last two fields.
    pen_amount = parse_amount(line[-2])
    usd_amount = parse_amount(line[-1])
    if pen_amount is not None:
        amount, currency = pen_amount, Currency.PEN
    elif usd_amount is not None:
        amount, currency = usd_amount, Currency.USD
    else:
        raise ValueError("Invalid amount", line)

    day, month = map(int, line[1].split("-"))
    return Operation(
        date=date(year, month, day),
        description=line[2],
        country=country,
        amount=amount,
        currency=currency,
    )

# Group the parsed rows by holder. A row whose first field starts with
# "DETALLE DE" opens a new holder section; every other row is parsed as an
# operation of the most recently opened holder. Rows that appear before the
# first header are skipped.
holders = []
for line in parsed_lines:
    print(line)
    if line[0].startswith("DETALLE DE"):
        # Split on the last " - " only, so a holder name that itself contains
        # " - " does not break the two-value unpacking.
        name, ending_card = line[1].rsplit(" - ", 1)
        holders.append(Holder(name, ending_card, []))
    elif holders:
        holders[-1].operations.append(parse_operation_line(line))
holders


In [None]:
# Print and add up every non-USD amount of the holder at index 2, then print
# the resulting sum.
total = 0
for op in holders[2].operations:
    if op.currency != Currency.USD:
        print(op.amount)
        total += op.amount
print(total)