In [None]:
# Enable IPython's autoreload extension so that edits to imported modules
# (such as `src.transactions`) are picked up without restarting the kernel.
# Mode 2 reloads all modules before each cell is executed.
# Run this cell before any other cell that depends on your own modules.
%load_ext autoreload
%autoreload 2

# 💳 OCR Transaction Extractor - April 2025

This notebook extracts transactions from bank statement screenshots using Tesseract OCR and saves them to a CSV.

In [None]:
# 📦 Install dependencies
!pip install pytesseract opencv-python pandas numpy matplotlib

## Test each function in isolation before running the entire notebook

In [None]:
import src.transactions as transactions
import matplotlib.pyplot as plt
import cv2
import re
from collections import Counter

### 📅 Date Extraction Functions

#### 🏙️ Show the dates image

In [None]:
# Build the cropped-images object for the test screenshot; later cells read its
# `.dates_image` and `.transactions_image` attributes.
cropped_images = transactions.get_cropped_dates_and_transactions_images(
    "data/sensitive/test_image.png"
)

# Preview the dates crop on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.imshow(cropped_images.dates_image)
ax.axis('off')  # hide axis ticks and labels
plt.show()

#### Get the list of dates

In [None]:
# Call `get_list_of_dates` on the dates crop; the bare name on the last line
# makes the notebook display whatever it returned.
dates_list = transactions.get_list_of_dates(cropped_images.dates_image)
dates_list

#### 🔢 Count the number of dates

In [None]:
# Call `count_number_of_dates` on the dates crop and display the result.
number_of_dates = transactions.count_number_of_dates(cropped_images.dates_image)
number_of_dates

### 💶 Transaction Extraction Functions

#### 🌇 Show the transactions image

In [None]:
# Preview the transactions crop on an explicit figure/axes pair.
fig, ax = plt.subplots()
ax.imshow(cropped_images.transactions_image)
ax.axis('off')  # hide axis ticks and labels
plt.show()

#### Get the list of amounts

In [None]:
# Call `get_list_of_amounts` on the transactions crop and display the result.
amounts_list = transactions.get_list_of_amounts(cropped_images.transactions_image)
amounts_list

In [None]:
# Call `get_list_of_formatted_amounts` on the transactions crop and display the result.
formatted_amounts = transactions.get_list_of_formatted_amounts(
    cropped_images.transactions_image
)
formatted_amounts

#### Count the number of amounts

In [None]:
# Call `count_number_of_amounts` on the transactions crop and display the result.
number_of_amounts = transactions.count_number_of_amounts(
    cropped_images.transactions_image
)
number_of_amounts

#### Get the list of descriptions

In [None]:
# Call `get_list_of_descriptions` on the transactions crop and display the result.
descriptions_list = transactions.get_list_of_descriptions(
    cropped_images.transactions_image
)
descriptions_list

In [None]:
# Call `get_list_of_formatted_descriptions` on the transactions crop and display the result.
formatted_descriptions = transactions.get_list_of_formatted_descriptions(
    cropped_images.transactions_image
)
formatted_descriptions

#### Count the number of descriptions

In [None]:
# Call `count_number_of_descriptions` on the transactions crop and display the result.
number_of_descriptions = transactions.count_number_of_descriptions(
    cropped_images.transactions_image
)
number_of_descriptions

### Test the final functions

In [None]:
# Pass both crops to `is_number_of_rows_matching` and display its verdict.
# NOTE(review): presumably this compares the row counts found in each crop — confirm
# against src/transactions.
rows_match = transactions.is_number_of_rows_matching(
    dates_image=cropped_images.dates_image,
    transactions_image=cropped_images.transactions_image,
)
rows_match

In [None]:
# Gather the two crops under the keyword names the function is called with,
# then display what `get_list_of_transactions` returns for them.
crop_kwargs = {
    "dates_image": cropped_images.dates_image,
    "transactions_image": cropped_images.transactions_image,
}
transactions.get_list_of_transactions(**crop_kwargs)

In [None]:
# Load the full screenshot with OpenCV and extract its text.
image_path = "data/sensitive/test_image.png"
image = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file — it returns None,
# which would otherwise surface later as a confusing error inside the extraction step.
if image is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
raw_text = transactions.extract_text_from_image(image)
raw_text

In [None]:
# Feed the raw extracted text to `get_lines_after_total` and keep the result
# in a notebook-level variable; the bare name on the last line displays it.
# NOTE(review): presumably this drops everything up to and including a "total"
# line — confirm against src/transactions.
lines_without_total = transactions.get_lines_after_total(raw_text)
lines_without_total

### 🔍 Load an Image and Extract Transactions

In [None]:
# Replace 'test_image.png' with your file path
df = transactions.extract_transactions_from_image("data/sensitive/test_image.png")
df.to_csv("data/sensitive/transactions_1.csv", index=False)
print(df)