In [None]:
import ollama

def _normalize_answer(raw: str) -> str:
    """Reduce a raw model reply to a bare lower-case token for comparison."""
    answer = raw.strip().lower()
    # The prompt's own examples are written as "Result: true", so the model
    # sometimes echoes that label back; drop it before comparing.
    if answer.startswith("result:"):
        answer = answer[len("result:"):]
    # Trim surrounding whitespace, quotes and sentence punctuation ("true.", '"false"').
    return answer.strip(" \t\r\n\"'`.!")


def is_transaction_email_title(title: str, model: str = 'llama3.2-vision:11b-instruct-q4_K_M') -> bool:
    """Ask a local Ollama chat model whether an email title is a transaction receipt.

    Args:
        title: The email title followed by ``from:<domain>``, as in the prompt examples.
        model: Name of the Ollama model to query. Defaults to the model this
            notebook was originally written against.

    Returns:
        True only when the model's reply normalizes to "true". Any other reply
        (including an unexpected one, which is also printed for inspection)
        yields False.
    """
    response = ollama.chat(model=model, messages=[
        {
            'role': 'system',
            'content': """You are an expert in classifying email titles whether they are a transaction journal/receipt or not.

Your task:
Only output a "true" or "false" for each title that is given. Each title also has a from:<domain>. You may ignore the source domain.
Make sure to return "false" for promotional or emails about returns.

Example:
Input: Internet Transaction Journal from:bca.co.id
Result: true

Input: Your Grab E-Receipt from:grab.com
Result: true

Input: OVO QR Payment Receipt from:ovo.co.id
Result: true

Input: Surplus Neraca Perdagangan RI di Bawah Ekspektasi from:stockbit.com
Result: false

IMPORTANT: Provide ONLY the true/false answer. Do not include any introductory phrases,
labels, or meta-text like "Here's an answer"
Start directly with the answer.
            """
        },
        {
            'role': 'user',
            'content': title,
        },
    ])

    raw_reply = response['message']['content']
    # Without normalization a reply such as "True\n" or "false." would fail the
    # exact comparison below and be silently recorded as False.
    answer = _normalize_answer(raw_reply)

    if answer not in ("true", "false"):
        print(f"Title `{title}` unexpected answer: {raw_reply}")

    return answer == "true"

In [None]:
import pandas as pd

# Load the email titles to be labelled; a later cell reads the `title`
# column of this frame row by row.
df = pd.read_csv(filepath_or_buffer="titles.csv")
# A bare trailing expression renders the frame as the cell's output.
df

In [None]:
# Classify every title in `df`. The result list is grown to exactly one entry
# per row instead of being pre-sized to a hard-coded 10000: a fixed size raises
# IndexError when the CSV has more rows and a length-mismatch ValueError on the
# column assignment below when it has fewer.
classifications = []
total_titles = len(df)

# Iterate by position rather than by index label, so the result stays aligned
# with the rows even if the frame's index is not 0..n-1.
for position, title in enumerate(df["title"]):
    if position % 100 == 0:
        print(f"Progress: {position}/{total_titles}")

    classifications.append(is_transaction_email_title(title))

# Merge to main df
df["is_transaction"] = classifications
df

In [None]:
# Persist the labelled frame. NOTE(review): with pandas' default `index=True`
# the row index is written as an extra leading column — confirm that is wanted.
df.to_csv(path_or_buf="titles-labelled.csv")