In [1]:
#Imports
!pip3 install langchain_community
from langchain_community.llms import Ollama
import json
import os
import pandas as pd




In [2]:
# 1. Configuration -- the values a reader is most likely to tune
MEMORY_FILE: str = "cache_category.json"  # JSON file read/written by load_memory/save_memory
MODEL: str = "expense-classifier-eng"  # model name handed to the Ollama client

In [3]:
# 2. This function loads the memory from a JSON file
def load_memory(path=None):
    """Load the description -> category cache from a JSON file.

    Args:
        path: File to read. When omitted, the module-level ``MEMORY_FILE``
            is used, so existing ``load_memory()`` calls behave as before.

    Returns:
        dict: The cached mapping, or an empty dict when the file is missing,
        is not valid UTF-8 JSON, or does not contain a JSON object.
    """
    if path is None:
        path = MEMORY_FILE
    if not os.path.exists(path):
        return {}
    try:
        with open(path, 'r', encoding='utf-8') as f:
            memory = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        return {}  # If file is empty or invalid, start fresh
    # Callers look up and assign by key, so anything other than a JSON
    # object (e.g. a list or a bare string) is treated as an invalid cache.
    return memory if isinstance(memory, dict) else {}

In [4]:
# 3. This function saves the memory to a JSON file
def save_memory(memory, path=None):
    """Write the description -> category cache to a JSON file.

    The data is first written to a sibling ``<path>.tmp`` file and then moved
    over the target with ``os.replace``, so a failed or interrupted dump does
    not leave a truncated cache behind.

    Args:
        memory: JSON-serialisable mapping to persist.
        path: Destination file. When omitted, the module-level ``MEMORY_FILE``
            is used, so existing ``save_memory(memory)`` calls behave as before.

    Raises:
        TypeError: If ``memory`` contains values ``json.dump`` cannot encode.
        OSError: If the file cannot be written or replaced.
    """
    if path is None:
        path = MEMORY_FILE
    tmp_path = f"{path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(memory, f, indent=4, ensure_ascii=False)
        os.replace(tmp_path, path)
    finally:
        # After a successful replace the temp file is gone; this only fires
        # when the dump or the replace failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [5]:
# 3.5 Ollama is only consulted for descriptions that are not cached yet.
# This client is bound to the Ollama model named in MODEL (created for expense classification).
llm = Ollama(
    base_url="http://localhost:11434",
    model=MODEL,
)

  llm = Ollama(model=MODEL, base_url="http://localhost:11434")


In [6]:
# 4. Rationale to classify expenses
def obtain_category(description, memory, classifier=None):
    """Return the category for one transaction description.

    Known descriptions are answered from ``memory``; unknown ones are sent to
    the language model and the answer is stored in ``memory`` for next time.

    Args:
        description: Raw description value from the transactions table. Any
            non-string (NaN, None, numbers, lists, ...) or blank string is
            treated as missing.
        memory: Mutable mapping of upper-cased, stripped description ->
            category. Updated in place when a new description is classified.
        classifier: Optional object with an ``invoke(prompt)`` method that
            returns a string. Defaults to the module-level ``llm``.

    Returns:
        str: The category, ``"No Description"`` for missing or blank input,
        or ``"Not Classified"`` when the model call fails or returns nothing.
    """
    # NaN and None are not str instances, so this single check covers them and
    # also avoids evaluating the truth value of pd.isna() on list-like input.
    if not isinstance(description, str):
        return "No Description"

    # Normalise whitespace and case so the cache key is stable
    description_clean = description.strip().upper()
    if not description_clean:
        return "No Description"

    # A: Check the dictionary to see whether we already know it
    if description_clean in memory:
        return memory[description_clean]

    # B: If it's new we ask Ollama
    print(f"Asking the IA about: {description}...")
    try:
        # Resolved inside the try so a missing global client is reported the
        # same way as any other model failure.
        model = llm if classifier is None else classifier
        category = model.invoke(f"Clasifica: {description}").strip()

        # An empty answer must not be cached, or this description would be
        # stuck with a blank category on every later run.
        if not category:
            return "Not Classified"

        # Save in memory for future cases
        memory[description_clean] = category
        return category
    except Exception as e:
        print(f"Error with Ollama: {e}")
        return "Not Classified"

### Execution

In [7]:
# Read the transaction data from the raw data folder
df = pd.read_csv('raw data/transactions_2022_2023.csv')

# Description -> category cache left behind by earlier runs (empty on first run)
known_dictionary = load_memory()

# Preview the first rows as the cell output
df.head()

Unnamed: 0,Date,Name / Description,Expense/Income,Amount (EUR)
0,2023-12-30,Belastingdienst,Expense,9.96
1,2023-12-30,Tesco Breda,Expense,17.53
2,2023-12-30,Monthly Appartment Rent,Expense,451.0
3,2023-12-30,Vishandel Sier Amsterdam,Expense,12.46
4,2023-12-29,Selling Paintings,Income,13.63


In [8]:
# Classify every row; obtain_category answers from known_dictionary when it can
# and adds each newly classified description to it.
try:
    df['Category'] = df['Name / Description'].apply(
        obtain_category, memory=known_dictionary
    )
finally:
    # Persist the cache even if classification is interrupted or raises, so
    # descriptions already answered by the model are not asked again next run.
    save_memory(known_dictionary)

# Save the final result for a dashboard
df.to_csv('classified_expenses.csv', index=False)

print("Expense classification completed and saved to classified_expenses.csv")

Asking the IA about: Belastingdienst...
Asking the IA about: Tesco Breda...
Asking the IA about: Monthly Appartment Rent...
Asking the IA about: Vishandel Sier Amsterdam...
Asking the IA about: Selling Paintings...
Asking the IA about: Spotify Ab By Adyen...
Asking the IA about: Tk Maxx Amsterdam Da...
Asking the IA about: Consulting...
Asking the IA about: Aidsfonds...
Asking the IA about: Tls Bv Inz Ov-Chipkaart...
Asking the IA about: Etos Amsterdam...
Asking the IA about: Beta Boulders Ams Amsterdam...
Asking the IA about: Salary...
Asking the IA about: Bouldermuur Bv Amsterdam...
Asking the IA about: Birtat Restaurant Amsterdam...
Asking the IA about: Freelancing...
Asking the IA about: Tikkie...
Asking the IA about: Blogging...
Asking the IA about: Taxi Utrecht...
Asking the IA about: Apple Services...
Asking the IA about: Amazon Lux...
Asking the IA about: Classpass* Monthly...
Asking the IA about: Audible Uk AdblCo/Pymt Gbr...
Expense classification completed and saved to class