In [None]:
#Imports
# !pip3 install langchain_community
from langchain_community.llms import Ollama
import json
import os
import pandas as pd

In [2]:
# 1. Configuration
# Name of the Ollama model that classifies expense descriptions.
MODEL = "expense-classifier-eng"
# JSON file that persists the description -> category cache between runs.
MEMORY_FILE = "cache_category.json"

In [3]:
# 2. This function loads the memory from a JSON file
def load_memory():
    """Load the description -> category cache stored in ``MEMORY_FILE``.

    Returns:
        dict: The cached mapping. An empty dict is returned when the file
        does not exist, is empty, is not valid UTF-8 JSON, or holds JSON
        that is not an object (for example a list or ``null``).
    """
    if not os.path.exists(MEMORY_FILE):
        return {}

    try:
        with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
            memory = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Empty or corrupted cache file: start fresh.
        return {}

    # Callers do key lookups and item assignment on the result, so anything
    # other than a JSON object is treated as an invalid cache as well.
    if not isinstance(memory, dict):
        return {}
    return memory

In [4]:
# 3. This function saves the memory to a JSON file
def save_memory(memory):
    """Persist the description -> category cache to ``MEMORY_FILE`` as JSON.

    The data is serialised before any file is opened and then written to a
    temporary sibling file that replaces ``MEMORY_FILE`` in one step, so a
    serialisation error or an interrupted write never leaves a truncated
    cache behind.

    Args:
        memory: JSON-serialisable mapping to store.

    Raises:
        TypeError: If ``memory`` contains values that JSON cannot encode.
        OSError: If the file cannot be written or replaced.
    """
    # Serialise first: a failure here must not touch the existing cache file.
    payload = json.dumps(memory, indent=4, ensure_ascii=False)

    # Same directory as the target so the final rename stays on one filesystem.
    tmp_path = f"{MEMORY_FILE}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        os.replace(tmp_path, MEMORY_FILE)
    finally:
        # Only still present when the write or the replace failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

In [None]:
# 3.5 Client for the Ollama model created for expense classification, reached
# through a local Ollama server. It is only meant to be queried for new cases.
llm = Ollama(
    base_url="http://localhost:11434",
    model=MODEL,
)

In [None]:
# 4. Rationale to classify expenses
def obtain_category(description, memory, llm_client=None):
    """Return the expense category for one transaction description.

    Descriptions already present in ``memory`` are answered from it; new ones
    are sent to the language model and the answer is stored in ``memory``.

    Args:
        description: Raw transaction description. Any non-string value
            (NaN, None, numbers, array-likes, ...) is treated as missing.
        memory: Dict mapping normalised (stripped, upper-cased) descriptions
            to categories. Updated in place when a new answer is obtained.
        llm_client: Optional object exposing ``invoke(prompt)`` that returns
            a string. Defaults to the module-level ``llm``.

    Returns:
        str: The category; ``"No Description"`` when the description is
        missing or blank; ``"Not Classified"`` when the model call fails or
        answers with an empty string.
    """
    # Missing values (NaN/None/NA) are never strings, so the type check alone
    # covers them and also avoids evaluating pd.isna on array-like input,
    # whose result has no single truth value.
    if not isinstance(description, str):
        return "No Description"

    # Normalise whitespace and letter case so equivalent texts share one key.
    description_clean = description.strip().upper()

    # A blank description carries nothing to classify; do not query the model.
    if not description_clean:
        return "No Description"

    # A: Check the dictionary to see whether we already know it.
    if description_clean in memory:
        return memory[description_clean]

    # B: If it is new, ask the model.
    print(f"Asking the IA about: {description}...")
    try:
        client = llm if llm_client is None else llm_client
        # Prompt text is kept verbatim so existing model behaviour is unchanged.
        category = client.invoke(f"Clasify: {description}").strip()

        # An empty answer is not a category; caching it would silently hide
        # the description from the model on every later run.
        if not category:
            return "Not Classified"

        # Save in memory for future cases.
        memory[description_clean] = category
        return category
    except Exception as e:
        print(f"Error with Ollama: {e}")
        return "Not Classified"

### Execution

In [None]:
# Read the transaction data; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer='transactions_2022_2023.csv')

# Cache of categories learned in previous runs (empty on the first run).
known_dictionary = load_memory()

# Preview the first rows of the loaded transactions.
df.head()

In [None]:
# Classify every transaction description, reusing the known dictionary so the
# model is only queried for descriptions that have not been seen before.
try:
    df['Category'] = df['Name / Description'].apply(lambda x: obtain_category(x, known_dictionary))
finally:
    # Saves the updated memory even when the run is interrupted or fails
    # part-way, so answers already obtained from the model are not lost.
    save_memory(known_dictionary)

# Save the final result for a dashboard
df.to_csv('classified_expenses.csv', index=False)

print("Expense classification completed and saved to classified_expenses.csv")