In [67]:
# pip install langchain_community

In [68]:
from langchain_community.llms import Ollama

In [69]:
# Instantiate the Ollama LLM wrapper for the "llama2" model and send it a quick test prompt
llm = Ollama(model = "llama2")
llm.invoke("The first man on moon was...")

'\nThe first man to walk on the Moon was Neil Armstrong. He stepped out of the lunar module Eagle and onto the Moon\'s surface on July 20, 1969, during the Apollo 11 mission. Armstrong famously declared, "That\'s one small step for man, one giant leap for mankind," as he became the first person to set foot on the lunar surface.'

In [70]:
import pandas as pd
import numpy as np
# Load the transactions from the CSV file and display the resulting frame
df = pd.read_csv("transactions.csv")
df

Unnamed: 0,Date,Name / Description,Expense/Income,Amount (EUR)
0,2023-12-30,Belastingdienst,Expense,9.96
1,2023-12-30,Tesco Breda,Expense,17.53
2,2023-12-30,Monthly Appartment Rent,Expense,451.0
3,2023-12-30,Vishandel Sier Amsterdam,Expense,12.46
4,2023-12-29,Selling Paintings,Income,13.63
5,2023-12-29,Spotify Ab By Adyen,Expense,12.19
6,2023-12-23,Tk Maxx Amsterdam Da,Expense,27.08
7,2023-12-22,Consulting,Income,541.57
8,2023-12-22,Aidsfonds,Expense,10.7
9,2023-12-20,Consulting,Income,2641.93


In [71]:
# Get the unique transaction names. Missing descriptions are dropped first:
# unique() would otherwise keep NaN as a value, and NaN is not a string, so
# joining the names into a prompt with ','.join(...) would raise a TypeError.
unique_transaction = df["Name / Description"].dropna().unique()
len(unique_transaction)

23

In [72]:
# First attempt: this prompt only describes the task and gives examples;
# no transaction names are passed to the model here.
response = llm.invoke("Can you add an appropriate category to the following expenses. For example: Spotify AB by Adyen - Entertainment, Beta Boulders Ams Amsterdam Nld - Sport, etc.. Categories should be less than 4 words. ")
response

'Of course! I can help you with that. Please provide the list of expenses, and I will categorize them appropriately:\n\nExpense List:\n\n1. Spotify AB by Adyen - Entertainment\n2. Beta Boulders Ams Amsterdam Nld - Sport\n3. Uber Rides - Transportation\n4. Starbucks Coffee - Food & Beverage\n5. Gym Membership - Fitness\n6. Movie Tickets - Entertainment\n7. Flowers Delivery - Gifts/Souvenirs\n8. Vodafone Mobile Plan - Communication\n9. Netflix Subscription - Entertainment\n10. Amazon Prime Membership - Online Shopping\n\nLet me know if you need any further assistance!'

In [73]:
# Get index list
def hop(start, stop, step):
    """Yield each value of ``range(start, stop, step)`` followed by ``stop`` itself.

    The trailing ``stop`` closes the last interval, so consecutive pairs of the
    yielded values can be used as slice boundaries.
    """
    yield from range(start, stop, step)
    yield stop

# Batch boundaries over the unique transaction names: positions in steps of 30,
# followed by the total count as the final boundary.
index_list = list(hop(0, len(unique_transaction), 30))
index_list

[0, 23]

In [86]:
def categorize_transactions(transaction_names, llm):
    """Ask the LLM to label each transaction and parse its reply into a DataFrame.

    Parameters
    ----------
    transaction_names : str
        Transaction names, already joined into a single string; it is appended
        to the end of the prompt.
    llm : object
        Any object exposing ``invoke(prompt)`` that returns the reply as a string.

    Returns
    -------
    pandas.DataFrame
        One row per line of the reply with the columns
        ``'Transaction vs category'`` (the raw line), ``'Transaction'`` and
        ``'Category'``. Lines without a ``' - '`` separator keep a missing
        ``Category`` (callers can remove them with ``dropna()``); rows whose
        category is exactly ``'None'`` are removed.
    """
    # NOTE(review): the prompt's example uses the category "Sport", which is not
    # one of the categories named in the rules above it — confirm this is intended.
    response = llm.invoke("Can you add an appropriate category to the following expenses. Categories should be less than 4 words. Categories will follow these rules:" +
        "Entertainment if including all kinds of streaming services, TV, Netflix, Spotify, etc or games\n" +
        "Health/Fitness if including all kinds of sports, activities, running equipment\n" +
        "Education if including all kinds of courses, educational faciliy\n" +
        "Transportation if including all kinds of transport fee\n" +
        "Food if including all bills about food, drink or restaurant meal\n" +
        "None if not in the list above\n" +
        "For example: Spotify AB by Adyen - Entertainment, Beta Boulders Ams Amsterdam Nld - Sport, etc.. Do this for every following single transaction and don't need explanation:" +
        transaction_names
        )
    response = response.split("\n")
    print(response)

    def _strip_if_str(value):
        # Missing pieces (None/NaN) must pass through untouched.
        return value.strip() if isinstance(value, str) else value

    # One row per reply line; the raw line is kept for reference.
    categories_df = pd.DataFrame({'Transaction vs category': response})

    # Split on the LAST ' - ' only, so a transaction name that itself contains
    # ' - ' cannot produce a third column. reindex() guarantees exactly two
    # columns even when no line contains the separator at all. Both situations
    # would otherwise make the two-column assignment raise a ValueError.
    parts = (
        categories_df['Transaction vs category']
        .str.rsplit(' - ', n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    categories_df['Transaction'] = parts[0].map(_strip_if_str)
    # Stripping lets a reply such as 'None ' (trailing space) be filtered below.
    categories_df['Category'] = parts[1].map(_strip_if_str)

    # Drop the transactions the model explicitly labelled as 'None'.
    categories_df = categories_df[categories_df['Category'] != 'None']

    return categories_df

In [87]:
# Test our function on a small hand-written string of transaction names
test_category = categorize_transactions('ISS Catering Services De Meern, Taxi Utrecht, Etos AMSTERDAM NLD, Bistro Bar Amsterdam',
                        llm)
print(test_category)

['Sure! I will add an appropriate category to each of the transactions you provided. Here are the categories for each transaction:', '', '1. Spotify AB by Adyen - Entertainment', '2. Beta Boulders Ams Amsterdam Nld - Sport', '3. ISS Catering Services De Meern - None', '4. Taxi Utrecht - Transportation', '5. Etos AMSTERDAM NLD - Food', '6. Bistro Bar Amsterdam - Food']
                             Transaction vs category  \
0  Sure! I will add an appropriate category to ea...   
1                                                      
2             1. Spotify AB by Adyen - Entertainment   
3         2. Beta Boulders Ams Amsterdam Nld - Sport   
5                   4. Taxi Utrecht - Transportation   
6                       5. Etos AMSTERDAM NLD - Food   
7                     6. Bistro Bar Amsterdam - Food   

                                         Transaction        Category  
0  Sure! I will add an appropriate category to ea...            None  
1                                     

In [130]:
# Categorize the unique transaction names batch by batch. Consecutive entries
# of index_list are the start/stop positions of each batch.
batch_frames = []
for batch_start, batch_stop in zip(index_list[:-1], index_list[1:]):
    transaction_names = ','.join(unique_transaction[batch_start:batch_stop])
    batch_frames.append(categorize_transactions(transaction_names, llm))

# Concatenate once at the end instead of growing a DataFrame inside the loop
# (which re-copies all accumulated rows on every iteration). pd.concat raises
# on an empty list, so fall back to an empty frame when there were no batches.
categorizes_df_all = (
    pd.concat(batch_frames, ignore_index=True) if batch_frames else pd.DataFrame()
)

# Drop every row that has a missing value in any column.
categorizes_df_all = categorizes_df_all.dropna()

['Understood! Here are the categories for each transaction:', '', '1. Belastingdienst - None', '2. Tesco Breda - Food', '3. Monthly Appartment Rent - None', '4. Vishandel Sier Amsterdam - None', '5. Selling Paintings - None', '6. Spotify Ab By Adyen - Entertainment', '7. Tk Maxx Amsterdam Da - Retail', '8. Consulting - Professional Services', '9. Aidsfonds - None', '10. TLS Bv Inz Ov-Chipkaart - Transportation', '11. Etos Amsterdam - Retail', '12. Beta Boulders Ams Amsterdam - Health/Fitness', '13. Salary - Personal Services', '14. Bouldermuur Bv Amsterdam - None', '15. Birtat Restaurant Amsterdam - Food', '16. Freelancing - Professional Services', '17. Tikkie - Finance', '18. Blogging - Personal Services', '19. Taxi Utrecht - Transportation', '20. Apple Services - Technology', '21. Amazon Lux - Retail', '22. Classpass* Monthly - Health/Fitness', '23. Audible Uk AdblCo/Pymt Gbr - Entertainment', '', 'I hope this helps! Let me know if you have any questions or need further assistance.']

In [131]:
# Strip the leading list numbering (e.g. "1. ") from the Transaction column
import re

pattern = r'^\d+\.\s+'
leading_number = re.compile(pattern)
categorizes_df_all['Transaction'] = [
    leading_number.sub('', name) for name in categorizes_df_all['Transaction']
]
categorizes_df_all

Unnamed: 0,Transaction vs category,Transaction,Category
2,2. Tesco Breda - Food,Tesco Breda,Food
3,6. Spotify Ab By Adyen - Entertainment,Spotify Ab By Adyen,Entertainment
4,7. Tk Maxx Amsterdam Da - Retail,Tk Maxx Amsterdam Da,Retail
5,8. Consulting - Professional Services,Consulting,Professional Services
6,10. TLS Bv Inz Ov-Chipkaart - Transportation,TLS Bv Inz Ov-Chipkaart,Transportation
7,11. Etos Amsterdam - Retail,Etos Amsterdam,Retail
8,12. Beta Boulders Ams Amsterdam - Health/Fitness,Beta Boulders Ams Amsterdam,Health/Fitness
9,13. Salary - Personal Services,Salary,Personal Services
10,15. Birtat Restaurant Amsterdam - Food,Birtat Restaurant Amsterdam,Food
11,16. Freelancing - Professional Services,Freelancing,Professional Services


In [132]:
# Display the categorized transactions again
categorizes_df_all

Unnamed: 0,Transaction vs category,Transaction,Category
2,2. Tesco Breda - Food,Tesco Breda,Food
3,6. Spotify Ab By Adyen - Entertainment,Spotify Ab By Adyen,Entertainment
4,7. Tk Maxx Amsterdam Da - Retail,Tk Maxx Amsterdam Da,Retail
5,8. Consulting - Professional Services,Consulting,Professional Services
6,10. TLS Bv Inz Ov-Chipkaart - Transportation,TLS Bv Inz Ov-Chipkaart,Transportation
7,11. Etos Amsterdam - Retail,Etos Amsterdam,Retail
8,12. Beta Boulders Ams Amsterdam - Health/Fitness,Beta Boulders Ams Amsterdam,Health/Fitness
9,13. Salary - Personal Services,Salary,Personal Services
10,15. Birtat Restaurant Amsterdam - Food,Birtat Restaurant Amsterdam,Food
11,16. Freelancing - Professional Services,Freelancing,Professional Services


In [133]:
# Write the categorized transactions to a CSV file, leaving out the index column
categorizes_df_all.to_csv("transactions_2022_2023_categorized.csv", index=False)