# Personal Finance Project Paolo Alberda

### Importing the Libraries

In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 

### Loading the csv file 

In [38]:
df_ing_data = pd.read_csv('Alle_rekeningen_01-01-2015_13-08-2024.csv')
df_ing_data.head()

Unnamed: 0,Datum,Naam / Omschrijving,Rekening,Tegenrekening,Code,Af Bij,Bedrag (EUR),Mutatiesoort,Mededelingen
0,20240810,Apple Services,NL82INGB0001215409,NL04ADYB2017400157,ID,Af,1500,iDEAL,Naam: Apple Services Omschrijving: J84KBR5VLVH...
1,20240810,Oranje Spaarrekening,NL82INGB0001215409,,GT,Bij,1500,Online bankieren,Van Oranje spaarrekening R34416091 Valutadatum...
2,20240810,Belastingdienst,NL82INGB0001215409,NL86INGB0002445588,GT,Af,21600,Online bankieren,Naam: Belastingdienst Omschrijving: 2240500312...
3,20240810,Oranje Spaarrekening,NL82INGB0001215409,,GT,Bij,21600,Online bankieren,Van Oranje spaarrekening D34416090 Valutadatum...
4,20240809,D. Bindels via Rabo Betaalverzoek,NL82INGB0001215409,NL13RABO0181015595,ID,Af,909,iDEAL,Naam: D. Bindels via Rabo Betaalverzoek Omschr...


### Change the column names to English

In [39]:
df_ing_data = df_ing_data.rename(columns={"Datum": "Date", "Naam / Omschrijving": "Description", "Rekening": "Account", "Tegenrekening":"Recipient",
                            "Code":"Code","Af Bij":"Added/Deducted","Bedrag (EUR)":"Amount EUR","Mutatiesoort":"Payment Type",
                            "Mededelingen":"Extra Description"})

In [40]:
df_ing_data

Unnamed: 0,Date,Description,Account,Recipient,Code,Added/Deducted,Amount EUR,Payment Type,Extra Description
0,20240810,Apple Services,NL82INGB0001215409,NL04ADYB2017400157,ID,Af,1500,iDEAL,Naam: Apple Services Omschrijving: J84KBR5VLVH...
1,20240810,Oranje Spaarrekening,NL82INGB0001215409,,GT,Bij,1500,Online bankieren,Van Oranje spaarrekening R34416091 Valutadatum...
2,20240810,Belastingdienst,NL82INGB0001215409,NL86INGB0002445588,GT,Af,21600,Online bankieren,Naam: Belastingdienst Omschrijving: 2240500312...
3,20240810,Oranje Spaarrekening,NL82INGB0001215409,,GT,Bij,21600,Online bankieren,Van Oranje spaarrekening D34416090 Valutadatum...
4,20240809,D. Bindels via Rabo Betaalverzoek,NL82INGB0001215409,NL13RABO0181015595,ID,Af,909,iDEAL,Naam: D. Bindels via Rabo Betaalverzoek Omschr...
...,...,...,...,...,...,...,...,...,...
12327,20230301,Maja Nell Leutner,NL70INGB0100141463,DE67370696273009632017,OV,Bij,20000,Overschrijving,Naam: Maja Nell Leutner Omschrijving: Grocerie...
12328,20230228,Hr PF Alberda,NL70INGB0100141463,NL82INGB0001215409,GT,Bij,6000,Online bankieren,Naam: Hr PF Alberda IBAN: NL82INGB0001215409 D...
12329,20230228,Ista Nederland B.V.,NL70INGB0100141463,NL02INGB0675360641,GT,Af,6000,Online bankieren,Naam: Ista Nederland B.V. Omschrijving: Voorsc...
12330,20230228,Hr PF Alberda,NL70INGB0100141463,NL82INGB0001215409,GT,Bij,9688,Online bankieren,Naam: Hr PF Alberda Omschrijving: March Food I...


### Format the date column

Option 1: I prefer this one because I want to treat it as a date column

In [41]:
# Step 1: Convert the Date column to a string format (if it's not already)
df_ing_data['Date'] = df_ing_data['Date'].astype(str)

# Step 2: Convert the string to a datetime object
df_ing_data['Date'] = pd.to_datetime(df_ing_data['Date'], format='%Y%m%d')

# Step 3: Format it back to a string in the desired format
df_ing_data['Date'] = df_ing_data['Date'].dt.strftime('%Y-%m-%d')


Option 2: If you do not treat it as a date function and you just want to modify a string and add hyphens


In [42]:
# Convert the 'Date' column to string if it's not already
df['Date'] = df['Date'].astype(str)
# Insert hyphens to format as 'YYYY-MM-DD'
df['Date'] = df['Date'].str[:4] + '-' + df['Date'].str[4:6] + '-' + df['Date'].str[6:]
# Display the DataFrame
print(df)

NameError: name 'df' is not defined

Option 3: Use a for loop

In [None]:
# Convert the 'Date' column to string if it's not already
df['Date'] = df['Date'].astype(str)

# Use a for loop to modify each date in the DataFrame
for i in range(len(df)):
    date_str = df.loc[i, 'Date']  # Access the string date
    formatted_date = date_str[:4] + '-' + date_str[4:6] + '-' + date_str[6:]  # Reformat it
    df.loc[i, 'Date'] = formatted_date  # Update the DataFrame with the new format

# Display the DataFrame
print(df)

In [None]:
df_ing_data

Unnamed: 0,Date,Description,Account,Recipient,Code,Added/Deducted,Amount EUR,Payment Type,Extra Description
0,2024-08-10,Apple Services,NL82INGB0001215409,NL04ADYB2017400157,ID,Af,1500,iDEAL,Naam: Apple Services Omschrijving: J84KBR5VLVH...
1,2024-08-10,Oranje Spaarrekening,NL82INGB0001215409,,GT,Bij,1500,Online bankieren,Van Oranje spaarrekening R34416091 Valutadatum...
2,2024-08-10,Belastingdienst,NL82INGB0001215409,NL86INGB0002445588,GT,Af,21600,Online bankieren,Naam: Belastingdienst Omschrijving: 2240500312...
3,2024-08-10,Oranje Spaarrekening,NL82INGB0001215409,,GT,Bij,21600,Online bankieren,Van Oranje spaarrekening D34416090 Valutadatum...
4,2024-08-09,D. Bindels via Rabo Betaalverzoek,NL82INGB0001215409,NL13RABO0181015595,ID,Af,909,iDEAL,Naam: D. Bindels via Rabo Betaalverzoek Omschr...
...,...,...,...,...,...,...,...,...,...
12327,2023-03-01,Maja Nell Leutner,NL70INGB0100141463,DE67370696273009632017,OV,Bij,20000,Overschrijving,Naam: Maja Nell Leutner Omschrijving: Grocerie...
12328,2023-02-28,Hr PF Alberda,NL70INGB0100141463,NL82INGB0001215409,GT,Bij,6000,Online bankieren,Naam: Hr PF Alberda IBAN: NL82INGB0001215409 D...
12329,2023-02-28,Ista Nederland B.V.,NL70INGB0100141463,NL02INGB0675360641,GT,Af,6000,Online bankieren,Naam: Ista Nederland B.V. Omschrijving: Voorsc...
12330,2023-02-28,Hr PF Alberda,NL70INGB0100141463,NL82INGB0001215409,GT,Bij,9688,Online bankieren,Naam: Hr PF Alberda Omschrijving: March Food I...


### Remove unnecessary column(s)

In [None]:
df_ing_data.drop(columns=['Code'])

Unnamed: 0,Date,Description,Account,Recipient,Added/Deducted,Amount EUR,Payment Type,Extra Description
0,2024-08-10,Apple Services,NL82INGB0001215409,NL04ADYB2017400157,Af,1500,iDEAL,Naam: Apple Services Omschrijving: J84KBR5VLVH...
1,2024-08-10,Oranje Spaarrekening,NL82INGB0001215409,,Bij,1500,Online bankieren,Van Oranje spaarrekening R34416091 Valutadatum...
2,2024-08-10,Belastingdienst,NL82INGB0001215409,NL86INGB0002445588,Af,21600,Online bankieren,Naam: Belastingdienst Omschrijving: 2240500312...
3,2024-08-10,Oranje Spaarrekening,NL82INGB0001215409,,Bij,21600,Online bankieren,Van Oranje spaarrekening D34416090 Valutadatum...
4,2024-08-09,D. Bindels via Rabo Betaalverzoek,NL82INGB0001215409,NL13RABO0181015595,Af,909,iDEAL,Naam: D. Bindels via Rabo Betaalverzoek Omschr...
...,...,...,...,...,...,...,...,...
12327,2023-03-01,Maja Nell Leutner,NL70INGB0100141463,DE67370696273009632017,Bij,20000,Overschrijving,Naam: Maja Nell Leutner Omschrijving: Grocerie...
12328,2023-02-28,Hr PF Alberda,NL70INGB0100141463,NL82INGB0001215409,Bij,6000,Online bankieren,Naam: Hr PF Alberda IBAN: NL82INGB0001215409 D...
12329,2023-02-28,Ista Nederland B.V.,NL70INGB0100141463,NL02INGB0675360641,Af,6000,Online bankieren,Naam: Ista Nederland B.V. Omschrijving: Voorsc...
12330,2023-02-28,Hr PF Alberda,NL70INGB0100141463,NL82INGB0001215409,Bij,9688,Online bankieren,Naam: Hr PF Alberda Omschrijving: March Food I...


### Start adding Categories

Logic to fill the categories.
1. Add a categories columns
2. See which descriptions occur the most. This helps with filling the category columns
3. Create a database that includes all your transactions (descriptions) and assign a category to them based on the full description name. 
i.e. Albert Heijn Maastricht 1992 > Albert Heijn > Food Etc (Description, Company, Category) < structure of the database > for data labeling
4. Create predfined lists with keywords that can occur in the Description column. BASED on the database in step 3. If the keywords appear in Description column the it automatically inherit the category name from that list as a data label
5. Loop through with a for loop

In [None]:
# Add a categories column
df_ing_data["Category"] = ""

In [55]:
# Examine the most frequently occuring columns
freq_table_ing_description = pd.crosstab(df_ing_data['Description'], 'no_of_Description_Occurrences')   
freq_table_ing_description
freq_table_ing_description.sort_values(by=['no_of_Description_Occurrences'], ascending=False) 
freq_table_ing_description

col_0,no_of_Description_Occurrences
Description,Unnamed: 1_level_1
't Klaoske MAASTRICHT NLD,1
1144 action MAASTRICHT NLD,1
123test BV,1
17649 Maastricht MAASTRICHT NLD,1
2525 Ventures B.V. via PAY.nl,1
...,...
ov-chipkaart/Trans Link Systems by Buckaroo,4
parkeerautomaat AZM MAASTRICHT,2
slagerij Van Melik BP MAASTRICHT,1
t Klaoske MAASTRICHT NLD,1


In [None]:
# Investigate the "Hr PF Alberda" column in Description
df_ing_data_category_PF = df_ing_data.query("Description == 'Oranje Spaarrekening'")
df_ing_data_category_PF

In [57]:
# Create Predefined Lists by first changing the Description inputs to a list because im not going to type everything out myself
full_category_list = df_ing_data['Description'].tolist()
print(full_category_list)

['Apple Services', 'Oranje Spaarrekening', 'Belastingdienst', 'Oranje Spaarrekening', 'D. Bindels via Rabo Betaalverzoek', 'Van Gool via Tikkie', 'Nationale-Nederlanden', 'Nationale-Nederlanden', 'INCASSO CREDITCARD ACCOUNTNR 210', 'Oranje Spaarrekening', 'Infomedics B.V.', 'Oranje Spaarrekening', 'Maja Nell Leutner', 'Mw MN Leutner', 'Stichting Woonpunt', 'OHRA Zorgverzekeringen', 'COOLBLUE BY BUCKAROO', 'BUDGET ENERGIE', 'Maja Nell Leutner', 'STORTING ING', 'Oranje Spaarrekening', 'Mw MN Leutner', 'Revolut**7005* Dublin IRL', 'Oranje Spaarrekening', 'Thuisbezorgd.nl ADFX7Z Utrecht', 'Mengelers Textiel MAASTRICHT NLD', 'KOFFIE BIJ JOOST EN MA NLD', 'Sportcity', 'Trans.Reference: F2C3054B77144F63', 'Oranje Spaarrekening', 'Ista Nederland B.V.', 'Oranje Spaarrekening', 'Mw MN Leutner', 'Coop Supermarkt 2274 MAASTRICHT', 'TECALLIANCE NETHERLANDS B.V.', 'Oranje Spaarrekening', 'Oranje Spaarrekening', 'Oranje Spaarrekening', 'TheWinners', 'Paolo Alberda', 'Flatex Bank AG', 'Flatex Bank AG'

In [None]:
create a for loop that checks the predefined list that is composed of the results in the freq_table_ing_description df
so
for i in description:
    if description is in list:
        fill category with list Name
    elif
    else: