In [None]:
%matplotlib inline

In [None]:
from IPython.display import display
from IPython.display import clear_output
from ipywidgets import *
import datetime as dt
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import locale

# Settings

In [None]:
# Use British number formatting for all locale-aware output. The spelling of
# the locale name is platform dependent, so try the known variants in order
# and keep the current locale (with a notice) when none of them is installed,
# instead of aborting the whole notebook with locale.Error.
for _locale_name in ('en_GB.utf8', 'en_GB.UTF-8'):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
    except locale.Error:
        continue
    break
else:
    print('Locale en_GB is not available; amounts are formatted with the current locale.')

# Generic functions

In [None]:
def amount_to_str(x):
    """Format an amount given in cents as a locale-aware string.

    x: Amount in cents; anything ``float()`` accepts.
    Returns: The amount divided by 100, rendered with two decimals and the
        digit grouping of the active ``LC_NUMERIC`` locale.
    """
    # locale.format() was deprecated in Python 3.7 and removed in 3.12;
    # locale.format_string() accepts the same arguments for this call.
    return locale.format_string("%.2f", float(x) / 100.0, grouping=True)

In [None]:
def get_period(df, start=None, end=None):
    """Select the rows of ``df`` whose ``date`` lies within ``[start, end]``.

    df: DataFrame with a ``date`` column holding datetimes.
    start: First day of the period (inclusive); a date, datetime, Timestamp
        or date string. ``None`` (default) applies no lower bound.
    end: Last day of the period (inclusive); same types as ``start``.
        ``None`` (default) means today, evaluated on every call.
    Returns: The matching rows of ``df`` in their original order.
    """
    # The defaults used to be computed once, when the function was defined.
    # That froze "today" for the lifetime of the kernel and raised ValueError
    # when the cell was run on 29 February (1900 is not a leap year).
    if end is None:
        end = dt.date.today()
    # Compare Timestamps with Timestamps: pandas does not reliably compare a
    # datetime64 column with a plain datetime.date (newer versions raise).
    dates = pd.to_datetime(df['date'])
    in_period = dates <= pd.Timestamp(end)
    if start is not None:
        in_period &= dates >= pd.Timestamp(start)
    return df.loc[in_period]

# Data

In [None]:
# Load the raw transaction export. At this point the columns still carry the
# export's own headers (e.g. 'Datum', 'Bedrag (EUR)'); they are renamed in the
# preprocessing section below.
df = pd.read_csv('afschriften.csv')

In [None]:
# Load the category table. Further down it is joined to the transactions on
# 'account_to' and aggregated per 'category'.
cats = pd.read_csv('categories.csv')

# Preprocessing
###### Column renaming

In [None]:
# Translate the export's column headers into the short English names that the
# rest of the notebook works with.
renamings = dict([
    ('Datum', 'date'),
    ('Naam / Omschrijving', 'name'),
    ('Rekening', 'account_from'),
    ('Tegenrekening', 'account_to'),
    ('Code', 'code'),
    ('Af Bij', 'sign'),
    ('Bedrag (EUR)', 'amount'),
    ('MutatieSoort', 'device'),
    ('Mededelingen', 'note'),
])
df = df.rename(renamings, axis='columns')

###### Date parsing

In [None]:
# Parse the yyyymmdd date column into datetimes. The column stays a regular
# column; it is not made the index.
df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')

###### Account to parsing

In [None]:
# Rows with a missing account_to get the placeholder '-'.
df['account_to'] = df['account_to'].fillna('-')

###### Sign and amount parsing

In [None]:
# Turn the sign column into symbols: 'Bij' becomes '+', anything else '-'.
df['sign'] = np.where(df['sign'] == 'Bij', '+', '-')

In [None]:
# Amount column from str to float: swap every comma for a point, then cast.
df['amount'] = df['amount'].replace(',', '.', regex=True).astype('float64')

In [None]:
# Transform the amount column from float to integer cents. Round before
# casting: the float product is often a hair below the exact value
# (19.99 * 100 == 1998.9999999999998) and a bare astype(int) truncates,
# which silently drops a cent.
df['amount'] = (df['amount'] * 100).round().astype(int)

In [None]:
# Apply the symbol in the sign column to the amounts: rows marked '+' keep
# their amount, every other row is negated. This is done column-wise because
# the former row-wise DataFrame.apply(..., broadcast=True) relied on a keyword
# that pandas deprecated in 0.23 and removed in 1.0.
df['amount'] = df['amount'].where(df['sign'] == '+', -df['amount'])

###### Finished

In [None]:
display(df.head())

# Categorisation
## Categories

In [None]:
def create_category_overview(df, cats):
    """Total the amounts per category.

    df: Transactions with at least the columns 'account_to' and 'amount'.
    cats: Category table with at least the columns 'account_to' and
        'category'.
    Returns: DataFrame indexed by category holding the summed amounts.
        Transactions whose account_to does not occur in cats are left out
        (the join is an inner join).
    """
    amounts = df[['account_to', 'amount']]
    per_account = cats.merge(amounts, on='account_to')
    return per_account.drop(columns='account_to').groupby('category').sum()

def print_category_overview(overview):
    """Display a category overview, sorted ascending by amount.

    overview: DataFrame with an 'amount' column in cents, e.g. the result of
        create_category_overview. The caller's frame is left untouched; the
        amounts are formatted on a sorted copy.
    """
    ordered = overview.sort_values(by='amount')
    display(ordered.assign(amount=ordered.amount.apply(amount_to_str)))

## Uncategorised

In [None]:
def print_uncategorised(df, categories=None):
    """Display the transactions whose account_to has no category.

    df: Transactions with the columns 'name', 'account_to' and 'amount'.
    categories: Category table with an 'account_to' column. Defaults to the
        notebook-level ``cats`` frame, so existing one-argument calls keep
        working.
    """
    if categories is None:
        # Fall back to the notebook global this function used to read
        # implicitly.
        categories = cats
    has_category = df['account_to'].isin(categories['account_to'])
    display(df.loc[~has_category, ['name', 'account_to', 'amount']])

# Overview: income & expense

In [None]:
def describe(s):
    """Summarise a numeric series in a plain dict.

    s: pandas Series of numbers.
    Returns: dict with the keys 'total', 'positive' (sum of the values >= 0),
        'negative' (sum of the values < 0), 'min', 'max', 'avg', 'median'
        and 'count' (number of non-null values), in that order.
    """
    non_negative = s[s >= 0]
    negative = s[s < 0]
    # NOTE: the mode is not reported; Series.mode() yields a Series rather
    # than a single value.
    return dict(
        total=s.sum(),
        positive=non_negative.sum(),
        negative=negative.sum(),
        min=s.min(),
        max=s.max(),
        avg=s.mean(),
        median=s.median(),
        count=s.count(),
    )

def print_description(d, name):
    """Print a description as an aligned two-column table.

    Naming is based on the assumption that the description describes a
    revenue stream: the negative sum is shown as expense, the positive sum
    as revenue and the total as income.

    d: dict as returned by describe(); every value except 'count' is an
        amount in cents.
    name: Title printed above the table.
    """
    # All amounts to str representation; 'count' is a plain number.
    values = {k: str(v) if k == 'count' else amount_to_str(v)
              for k, v in d.items()}

    # Right-align all values in one column, so the digits line up whatever
    # the sign of each number is (the labels used to be padded by hand for
    # one particular combination of plus and minus signs).
    width = max(len(v) for v in values.values())

    def print_row(label, key):
        print(label.ljust(8) + values[key].rjust(width))

    print('--- {} ---'.format(name))
    print_row('expense', 'negative')
    print_row('revenue', 'positive')
    print_row('income', 'total')
    print()
    print_row('count', 'count')
    print_row('min', 'min')
    print_row('max', 'max')
    print_row('avg', 'avg')
    print_row('median', 'median')

In [None]:
def summed_mutation_overview(period):
    """Sum all mutations per account_to and attach a name to each account.

    period: DataFrame of transactions with the columns 'account_to', 'name'
        and 'amount' (amounts in cents).
    Returns: DataFrame with the columns 'account_to', 'name' and 'amount',
        one row per account_to, sorted ascending by the summed amount. The
        name is the first one found for that account_to in period; the only
        exception is the account_to with value '-', which gets the name '-'.
        The summed amounts are returned as formatted strings.
    """
    # One pass per account: first name seen and total amount.
    overview = period.groupby('account_to', as_index=False).agg(
        {'name': 'first', 'amount': 'sum'})
    # The placeholder account has no meaningful name of its own.
    overview.loc[overview['account_to'] == '-', 'name'] = '-'
    # Sort on the numeric totals first, only then turn them into text.
    overview = overview.sort_values(by='amount')
    overview['amount'] = overview['amount'].apply(amount_to_str)
    return overview

## Current month

In [None]:
def current_month_start():
    """Return the first day of the month that counts as the current month.

    Up to and including the 25th this is the first day of today's month.
    From the 26th onwards it is the first day of the next month: today plus
    seven days then always falls in the following month.

    NOTE(review): after the 25th the returned date lies in the future, so a
    period that ends today comes out empty - confirm this is intended.
    """
    today = dt.date.today()
    anchor = today + dt.timedelta(7) if today.day > 25 else today
    return anchor.replace(day=1)

In [None]:
# Select the transactions dated on or after current_month_start().
period = get_period(df, current_month_start())

### Description

In [None]:
print_description(describe(period.amount), "CURRENT MONTH")

### Account mutations

In [None]:
summed_mutation_overview(period)

### Category mutations

In [None]:
print_category_overview(create_category_overview(period, cats))

#### Uncategorised

In [None]:
print_uncategorised(period)

## Last 4 weeks

In [None]:
# Select the transactions of the last 4 weeks (28 days); the start date
# itself is included.
start = dt.date.today() - dt.timedelta(weeks=4)
period = get_period(df, start)

### Description

In [None]:
print_description(describe(period.amount), "4 WEEKS")

### Account mutations

In [None]:
summed_mutation_overview(period)

### Category mutations

In [None]:
print_category_overview(create_category_overview(period, cats))

#### Uncategorised

In [None]:
print_uncategorised(period)

In [None]:
# Ad-hoc inspection of a single transaction in the 4-week selection. Guarded,
# because a bare .iloc[62] raises IndexError (and stops "Run All") whenever
# the selection holds fewer than 63 rows.
period.iloc[62] if len(period) > 62 else None

## Last 12 weeks (≈ 3 months)

In [None]:
# Select the transactions of the last 12 weeks (84 days); the start date
# itself is included.
start = dt.date.today() - dt.timedelta(weeks=12)
period = get_period(df, start)

### Description

In [None]:
print_description(describe(period.amount), "12 WEEKS")

### Account mutations

In [None]:
summed_mutation_overview(period)

### Category mutations

In [None]:
print_category_overview(create_category_overview(period, cats))

#### Uncategorised

In [None]:
print_uncategorised(period)

## Last 52 weeks (≈ 1 year)

In [None]:
# Select the transactions of the last 52 weeks (364 days); the start date
# itself is included.
start = dt.date.today() - dt.timedelta(weeks=52)
period = get_period(df, start)

### Description

In [None]:
print_description(describe(period.amount), "52 WEEKS")

### Account mutations

In [None]:
summed_mutation_overview(period)

### Category mutations

In [None]:
print_category_overview(create_category_overview(period, cats))

#### Uncategorised

In [None]:
print_uncategorised(period)

## Last 208 weeks (≈ 4 years)

In [None]:
# Select the transactions of the last 208 weeks (1456 days); the start date
# itself is included.
start = dt.date.today() - dt.timedelta(weeks=208)
period = get_period(df, start)

### Description

In [None]:
print_description(describe(period.amount), "208 WEEKS")

### Account mutations

In [None]:
summed_mutation_overview(period)

### Category mutations

In [None]:
print_category_overview(create_category_overview(period, cats))

#### Uncategorised

In [None]:
print_uncategorised(period)

## All

### Description

In [None]:
print_description(describe(df.amount), "TOTAL")

### Account mutations

In [None]:
summed_mutation_overview(df)

### Category mutations

In [None]:
print_category_overview(create_category_overview(df, cats))

#### Uncategorised

In [None]:
print_uncategorised(df)