In [None]:
import os
import pickle

import pandas as pd

In [None]:
# Year keys (as strings) that the monthly split looks for.
YEARS = [str(year) for year in range(2021, 2023)]
# Zero-padded month keys, '01' through '12'.
MONTHS = [f"{month:02d}" for month in range(1, 13)]

In [None]:
# Load the two CSV inputs; paths are relative to the current working directory.
# NOTE(review): `input_all` is not referenced by any other cell visible here —
# confirm whether it is still needed.
input_all = pd.read_csv('./data/input.csv')
# `sample_data` is the frame that the cells below clean and split.
sample_data = pd.read_csv('./data/sample.csv')

In [None]:
def cleanup(df : pd.DataFrame) -> pd.DataFrame:
    """Return a tidied copy of a raw export; the input frame is not modified.

    Steps:
      * drop the 'converted amount', 'currency.1', 'currency' and 'account'
        columns;
      * split 'description' on the first "::" into 'subcategory' (text before)
        and 'description' (text after; missing when a row has no "::"). Both
        end up as the last two columns of the result;
      * convert 'amount' to a numeric dtype, stripping "," characters first
        when the column holds text;
      * parse 'date' with the day/month/year format "%d/%m/%Y".

    Raises:
        KeyError: if any of the columns named above is absent.
        ValueError: if an amount or a date cannot be parsed.
    """
    df = df.drop(columns=["converted amount", "currency.1", "currency", "account"])

    # `n` has to be passed by keyword: it is keyword-only in pandas 2.x.
    # reindex guarantees both result columns exist even when no row has "::".
    parts = (
        df["description"]
        .str.split("::", n=1, expand=True)
        .reindex(columns=[0, 1])
    )
    # Drop and re-add so the two derived columns are appended at the end.
    df = df.drop(columns="description")
    df["subcategory"] = parts[0]
    df["description"] = parts[1]

    # read_csv yields a numeric column when no amount contains a comma; the
    # .str accessor would raise on it, so only strip commas from text.
    amount = df["amount"]
    if not pd.api.types.is_numeric_dtype(amount):
        amount = amount.str.replace(",", "", regex=False)
    df["amount"] = pd.to_numeric(amount)

    df["date"] = pd.to_datetime(df["date"], format="%d/%m/%Y")
    return df

In [None]:
def split_dataframes(df: pd.DataFrame, years=None, months=None) -> dict:
    """Split a dataframe into one dataframe per calendar month.

    A row belongs to (year, month) when the text form of its 'date' value
    contains "<year>-<month>", e.g. "2021-11". Rows that match none of the
    requested pairs are left out of the result.

    Args:
        df: frame with a 'date' column.
        years: iterable of year strings to look for; defaults to the
            module-level YEARS.
        months: iterable of two-digit month strings; defaults to the
            module-level MONTHS.

    Returns:
        Nested dict ``{year: {month: DataFrame}}``. Every requested year is a
        key; a month key exists only if at least one row matched. Each monthly
        frame keeps the column dtypes of ``df`` and gets a fresh 0..n-1 index.

    Raises:
        KeyError: if ``df`` has no 'date' column.
    """
    years = YEARS if years is None else years
    months = MONTHS if months is None else months

    # Stringify the dates once and let pandas do the substring matching,
    # instead of walking every row in Python for each (year, month) pair.
    date_text = df["date"].astype(str)

    dfs = {}
    for y in years:
        dfs[y] = {}
        for m in months:
            in_month = date_text.str.contains(f"{y}-{m}", regex=False, na=False)
            if in_month.any():
                # Positional boolean mask: independent of df's index labels.
                dfs[y][m] = df[in_month.to_numpy()].reset_index(drop=True)
    return dfs

In [None]:
def save_dataframes(dfs : dict) -> None:
    """Write every monthly dataframe to CSV and pickle the whole mapping.

    Args:
        dfs: nested mapping ``{year: {month: DataFrame}}``.

    Side effects (paths are relative to the current working directory):
        * creates ``data/backup`` (and therefore ``data``) if missing;
        * writes ``data/backup/<year>_<month>.csv`` for each monthly frame;
        * writes the full mapping to ``data/internal.pkl``.
    """
    # Neither to_csv nor open() creates parent directories.
    os.makedirs('data/backup', exist_ok=True)

    for y, months in dfs.items():
        for m, frame in months.items():
            frame.to_csv(f'data/backup/{y}_{m}.csv')

    with open('data/internal.pkl', 'wb') as handle:
        pickle.dump(dfs, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
def print_dataframe(month : str, year : str, frames : dict = None) -> pd.DataFrame:
    """Return the dataframe stored for one month (nothing is printed).

    Args:
        month: month key, e.g. '11'.
        year: year key, e.g. '2021'.
        frames: nested mapping ``{year: {month: DataFrame}}``. When omitted,
            the module-level variable ``dfs`` is used, which must have been
            defined before the call.

    Raises:
        KeyError: if the year or month is not present in the mapping.
        NameError: if ``frames`` is omitted and no global ``dfs`` exists.
    """
    # Prefer the explicit mapping so the lookup does not depend on kernel state.
    source = dfs if frames is None else frames
    return source[year][month]

In [None]:
sample_data

In [None]:
df = cleanup(sample_data)

In [None]:
df

In [None]:
df = split_dataframes(df)

In [None]:
df['2021']['11']