In [6]:
import pandas as pd
import plotly.graph_objects as go
from datetime import datetime
from pytz import timezone
from dateutil import parser

In [7]:
# Sankey colors per expense category, stored as [node color, link color].
# Built from a table of triples so every category gets its own fresh list.
exp_colors_dict = {
    category: [node_color, link_color]
    for category, node_color, link_color in (
        ('Bills & Fees', '#d1605e', '#e8b6b5'),
        ('Cab', '#d1a35e', '#e8d1b5'),
        ('Drinks', '#d15ea1', '#e8b5d3'),
        ('Entertainment', '#5e79d1', '#b5bbe8'),
        ('Food & Drink', '#e49444', '#f2cfa6'),
        ('Food at Work', '#d1d15e', '#e5e8b5'),
        ('Groceries', '#6ad15e', '#b7e8b5'),
        ('Healthcare', '#918e8e', '#d1cfcf'),
        ('Home', '#995ed1', '#cfb5e8'),
        ('Necessity', '#918e8e', '#d1cfcf'),
        ('Other', '#918e8e', '#d1cfcf'),
        ('Shopping', '#5ed1ad', '#b5e8da'),
        ('Transport', '#a7d15e', '#d8e8b5'),
        ('Travel', '#d15ea1', '#e8b5d3'),
        ('Work', '#a7d15e', '#d8e8b5'),
        ('Gifts', '#a7d15e', '#d8e8b5'),
        ('Automotive', '#a7d15e', '#d8e8b5'),
        ('Fees & Adjustments', '#a7d15e', '#d8e8b5'),
        ('Bills & Utilities', '#a7d15e', '#d8e8b5'),
        ('Cab for Work', '#d1d15e', '#e5e8b5'),
    )
}

In [3]:
def convert(date):
  """Return `date` as a 'YYYY-MM-DD' string after shifting it to US/Eastern.

  `date` is a date/time string that `dateutil.parser.parse` can read.
  """
  day_format = "%Y-%m-%d"
  eastern = timezone('US/Eastern')
  # NOTE(review): a string without a UTC offset parses to a naive datetime,
  # which astimezone() interprets in the machine's local zone - confirm the
  # export always carries an offset.
  localized = parser.parse(date).astimezone(eastern)
  return localized.strftime(day_format)


def monthlyData(df, month):
  """Return the rows of `df` whose converted date falls in `month`.

  Args:
    df: DataFrame with a 'Date' column of date/time strings accepted by
      `convert`.
    month: month key in 'YYYY-MM' form (e.g. '2024-05'). It is compared with
      the first seven characters of each converted date, so a bare '05'
      never matches.

  Returns:
    A new DataFrame holding only the matching rows, with 'Date' rewritten as
    the 'YYYY-MM-DD' strings produced by `convert`. The caller's frame is
    left untouched.
  """
  # assign() builds a new frame, so the caller's 'Date' column is not
  # overwritten and a second call on the same frame does not re-convert
  # dates that were already converted.
  converted = df.assign(Date=df['Date'].apply(convert))
  in_month = converted['Date'].map(lambda day: day[:7]) == month
  return converted[in_month]


def expense_vacay(df, vacation):
  """Split non-rent, non-income rows into everyday and vacation totals.

  `vacation` is indexed as `[start, end]`. Rows whose 'Date' lies in the
  inclusive range count as vacation spending; rows strictly before the start
  or strictly after the end count as everyday spending. Dates are compared
  as-is against the two bounds.

  Returns `(expenses_df, vacay_df)`, each with one row per 'Category name'
  and the summed 'Amount'.
  """
  spending = df.loc[(df['Category name'] != 'Rent') & (df['Type'] != 'Income')]
  first_day = vacation[0]
  last_day = vacation[1]

  def totals_by_category(rows):
    # One row per category, with the category kept as a regular column.
    return rows[['Category name', 'Amount']].groupby("Category name", as_index=False).sum()

  dates = spending['Date']

  # Everyday spending: rows before the vacation followed by rows after it.
  everyday_rows = pd.concat(
    [spending[dates < first_day], spending[dates > last_day]],
    ignore_index=True,
    sort=False,
  )
  vacation_rows = spending[(dates >= first_day) & (dates <= last_day)]

  return totals_by_category(everyday_rows), totals_by_category(vacation_rows)



def get_spendee_values(month, vacation, csv_path="spendee_to_sankey.csv"):
  """Load the Spendee export and compute one month's headline figures.

  Args:
    month: month key in 'YYYY-MM' form, passed to `monthlyData`.
    vacation: `[start, end]` date strings marking a vacation whose spending
      is reported separately, or an empty sequence for none.
    csv_path: location of the Spendee CSV export. Defaults to the file name
      this notebook has always read.

  Returns:
    Tuple `(expenses_df, salary, rent, sav, expenses, vacay, k401)`:
    `expenses_df` has one row per category with the summed 'Amount' (rent and
    income rows excluded, vacation rows excluded when a vacation is given);
    `sav` is `salary - rent - expenses - vacay`; the rest are monthly totals.
  """
  # The export is expected to cover the last 365 days of transactions.
  df = pd.read_csv(csv_path)

  # Whole, unsigned amounts: astype(int) truncates the decimals, abs() drops the sign.
  df['Amount'] = df['Amount'].astype(int).abs()

  df = df.drop(columns=['Wallet', 'Currency', 'Labels', 'Author'])

  df = monthlyData(df, month)

  salary = df.loc[df['Type'] == 'Income', 'Amount'].sum()
  rent = df.loc[df['Category name'] == 'Rent', 'Amount'].sum()
  k401 = df.loc[df['Category name'] == '401k', 'Amount'].sum()

  if len(vacation) != 0:
    expenses_df, vacay_df = expense_vacay(df, vacation)
    vacay = vacay_df['Amount'].sum()
  else:
    spending = df.loc[(df['Category name'] != 'Rent') & (df['Type'] != 'Income')]
    expenses_df = spending[['Category name', 'Amount']].groupby("Category name", as_index=False).sum()
    vacay = 0

  expenses = expenses_df['Amount'].sum()

  # Whatever is left of the salary after rent, everyday and vacation spending.
  sav = salary - rent - expenses - vacay

  return expenses_df, salary, rent, sav, expenses, vacay, k401



def get_per(salary, amount):
  """Format `amount` as a share of `salary` for use in a node label.

  Returns a string such as ' (25.0%)': a leading space, then the percentage
  rounded to two decimals. When `salary` is zero there is no meaningful
  share, so ' (n/a)' is returned instead of dividing by zero.
  """
  if salary == 0:
    return " (n/a)"
  return f" ({round(amount / salary * 100, 2)}%)"


def get_overview_sankey(month, title, vacation=[]):
  """Show a Sankey diagram of where the month's salary went.

  Args:
    month: month key passed through to `get_spendee_values`.
    title: text used as the figure title.
    vacation: optional `[start, end]` date strings; when the resulting
      vacation total is non-zero it gets its own node. The default list is
      only read, never modified.

  The figure is displayed with `fig.show()`; nothing is returned.
  """
  expenses_df, salary, rent, sav, expenses, vacay, k401 = get_spendee_values(month, vacation)

  # Node 0 is the salary; nodes 1-3 are its destinations, listed in the same
  # order as the link arrays below.
  label_arr = [
    "Salary: $" + str(salary),
    "Rent: $" + str(rent) + get_per(salary, rent),
    "Savings: $" + str(sav) + get_per(salary, sav),
    "Expenses: $" + str(expenses) + get_per(salary, expenses),
  ]
  color_arr = ["green", "#d1605e", "#6a9f58", "#e49444"]
  target_arr = [1, 2, 3]
  value_arr = [rent, sav, expenses]
  color_link_arr = ["#e8b6b5", "#bad4b3", "#f2cfa6"]

  if vacay != 0:
    label_arr.append("Vacay: $" + str(vacay))
    color_arr.append("#85b5b2")
    target_arr.append(4)
    value_arr.append(vacay)
    color_link_arr.append("#c7dddc")

  # Every link starts at the salary node. Deriving the sources from the
  # targets keeps source/target/value the same length (the list used to
  # carry one extra entry).
  source_arr = [0] * len(target_arr)

  fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = label_arr,
      color = color_arr,
    ),
    link = dict(
      source = source_arr,
      target = target_arr,
      value = value_arr,
      color = color_link_arr,
  ))])

  fig.update_layout(title_text=title, font_size=10)
  fig.show()


def get_expenses_sankey(month, title, vacation=[]):
  """Show a Sankey diagram breaking the month's expenses down by category.

  Args:
    month: month key passed through to `get_spendee_values`.
    title: text used as the figure title.
    vacation: optional `[start, end]` date strings passed through to
      `get_spendee_values`. The default list is only read, never modified.

  Node 0 is the expense total and each category is one target node. A
  category that has no entry in `exp_colors_dict` is drawn in neutral grey
  instead of raising `KeyError`. The figure is displayed with `fig.show()`;
  nothing is returned.
  """
  expenses_df, salary, rent, sav, expenses, vacay, k401 = get_spendee_values(month, vacation)

  lab_names = expenses_df['Category name'].tolist()
  exp_vals = expenses_df['Amount'].tolist()
  exp_labels = [f"{name}: ${value}" for name, value in zip(lab_names, exp_vals)]

  # [node color, link color] used when the palette has no entry for a category.
  fallback_cols = ['#918e8e', '#d1cfcf']
  node_cols = []
  link_cols = []
  for name in lab_names:
    cols = exp_colors_dict.get(name, fallback_cols)
    node_cols.append(cols[0])
    link_cols.append(cols[1])

  # Every link runs from the total (node 0) to one category node (1..n).
  exp_source = [0] * len(exp_labels)
  exp_target = list(range(1, len(exp_labels) + 1))

  fig = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 0.5),
      label = ["Expenses: $"+str(expenses), *exp_labels],
      color = ["#e49444"]+node_cols,
    ),
    link = dict(
      source = exp_source,
      target = exp_target,
      value = exp_vals,
      color = link_cols
  ))])

  fig.update_layout(title_text=title, font_size=10)
  fig.show()


In [None]:
# monthlyData matches on the 'YYYY-MM' prefix of each date, so the month key
# must include the year; a bare '10' selects no rows.
get_overview_sankey('2023-10', "October 2023 Overview:", ['2023-10-01', '2023-10-03'])
get_expenses_sankey('2023-10', "October 2023 Expenses:", ['2023-10-01', '2023-10-03'])

In [None]:
# monthlyData matches on the 'YYYY-MM' prefix of each date, so the month key
# must include the year; a bare '11' selects no rows.
get_overview_sankey('2023-11', "November 2023 Overview:")
get_expenses_sankey('2023-11', "November 2023 Expenses:")

In [None]:
# monthlyData matches on the 'YYYY-MM' prefix of each date, so the month key
# must include the year; a bare '12' selects no rows.
get_overview_sankey('2023-12', "December 2023 Overview:")
get_expenses_sankey('2023-12', "December 2023 Expenses:")

In [None]:
# January 2024: salary overview first, then the per-category expense breakdown (no vacation range).
get_overview_sankey('2024-01', "January 2024 Overview:")
get_expenses_sankey('2024-01', "January 2024 Expenses:")

In [None]:
# February 2024: salary overview first, then the per-category expense breakdown (no vacation range).
get_overview_sankey('2024-02', "February 2024 Overview:")
get_expenses_sankey('2024-02', "February 2024 Expenses:")

NameError: name 'get_overview_sankey' is not defined

In [None]:
# March 2024: salary overview first, then the per-category expense breakdown (no vacation range).
get_overview_sankey('2024-03', "March 2024 Overview:")
get_expenses_sankey('2024-03', "March 2024 Expenses:")

In [None]:
# April 2024: salary overview first, then the per-category expense breakdown (no vacation range).
get_overview_sankey('2024-04', "April 2024 Overview:")
get_expenses_sankey('2024-04', "April 2024 Expenses:")

In [11]:
# May 2024: salary overview first, then the per-category expense breakdown (no vacation range).
get_overview_sankey('2024-05', "May 2024 Overview:")
get_expenses_sankey('2024-05', "May 2024 Expenses:")

# TODO: export the last 365 days of data from Spendee once, then append each new month's export to it (concat the DataFrames and save the result), so there is always a single CSV holding the full Spendee history.