In [1]:
import pandas as pd

In [2]:
dataset = pd.read_csv("/content/datset.csv")

In [3]:
dataset.head()

Unnamed: 0.1,Unnamed: 0,_id,user_id,category_id,amount,created_at,type,currency,image,note,location
0,0,63ccbd89ae3e5a2f6f52ef3a,63cc5580bcf3d54f64dec4a5,63cc6391bcf3d54f64dec4a8,56.5,2023-01-03T22:46:11.354Z,expense,CAD,,OPUS,
1,1,63ccbe50ae3e5a2f6f52ef3b,63cc5580bcf3d54f64dec4a5,63cc6340bcf3d54f64dec4a7,3.25,2023-01-02T22:46:11.354Z,expense,CAD,,Tim Hortons,
2,2,63ccbeb6ae3e5a2f6f52ef3c,63cc5580bcf3d54f64dec4a5,63cc6407bcf3d54f64dec4ab,14.2,2023-01-01T22:46:11.354Z,expense,CAD,,Multi Vitamins,
3,3,63ccbf81ae3e5a2f6f52ef3d,63cc5580bcf3d54f64dec4a5,63cc6340bcf3d54f64dec4a7,50.0,2023-01-05T22:46:11.354Z,expense,CAD,,Burgundy Lion,
4,4,63ccc304ae3e5a2f6f52ef3e,63cc5580bcf3d54f64dec4a5,63cc4501bcf3d54f64dec48d,28.8,2023-01-06T22:46:11.354Z,expense,CAD,,Walmart,


In [4]:
cat_map = [
  {
    "_id": {
      "$oid": "63cc4501bcf3d54f64dec48d"
    },
    "name": "Home and groceries"
  },
  {
    "_id": {
      "$oid": "63cc6340bcf3d54f64dec4a7"
    },
    "name": "Social Life"
  },
  {
    "_id": {
      "$oid": "63cc6391bcf3d54f64dec4a8"
    },
    "name": "Travel"
  },
  {
    "_id": {
      "$oid": "63cc639bbcf3d54f64dec4a9"
    },
    "name": "Rent & Utilities"
  },
  {
    "_id": {
      "$oid": "63cc63f8bcf3d54f64dec4aa"
    },
    "name": "Subscriptions"
  },
  {
    "_id": {
      "$oid": "63cc6407bcf3d54f64dec4ab"
    },
    "name": "Health"
  },
  {
    "_id": {
      "$oid": "63cc6419bcf3d54f64dec4ac"
    },
    "name": "Apparel & Accesories"
  },
  {
    "_id": {
      "$oid": "63cc6436bcf3d54f64dec4ad"
    },
    "name": "Others"
  }
]

In [5]:
dataset["note"].unique()
reps = {
    "Tim Hortans": "Tim Hortons",
    "TIm Hortans": "Tim Hortons",
}
for rep in reps:
    dataset.replace(rep, reps.get(rep), inplace=True)

In [6]:
categories = {}
for cat in cat_map:
    categories[cat.get("_id").get("$oid")] = cat.get("name")

In [7]:
categories

{'63cc4501bcf3d54f64dec48d': 'Home and groceries',
 '63cc6340bcf3d54f64dec4a7': 'Social Life',
 '63cc6391bcf3d54f64dec4a8': 'Travel',
 '63cc639bbcf3d54f64dec4a9': 'Rent & Utilities',
 '63cc63f8bcf3d54f64dec4aa': 'Subscriptions',
 '63cc6407bcf3d54f64dec4ab': 'Health',
 '63cc6419bcf3d54f64dec4ac': 'Apparel & Accesories',
 '63cc6436bcf3d54f64dec4ad': 'Others'}

In [8]:
def get_expense_by_cat(expense_dataframe, categories_map, user_id):
    expense_dataframe = expense_dataframe[expense_dataframe["user_id"] == user_id]
    expenditures = {}
    for category in categories_map:
        cat_values = expense_dataframe[expense_dataframe["category_id"] == category]
        cat_expenditure = cat_values["amount"].sum()
        expenditures[category] = { "total": cat_expenditure, "category_dframe": cat_values }
    return expenditures

In [9]:
def get_categorized_expenses_per_user(expense_dataframe, categories_map):
    user_maps = {}
    for user_id in expense_dataframe["user_id"].unique():
        user_maps[user_id] = get_expense_by_cat(expense_dataframe, categories_map, user_id)
    return user_maps

In [10]:
user_maps = get_categorized_expenses_per_user(dataset, categories)

In [11]:
import plotly.graph_objects as go

categories = ['processing cost','mechanical properties','chemical stability',
              'thermal stability', 'device integration']

fig = go.Figure()

fig.add_trace(go.Scatterpolar(
      r=[1, 5, 2, 2, 3],
      theta=categories,
      fill='toself',
      name='Product A'
))
fig.add_trace(go.Scatterpolar(
      r=[4, 3, 2.5, 1, 2],
      theta=categories,
      fill='toself',
      name='Product B'
))

fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, 5]
    )),
  showlegend=False
)

fig.show()

In [12]:
def get_categorical_analysis(user_maps, categories_map, user_id):
    user_spendings = user_maps.get(user_id)
    # Generates a pie chart with spendings on categories
    labels = list(categories_map.values())
    data = [user_spendings[expense].get("total") for expense in user_spendings]
    figure = go.Figure()
    figure.add_trace(
        go.Pie(
            sort=False,
            direction='clockwise',
            labels=labels,
            values=data,
            textinfo='label',
            marker={'line': {'color': 'white', 'width': 1}}
        )
    )
    figure.show()

In [13]:
get_categorical_analysis(user_maps, categories, "63cc5580bcf3d54f64dec4a5")

AttributeError: ignored

In [14]:
def get_analysis_per_cat(user_maps, categories, user_id, category_id):
    data_by_cat = user_maps.get(user_id).get(category_id).get("category_dframe")
    labels = data_by_cat["note"].unique()
    sub_cat_data = {}
    for label in labels:
        sub_cat_data[label] = data_by_cat[data_by_cat["note"] == label]["amount"].sum()
    print(sub_cat_data)
    print([sub_cat_data.get(label) for label in labels])
    fig = go.Figure()
    fig.add_trace(go.Scatterpolar(
          r=[sub_cat_data.get(label) for label in labels],
          theta=labels,
          fill='toself',
          name=f'User id {user_id}'
    ))
    fig.show()

In [15]:
get_analysis_per_cat(user_maps, categories, "63cc5580bcf3d54f64dec4a5", "63cc6340bcf3d54f64dec4a7")

{'Tim Hortons': 37.08, 'Burgundy Lion': 50.0, 'Ice Hotel': 70.0, 'SAQ': 35.0, 'Beers': 8.4, 'Whisky': 57.0, 'Subway': 6.3, 'Pizza': 8.0, 'Restaurant': 36.0, 'Ice Skating': 10.0}
[37.08, 50.0, 70.0, 35.0, 8.4, 57.0, 6.3, 8.0, 36.0, 10.0]
