In [13]:
%use dataframe
%use lets-plot


Test data generation

In [14]:
import java.time.LocalDate
import java.time.format.DateTimeFormatter
import kotlin.random.Random
import java.io.File

import java.time.temporal.ChronoUnit
import kotlin.math.abs
import org.jetbrains.kotlinx.dataframe.api.*

val n = 100
val startDate = LocalDate.of(2025, 1, 1)
val endDate = LocalDate.of(2025, 12, 31)

val categories = mapOf(
  "Salary" to Triple(1000.0, 5000.0, true),
  "Groceries" to Triple(20.0, 200.0, false),
  "Coffee/Food" to Triple(5.0, 50.0, false),
  "Transport" to Triple(5.0, 100.0, false),
  "Entertainment" to Triple(10.0, 150.0, false),
  "Utilities" to Triple(50.0, 300.0, false),
  "Medicine" to Triple(10.0, 200.0, false),
  "Others" to Triple(5.0, 100.0, false)
)

val descriptions = mapOf(
  "Salary" to listOf("Company Payroll", "ACME Salary", "Monthly Income"),
  "Groceries" to listOf("Supermarket Walmart", "Lidl Grocery", "Fresh Market"),
  "Coffee/Food" to listOf("Starbucks Coffee", "Cafe Latte", "McDonald's"),
  "Transport" to listOf("Uber", "Taxi", "Metro Ticket", "Bus Fare", "Train Ticket"),
  "Entertainment" to listOf("Netflix", "Spotify", "Cinema Ticket", "Theatre Play"),
  "Utilities" to listOf("Electric Bill", "Water Bill", "Gas Bill", "Internet Bill"),
  "Medicine" to listOf("Pharmacy Purchase", "Clinic Visit", "Health Store"),
  "Others" to listOf("Amazon Shopping", "Book Store", "Pet Supplies")
)

fun randomDate(start: LocalDate, end: LocalDate): LocalDate {
  val days = ChronoUnit.DAYS.between(start, end)
  return start.plusDays(Random.nextLong(days + 1))
}

val transactions = mutableListOf<List<String>>()
val formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")

for (i in 1..n) {
  val category = categories.keys.random()
  val (minAmount, maxAmount, isIncome) = categories[category]!!
  var amount = String.format("%.2f", Random.nextDouble(minAmount, maxAmount)).toDouble()
  if (!isIncome) amount = -amount

  val description = descriptions[category]!!.random()
  val date = randomDate(startDate, endDate).format(formatter)

  transactions.add(listOf(date, description, amount.toString()))
}

val file = File("transactions.csv")
file.writeText("date,description,amount\n")
transactions.forEach { row ->
  file.appendText(row.joinToString(",") + "\n")
}



Load data from file

In [15]:
val path = "transactions.csv"

val df = DataFrame.readCSV(path)
  .convert("date").to<java.time.LocalDate>()
  .convert("amount").toDouble()
  .sortBy("date")

df.head(5)

date,description,amount
2025-01-02,Health Store,-45.5
2025-01-05,Train Ticket,-31.04
2025-01-10,ACME Salary,4042.16
2025-01-22,Amazon Shopping,-43.23
2025-01-24,Cinema Ticket,-16.14


Statistics


In [16]:
val dates: List<java.time.LocalDate> = df["date"].toList().map { it as java.time.LocalDate }
val start = dates.minOrNull()!!
val end = dates.maxOrNull()!!

val amounts: List<Double> = df["amount"].toList().map { it as Double }
val totalIncome = amounts.filter { it > 0.0 }.sum()
val totalExpense = amounts.filter { it < 0.0 }.sum()
val balance = totalIncome + totalExpense

val statsDf = dataFrameOf(
  "Period" to listOf("$start — $end"),
  "Total income" to listOf(totalIncome),
  "Total expense" to listOf(abs(totalExpense)),
  "Balance" to listOf(balance)
)

statsDf

Period,Total income,Total expense,Balance
2025-01-02 — 2025-12-30,35287.58,6402.43,28885.15


Categories

In [17]:
val rules = mapOf(
  "Groceries" to listOf("supermarket","lidl","aldi","ashan"),
  "Coffee/Food" to listOf("cafe","coffee","ресторан","burger","kfc"),
  "Transport" to listOf("taxi","uber","bolt","bus","train","gas"),
  "Utilities" to listOf("electric","water","utilities"),
  "Entertainment" to listOf("cinema","netflix","spotify"),
  "Salary" to listOf("salary","payroll"),
  "Medicine" to listOf("pharmacy","clinic")
)

fun categorize(desc: String?): String {
  val d = desc?.lowercase() ?: return "Others"
  for ((cat, keys) in rules) {
    if (keys.any { d.contains(it) }) return cat
  }
  return "Others"
}

val df2 = df.add("category") { categorize(it["description"]?.toString()) }
  .add("Expense") { (it["amount"] as Double) < 0 }

df2.head(5)

date,description,amount,category,Expense
2025-01-02,Health Store,-45.5,Others,True
2025-01-05,Train Ticket,-31.04,Transport,True
2025-01-10,ACME Salary,4042.16,Salary,False
2025-01-22,Amazon Shopping,-43.23,Others,True
2025-01-24,Cinema Ticket,-16.14,Entertainment,True


In [18]:
val catSummary = df2.filter { it["Expense"] as Boolean }
  .groupBy("category")
  .aggregate {
    sum("amount") into "total_spent"
  }
  .update { cols("total_spent") }.with { abs((it as Double)) }
  .convert("total_spent").toDouble()
  .sortByDesc("total_spent")

catSummary.head(10)


category,total_spent
Others,2297.27
Entertainment,1314.34
Groceries,865.66
Utilities,860.29
Transport,623.87
Medicine,248.58
Coffee/Food,192.42


Pie chart of expenses by category

In [19]:
val pieData = mapOf(
  "category" to catSummary["category"].toList().map { it as String },
  "total_spent" to catSummary["total_spent"].toList().map { it as Double }
)

val piePlot = letsPlot(pieData) { x = "category"; y = "total_spent"; fill = "category" } +
  geomBar(stat = Stat.identity, width = 1.0, color = "white") +
  coordPolar(theta = "y") +
  ggsize(640, 420) +
  labs(title = "Expenses by Category")

piePlot


Montly spends chart

In [22]:
%use kandy

In [23]:
val df3 = df2.filter { it["Expense"] as Boolean }
  .add("month") {
    (it["date"] as LocalDate).withDayOfMonth(1)
  }
  .groupBy("month")
  .aggregate {
    sum("amount") into "expense"
  }
  .add("expense_abs") { abs(it["expense"] as Double) }

df3.plot {
  line {
    x("month")
    y("expense_abs") {
      scale = continuous(min = 0.0)
    }
    color = Color.RED
  }
  points {
    x("month")
    y("expense_abs")
    color = Color.RED
  }
  layout {
    title = "monthly spends"
    xAxisLabel = "month"
    yAxisLabel = "sum"
  }
}
