# Preparation

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.models import Sequential

In [None]:
# Location of the raw transactions CSV (hosted in the BudgetBonsai GitHub repository).
DATA_URL = "https://raw.githubusercontent.com/rozalinazahraa/BudgetBonsai/main/personal_data_finance.csv"

# Read the CSV into a DataFrame and preview its first rows.
data = pd.read_csv(DATA_URL)
data.head()

Unnamed: 0,Date / Time,Mode,Category,Sub category,Income / Expense,Debit / Credit,Cumulative
0,01-01-2021,CUB - online payment,Allowance,From dad,Income,8000.0,8000.0
1,01-01-2021,CUB - online payment,Food,Snacks,Expense,85.0,7915.0
2,03-01-2021,CUB - online payment,Other,From dad,Income,500.0,8415.0
3,03-01-2021,CUB - online payment,Household,Stuffs,Expense,6667.0,1748.0
4,03-01-2021,CUB - online payment,Transportation,Metro,Expense,30.0,1718.0


# Preprocessing

Tahap preprocessing dalam notebook ini akan sama dengan notebook sebelumnya. Oleh karena itu, untuk memudahkan dan menghemat tempat, akan dilakukan preprocessing secara langsung.

In [None]:
# Parse the day-first date strings and extract the calendar day and month.
data["Date / Time"] = pd.to_datetime(data["Date / Time"], format = "%d-%m-%Y")
data["Day"] = data["Date / Time"].dt.day
data["Month"] = data["Date / Time"].dt.month

days_in_month = {
     1: 31,
     2: 28,    # Considering non-leap year
     3: 31,
     4: 30,
     5: 31,
     6: 30,
     7: 31,
     8: 31,
     9: 30,
    10: 31,
    11: 30,
    12: 31
}

# Number of days that elapse before the first day of each month
# (1 -> 0, 2 -> 31, 3 -> 59, ...), built once instead of re-summing per row.
days_before_month = {}
elapsed_days = 0
for month_number in range(1, 13):
    days_before_month[month_number] = elapsed_days
    elapsed_days += days_in_month[month_number]

# "Numerical Date" is the day of the year under the non-leap table above,
# computed for the whole column at once rather than row by row.
# The integer cast fails loudly if any date is missing.
data["Numerical Date"] = (
    data["Month"].map(days_before_month) + data["Day"]
).astype("int64")

# Keep only the last transaction recorded for each numerical date.
# NOTE(review): the year is not part of "Numerical Date", so the same calendar
# day in different years collapses to one row here -- confirm this is intended.
data = data.drop_duplicates(subset = ["Numerical Date"], keep = "last")

# Only "Cumulative" and "Numerical Date" (plus the original index column) are kept.
data = data.drop(columns = ["Date / Time", "Mode", "Category", "Sub category", "Income / Expense", "Debit / Credit", "Day", "Month"])

data.head()

Unnamed: 0,Cumulative,Numerical Date
1,7915.0,1
5,1640.0,3
6,180.0,12
8,71.0,13
9,411.0,14


In [None]:
# Single-feature design matrix (kept 2-D) and the regression target.
X = data.loc[:, ["Numerical Date"]]
y = data.loc[:, "Cumulative"]

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size = 0.2,
    random_state = 42,
)

# Multi-Layer Perceptron (MLP)

Multi-Layer Perceptron (MLP) merupakan jenis Neural Network yang terdiri dari beberapa lapisan neuron dengan koneksi *feedforward*. Berikut merupakan struktur MLP yang akan digunakan dalam notebook kali ini:

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable; without this every run reports different metrics.
tf.keras.utils.set_random_seed(42)

# Feed-forward regressor: two ReLU hidden layers (64 and 32 units) followed by
# a single linear output unit.
model_MLP = Sequential()
model_MLP.add(Dense(64, input_dim = X_train.shape[1], activation = "relu"))
model_MLP.add(Dense(32, activation = "relu"))
model_MLP.add(Dense(1, activation = "linear"))

# Mean squared error loss, optimised with Adam.
model_MLP.compile(optimizer = "adam", loss = "mean_squared_error")

In [None]:
# Train silently for 1000 epochs in mini-batches of 10; the held-out split is
# passed as validation data.
model_MLP.fit(
    X_train,
    y_train,
    validation_data = (X_test, y_test),
    epochs = 1000,
    batch_size = 10,
    verbose = 0,
)
y_pred_MLP = model_MLP.predict(X_test)

# Score the held-out predictions.
rmse_MLP = np.sqrt(mean_squared_error(y_test, y_pred_MLP))
r2_MLP = r2_score(y_test, y_pred_MLP)

print("Root MSE:", rmse_MLP)
print("R2 score:", r2_MLP)

Root MSE: 32020.646617813683
R2 score: 0.970303966473104


Root Mean Squared Error (RMSE) di atas menunjukkan nilai $32020.65$ dan R-squared score (R2 score) menunjukkan nilai $97.03\%$.

In [None]:
# Save the full model and, separately, only its weights, both as HDF5 files.
model_MLP.save("Model MLP.h5")
model_MLP.save_weights("Model MLP weights.h5")

# JSON file: the model description returned by to_json().
model_json = model_MLP.to_json()
with open("Model MLP.json", "w") as json_file:
    json_file.write(model_json)

# TFLite file
converter = tf.lite.TFLiteConverter.from_keras_model(model_MLP)
tflite_model = converter.convert()
with open("Model MLP.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

# Binary file: a byte-for-byte copy of the HDF5 weights file under a .bin name.
# Both handles are opened in the same `with` so the source file is closed as
# well (the bare open(...).read() left it to the garbage collector).
with open("Model MLP weights.h5", "rb") as weights_file, \
        open("Model MLP weights.bin", "wb") as binary_file:
    binary_file.write(weights_file.read())

  saving_api.save_model(


# Long Short-Term Memory (LSTM)

Long Short-Term Memory (LSTM) merupakan jenis Neural Network yang mampu belajar dan mengingat ketergantungan jangka panjang dalam data deret waktu. LSTM termasuk dalam jenis Recurrent Neural Network (RNN).

Untuk menerapkan LSTM, kita harus melakukan sedikit *processing* data terlebih dahulu. Hal ini karena kita perlu mengubah format data menjadi data 3D, yang baru dapat diolah oleh model LSTM.

In [None]:
# Same feature and target columns as before, this time as plain NumPy arrays.
X = data[["Numerical Date"]].to_numpy()
y = data["Cumulative"].to_numpy()

# Identical 80/20 split settings to the MLP section.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Insert a length-1 axis in the middle: (rows, features) -> (rows, 1, features),
# the 3-D layout the LSTM layer is given below.
X_train = np.expand_dims(X_train, axis=1)
X_test = np.expand_dims(X_test, axis=1)

Berikut merupakan model LSTM yang akan digunakan dalam notebook kali ini.

In [None]:
# Seed Python, NumPy and TensorFlow RNGs so weight initialisation and batch
# shuffling are repeatable; without this every run reports different metrics.
tf.keras.utils.set_random_seed(42)

# Recurrent regressor: one 64-unit LSTM layer (ReLU activation) reading inputs
# shaped like a single X_train sample, a 32-unit ReLU dense layer, and a
# single-unit output layer.
model_LSTM = Sequential()
model_LSTM.add(LSTM(64, input_shape = (X_train.shape[1], X_train.shape[2]), activation = "relu"))
model_LSTM.add(Dense(32, activation = "relu"))
model_LSTM.add(Dense(1))

# Mean squared error loss, optimised with Adam.
model_LSTM.compile(optimizer = "adam", loss = "mean_squared_error")

In [None]:
# Train silently for 1000 epochs in mini-batches of 10; the held-out split is
# passed as validation data.
model_LSTM.fit(
    X_train,
    y_train,
    validation_data = (X_test, y_test),
    epochs = 1000,
    batch_size = 10,
    verbose = 0,
)
y_pred_LSTM = model_LSTM.predict(X_test)

# Score the held-out predictions.
rmse_LSTM = np.sqrt(mean_squared_error(y_test, y_pred_LSTM))
r2_LSTM = r2_score(y_test, y_pred_LSTM)

print("Root MSE:", rmse_LSTM)
print("R2 score:", r2_LSTM)

Root MSE: 30459.886003838394
R2 score: 0.9731283213833689


Root Mean Squared Error (RMSE) di atas menunjukkan nilai $30459.89$ dan R-squared score (R2 score) menunjukkan nilai $97.31\%$. Hasil yang didapat ini lebih bagus dibandingkan hasil yang didapat dari MLP.

In [None]:
# Save the full model and, separately, only its weights, both as HDF5 files.
model_LSTM.save("Model LSTM.h5")
model_LSTM.save_weights("Model LSTM weights.h5")

# JSON file: the model description returned by to_json().
model_json = model_LSTM.to_json()
with open("Model LSTM.json", "w") as json_file:
    json_file.write(model_json)

# TFLite file
converter = tf.lite.TFLiteConverter.from_keras_model(model_LSTM)
# Allow both built-in TFLite ops and selected TensorFlow ops in the converted
# model, and leave tensor-list ops un-lowered.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]
converter.experimental_new_converter = True
converter.experimental_lower_tensor_list_ops = False

tflite_model = converter.convert()
with open("Model LSTM.tflite", "wb") as tflite_file:
    tflite_file.write(tflite_model)

# Binary file: a byte-for-byte copy of the HDF5 weights file under a .bin name.
# Both handles are opened in the same `with` so the source file is closed as
# well (the bare open(...).read() left it to the garbage collector).
with open("Model LSTM weights.h5", "rb") as weights_file, \
        open("Model LSTM weights.bin", "wb") as binary_file:
    binary_file.write(weights_file.read())

  saving_api.save_model(
