In [None]:
import gdown
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, f1_score, recall_score, precision_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

In [None]:
# Fetch the Google Sheets CSV export and store it as a local file.
url = "https://docs.google.com/spreadsheets/d/1HsoBkbMjuH6vd1zR9Wwjm_fp5QBR0SruYb5x7yy-J74/export?format=csv&gid=1708419967"
output = "data.csv"
gdown.download(url=url, output=output, quiet=False)

# Read the downloaded file into a DataFrame and preview its first rows.
df = pd.read_csv(output)
df.head()

Downloading...
From: https://docs.google.com/spreadsheets/d/1HsoBkbMjuH6vd1zR9Wwjm_fp5QBR0SruYb5x7yy-J74/export?format=csv&gid=1708419967
To: /content/data.csv
4.57kB [00:00, 7.03MB/s]


Unnamed: 0,Nama,Penghasilan Bulanan,Pengeluaran Bulanan,Tabungan Bulanan,Tingkat Tabungan,Rasio Pengeluaran,Rasio Tabungan,Hasil Analis,Kesehatan Finansial
0,Amanda,"Rp 5,500,000","Rp 6,000,000","Rp 500,000",9.09%,109.09%,12.00%,0,Kurang Baik
1,Berlinda,"Rp 6,200,000","Rp 7,500,000","Rp 1,300,000",20.97%,120.97%,5.77%,0,Kurang Baik
2,Bunga,"Rp 7,800,000","Rp 5,800,000","Rp 2,000,000",25.64%,74.36%,2.90%,2,Sangat Baik
3,Cici,"Rp 8,500,000","Rp 6,500,000","Rp 2,000,000",23.53%,76.47%,3.25%,1,Baik
4,Cintya,"Rp 9,100,000","Rp 7,200,000","Rp 1,900,000",20.88%,79.12%,3.79%,1,Baik


In [None]:
# Preprocess data: convert the currency strings to floats, keep only the
# columns used for modelling, and persist the result to 'data.csv'.
# The leading/trailing spaces in the column names are deliberate: they are the
# exact keys this notebook uses to address these columns.
CURRENCY_COLUMNS = [' Penghasilan Bulanan ', ' Pengeluaran Bulanan', ' Tabungan Bulanan ']

for column in CURRENCY_COLUMNS:
    # e.g. "Rp 5,500,000" -> 5.5 (strip the "Rp " prefix and the thousands
    # separators, then scale down by one million).
    df[column] = (
        df[column]
        .str.replace('Rp ', '', regex=False)
        .str.replace(',', '', regex=False)
        .astype(float)
        / 1e6
    )

# Select relevant columns
df = df.loc[:, CURRENCY_COLUMNS + ['Kesehatan Finansial']]

# index=False keeps the row index out of the file; otherwise it comes back as
# a spurious "Unnamed: 0" column when the CSV is read again.
df.to_csv('data.csv', index=False)

# Last expression of the cell -> rich DataFrame preview.
df.head()

    Penghasilan Bulanan    Pengeluaran Bulanan   Tabungan Bulanan   \
0                    5.5                   6.0                 0.5   
1                    6.2                   7.5                 1.3   
2                    7.8                   5.8                 2.0   
3                    8.5                   6.5                 2.0   
4                    9.1                   7.2                 1.9   

  Kesehatan Finansial  
0         Kurang Baik  
1         Kurang Baik  
2         Sangat Baik  
3                Baik  
4                Baik  


In [None]:
# Load the preprocessed CSV into a pandas DataFrame.
# A relative path is used so the file is found wherever the notebook runs;
# 'data.csv' is the same relative name the preprocessing step writes to.
file_path = 'data.csv'
df = pd.read_csv(file_path)

# Map the 'Kesehatan Finansial' labels to integer class ids (and back).
LABEL2INDEX = {'Kurang Baik': 0, 'Baik': 1, 'Sangat Baik': 2}
INDEX2LABEL = {v: k for k, v in LABEL2INDEX.items()}

# Fail with a readable message if the data holds a label (or a missing value)
# that the mapping does not cover, instead of an opaque KeyError.
unknown_labels = set(df['Kesehatan Finansial'].unique()) - set(LABEL2INDEX)
if unknown_labels:
    raise ValueError(
        f"Unexpected 'Kesehatan Finansial' labels: {sorted(unknown_labels, key=str)}"
    )
df['Kesehatan Finansial'] = df['Kesehatan Finansial'].map(LABEL2INDEX)

# Split into features and target
X = df[[' Penghasilan Bulanan ', ' Pengeluaran Bulanan', ' Tabungan Bulanan ']]
y = df['Kesehatan Finansial']

# Scale features. fit_transform returns a NumPy array, so X is an ndarray
# from here on.
# NOTE(review): the scaler is fitted on every row before the folds are split,
# so each fold's held-out rows contribute to the scaling statistics. The
# cross-validated scores are therefore slightly optimistic; fitting the scaler
# per fold would remove this leakage.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Define K-fold cross-validation
kf = KFold(n_splits=3, shuffle=True, random_state=42)

# Per-fold results
fold_accuracies = []
fold_f1_scores = []
fold_precision_scores = []
fold_recall_scores = []
histories = []

# Best model seen so far (updated by the training loop)
best_model = None
best_accuracy = 0

In [None]:
# Base seed for weight initialisation and batch shuffling; each fold uses
# TRAIN_SEED + fold so that a rerun is repeatable (as far as the backend's
# ops are deterministic).
TRAIN_SEED = 42

# KFold yields row positions, not index labels. Indexing a plain array keeps
# the lookup positional regardless of what index the target carries.
y_values = np.asarray(y)

for fold, (train_index, test_index) in enumerate(kf.split(X)):
    print(f"Training fold {fold + 1}")

    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    tf.keras.utils.set_random_seed(TRAIN_SEED + fold)

    # Build the neural network; the input width follows the feature matrix
    # instead of being hard-coded.
    model = Sequential()
    model.add(Dense(32, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(3, activation='softmax'))

    # Compile model
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train and keep the history. verbose=0 avoids printing one line per
    # epoch for every fold; the per-epoch values remain available in
    # `histories`.
    history = model.fit(X_train, y_train, epochs=50, batch_size=10, verbose=0)
    histories.append(history)

    # Predict on the held-out fold
    y_pred = np.argmax(model.predict(X_test, verbose=0), axis=-1)

    # Evaluation metrics. zero_division=0 scores a class with no predicted
    # (or no true) samples as 0 explicitly, which is what the default does
    # after emitting an UndefinedMetricWarning.
    accuracy = accuracy_score(y_test, y_pred)
    fold_accuracies.append(accuracy)
    fold_f1_scores.append(f1_score(y_test, y_pred, average='weighted', zero_division=0))
    fold_precision_scores.append(precision_score(y_test, y_pred, average='weighted', zero_division=0))
    fold_recall_scores.append(recall_score(y_test, y_pred, average='weighted', zero_division=0))
    print(f"Fold {fold + 1} accuracy: {accuracy:.4f}")

    # Keep the best model. The `is None` check guarantees a model is kept
    # even if every fold scores an accuracy of 0.
    if best_model is None or accuracy > best_accuracy:
        best_accuracy = accuracy
        best_model = model

# Averages over all folds
print(f'Average Accuracy: {np.mean(fold_accuracies)}')
print(f'Average F1 Score: {np.mean(fold_f1_scores)}')
print(f'Average Precision Score: {np.mean(fold_precision_scores)}')
print(f'Average Recall Score: {np.mean(fold_recall_scores)}')

Training fold 1
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Training fold 2
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Ep

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Average Accuracy: 0.8578431372549019
Average F1 Score: 0.8291446706887884
Average Precision Score: 0.8472951680672268
Average Recall Score: 0.8578431372549019


In [None]:
# Save the best model found during cross-validation.
if best_model is None:
    # Clearer than the AttributeError that None.save(...) would raise.
    raise RuntimeError("No trained model available: run the cross-validation cell before saving.")
model_path = 'best_model.h5'
best_model.save(model_path)
print(f"Model disimpan di {model_path}")

# Save the fitted scaler so the same feature scaling can be applied to new
# inputs before they are passed to the saved model.
scaler_path = 'scaler.pkl'
joblib.dump(scaler, scaler_path)
print(f"Scaler disimpan di {scaler_path}")

Model disimpan di best_model.h5
Scaler disimpan di scaler.pkl


  saving_api.save_model(


In [None]:
# Evaluation metrics for `best_model`, the model that was selected and saved.
# (`model` would be whichever fold happened to be trained last.)
# NOTE: X holds every row, including rows the model was trained on, so these
# figures are optimistic compared with the cross-validated averages.
y_pred_prob = best_model.predict(X)
y_pred = np.argmax(y_pred_prob, axis=1)

# zero_division=0 scores an undefined per-class metric as 0 without emitting
# an UndefinedMetricWarning.
print("Confusion Matrix:\n", confusion_matrix(y, y_pred))
print("Classification Report:\n", classification_report(y, y_pred, zero_division=0))
print("F1 Score:", f1_score(y, y_pred, average='weighted', zero_division=0))
print("Precision Score:", precision_score(y, y_pred, average='weighted', zero_division=0))
print("Recall Score:", recall_score(y, y_pred, average='weighted', zero_division=0))


Confusion Matrix:
 [[17  0  0]
 [ 5  6  1]
 [ 0  0 20]]
Classification Report:
               precision    recall  f1-score   support

           0       0.77      1.00      0.87        17
           1       1.00      0.50      0.67        12
           2       0.95      1.00      0.98        20

    accuracy                           0.88        49
   macro avg       0.91      0.83      0.84        49
weighted avg       0.90      0.88      0.86        49

F1 Score: 0.8639328151523273
Precision Score: 0.9017139323261772
Recall Score: 0.8775510204081632
