# **1. Import Library**
Pada tahap ini, Anda perlu mengimpor beberapa pustaka (library) Python yang dibutuhkan untuk analisis data dan pembangunan model machine learning.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV
from google.colab import files

# **2. Memuat Dataset dari Hasil Clustering**
Memuat dataset hasil clustering dari file CSV ke dalam variabel DataFrame.

In [None]:
import os

DATA_FILE = 'data_clustering.csv'

# Only prompt for an upload when the CSV is not already in the working
# directory. This keeps "Restart & Run All" from blocking on a manual step, and
# avoids a stale-data trap: when a file with the same name already exists,
# Colab typically stores the new upload under a different name
# (e.g. "data_clustering (1).csv"), so the read below would keep loading the
# old copy. To replace the data, delete the existing file first.
uploaded = files.upload() if not os.path.exists(DATA_FILE) else {}

df = pd.read_csv(DATA_FILE)

Saving data_clustering.csv to data_clustering.csv


In [None]:
# Preview the first five rows to confirm the CSV loaded with the expected columns.
df.head(n=5)

Unnamed: 0,TransactionAmount,TransactionDate,TransactionType,Location,Channel,CustomerAge,CustomerOccupation,TransactionDuration,LoginAttempts,AccountBalance,PreviousTransactionDate,Age_Bin,Age_Bin_Encoded,Target
0,0.007207,680,1,36,0,0.83871,0,0.244828,0.0,0.336832,105,Remaja,0,2
1,0.19594,1178,1,15,0,0.806452,0,0.451724,0.0,0.918055,192,Remaja,0,0
2,0.06568,1262,1,23,2,0.016129,3,0.158621,0.0,0.068637,41,Remaja,0,0
3,0.096016,818,1,33,2,0.129032,3,0.051724,0.0,0.569198,163,Remaja,0,2
4,0.006874,1939,0,1,2,0.435484,3,0.648276,0.0,0.492591,16,Remaja,0,1


# **3. Data Splitting**
Tahap Data Splitting bertujuan untuk memisahkan dataset menjadi dua bagian: data latih (training set) dan data uji (test set).

In [None]:
# Columns that must not be fed to the classifiers:
#   'Target'     - the label being predicted.
#   'Age_Bin'    - text bin label; its numeric form 'Age_Bin_Encoded' stays in X.
#   'Unnamed: 0' - looks like a row index saved with the CSV (0, 1, 2, ...);
#                  it carries no signal and, being unscaled, can dominate
#                  distance-based models such as KNN.
NON_FEATURE_COLUMNS = ['Target', 'Age_Bin', 'Unnamed: 0']

# Drop only the columns that are actually present so the cell also works when
# the CSV was exported without the index column.
X = df.drop(columns=[col for col in NON_FEATURE_COLUMNS if col in df.columns])
y = df['Target']

# 80/20 split, stratified on the label so both parts keep the same class
# proportions; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Jumlah data latih:", X_train.shape[0])
print("Jumlah data uji:", X_test.shape[0])

Jumlah data latih: 1900
Jumlah data uji: 476


# **4. Membangun Model Klasifikasi**
Setelah memilih algoritma klasifikasi yang sesuai, langkah selanjutnya adalah melatih model menggunakan data latih.

Berikut adalah rekomendasi tahapannya.
1. Menggunakan algoritma klasifikasi yaitu Decision Tree.
2. Latih model menggunakan data yang sudah dipisah.

In [None]:
# Baseline classifier: a decision tree with a fixed seed, trained on the
# training split. fit() returns the estimator itself, so it can be chained.
model_dt = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

print("\nModel Decision Tree telah dilatih:", model_dt, sep="\n")


Model Decision Tree telah dilatih:
DecisionTreeClassifier(random_state=42)


In [None]:
# Persist the baseline tree. Despite the .h5 extension this is a joblib file,
# so it must be read back with joblib.load().
joblib.dump(value=model_dt, filename='decision_tree_model.h5')

['decision_tree_model.h5']

# **5. Memenuhi Kriteria Skilled dan Advanced dalam Membangun Model Klasifikasi**



**Biarkan kosong jika tidak menerapkan kriteria skilled atau advanced**

In [None]:
# Candidate classifiers to compare, keyed by the label used in the results table.
models = {
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "KNN": KNeighborsClassifier(n_neighbors=5),
}

# Direct handles to the same estimator objects, used by later cells.
model_dt = models["Decision Tree"]
model_knn = models["KNN"]

In [None]:
# Train both candidates on the training split before scoring them.
model_dt.fit(X_train, y_train)
model_knn.fit(X_train, y_train)


def _test_set_scores(label, estimator):
    """Return one results-table row for ``estimator`` scored on the test split.

    Precision, recall and F1 are averaged with class-support weights;
    ``zero_division=0`` reports 0 without a warning when a class receives no
    predictions.
    """
    predicted = estimator.predict(X_test)
    averaging = {'average': 'weighted', 'zero_division': 0}
    return {
        'Model': label,
        'Accuracy': accuracy_score(y_test, predicted),
        'Precision': precision_score(y_test, predicted, **averaging),
        'Recall': recall_score(y_test, predicted, **averaging),
        'F1-Score': f1_score(y_test, predicted, **averaging),
    }


# One row per model, in the insertion order of the `models` dict.
results = [_test_set_scores(label, estimator) for label, estimator in models.items()]

df_results = pd.DataFrame(results)
print("=== Hasil Evaluasi Model ===")
print(df_results)


=== Hasil Evaluasi Model ===
           Model  Accuracy  Precision    Recall  F1-Score
0  Decision Tree  1.000000   1.000000  1.000000  1.000000
1            KNN  0.997899   0.997912  0.997899  0.997899


In [None]:
# Persist the fitted KNN model. The file is written by joblib regardless of
# the .h5 extension, so reload it with joblib.load().
joblib.dump(value=model_knn, filename='/content/explore_knn_classification.h5')

['/content/explore_knn_classification.h5']

Hyperparameter Tuning Model

Pilih salah satu algoritma yang ingin Anda tuning

In [None]:
dt = DecisionTreeClassifier(random_state=42)

# Search space: 2 criteria x 5 depths x 3 split sizes x 3 leaf sizes
# = 90 candidate configurations.
param_grid = {
    'criterion': ['gini', 'entropy'],
    'max_depth': [3, 5, 7, 9, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Exhaustive search scored by accuracy with 5-fold cross-validation on the
# training split; n_jobs=-1 uses every available CPU core.
grid_search = GridSearchCV(
    estimator=dt,
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
    n_jobs=-1,
    verbose=1
)

grid_search.fit(X_train, y_train)

# GridSearchCV defaults to refit=True, so best_estimator_ has already been
# trained on the whole training split with the winning parameters. Fitting it
# a second time would only repeat that work.
best_dt = grid_search.best_estimator_

y_pred_best = best_dt.predict(X_test)

Fitting 5 folds for each of 90 candidates, totalling 450 fits


In [None]:
y_pred_tuned = best_dt.predict(X_test)

# Use the same metric settings as the model-comparison cell (weighted average,
# zero_division=0) so the tuned scores are directly comparable and no
# undefined-metric warning is raised if a class is never predicted.
weighted = {'average': 'weighted', 'zero_division': 0}

akurasi = accuracy_score(y_test, y_pred_tuned)
presisi = precision_score(y_test, y_pred_tuned, **weighted)
recall = recall_score(y_test, y_pred_tuned, **weighted)
f1 = f1_score(y_test, y_pred_tuned, **weighted)

print("=== Hasil Evaluasi Model Setelah Tuning ===")
print(f"Akurasi : {akurasi:.4f}")
print(f"Presisi : {presisi:.4f}")
print(f"Recall  : {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

# Per-class breakdown of the same predictions.
print("\n=== Classification Report ===")
print(classification_report(y_test, y_pred_tuned, zero_division=0))

# Rows are true classes, columns are predicted classes.
print("\n=== Confusion Matrix ===")
print(confusion_matrix(y_test, y_pred_tuned))


=== Hasil Evaluasi Model Setelah Tuning ===
Akurasi : 1.0000
Presisi : 1.0000
Recall  : 1.0000
F1-Score: 1.0000

=== Classification Report ===
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       161
           1       1.00      1.00      1.00       156
           2       1.00      1.00      1.00       159

    accuracy                           1.00       476
   macro avg       1.00      1.00      1.00       476
weighted avg       1.00      1.00      1.00       476


=== Confusion Matrix ===
[[161   0   0]
 [  0 156   0]
 [  0   0 159]]


In [None]:
# Save the tuned model. joblib comes from the import cell at the top of the
# notebook. The file is a joblib dump despite the .h5 extension, so reload it
# with joblib.load().
joblib.dump(best_dt, '/content/tuning_classification.h5')

print("Berhasil disimpan sebagai 'tuning_classification.h5'")


Berhasil disimpan sebagai 'tuning_classification.h5'


End of Code