# **1. Import Library**

In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report
import joblib

# **2. Memuat Dataset dari Hasil Clustering**

In [None]:
# Load the dataset produced by the clustering stage; the path is relative to
# the notebook's working directory.
DATA_PATH = "data_hasil_clust/data_clustering_inverse.csv"
df = pd.read_csv(DATA_PATH)

# Preview the first rows.
df.head()

## **Feature Encoding: One Hot Encoding**

In [None]:
# Columns stored with the `object` dtype are treated as categorical features.
categorical_cols = df.select_dtypes(include=['object']).columns.tolist()

# One-hot encode those columns with pd.get_dummies; drop_first=True drops the
# first level of each column, so k categories become k-1 indicator columns.
df_encoded = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Preview the encoded frame.
df_encoded.head()

Unnamed: 0,TransactionAmount,CustomerAge,TransactionDuration,LoginAttempts,AccountBalance,Target,TransactionType_Debit,Location_Atlanta,Location_Austin,Location_Baltimore,...,Location_Tucson,Location_Virginia Beach,Location_Washington,Channel_Branch,Channel_Online,CustomerOccupation_Engineer,CustomerOccupation_Retired,CustomerOccupation_Student,AgeGroup_Muda,AgeGroup_Tua
0,14.09,70.0,81.0,1.0,5112.21,1,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
1,376.24,68.0,141.0,1.0,13758.91,0,True,False,False,False,...,False,False,False,False,False,False,False,False,False,True
2,126.29,19.0,56.0,1.0,1122.35,1,True,False,False,False,...,False,False,False,False,True,False,False,True,True,False
3,184.5,26.0,25.0,1.0,8569.06,1,True,False,False,False,...,False,False,False,False,True,False,False,True,True,False
4,92.15,18.0,172.0,1.0,781.68,1,True,False,False,False,...,False,False,False,False,False,False,False,True,True,False


# **3. Data Splitting**
Tahap Data Splitting bertujuan untuk memisahkan dataset menjadi dua bagian: data latih (training set) dan data uji (test set).

In [None]:
# Separate the 'Target' label column from the feature columns.
y = df_encoded['Target']
X = df_encoded.drop(columns='Target')

# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts, and the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Report the number of rows in the full set and in each partition.
for label, part in (
    ("Jumlah data total: ", X),
    ("Jumlah data latih: ", X_train),
    ("Jumlah data test: ", X_test),
):
    print(label, len(part))

Jumlah data total:  1945
Jumlah data latih:  1556
Jumlah data test:  389


# **4. Membangun Model Klasifikasi**
Setelah memilih algoritma klasifikasi yang sesuai, langkah selanjutnya adalah melatih model menggunakan data latih.

1. Menggunakan algoritma klasifikasi yaitu Decision Tree
2. Latih model menggunakan data yang sudah dipisah

In [None]:
# Build a Decision Tree classifier (seeded for repeatability) and fit it on the
# training split; fit() returns the estimator itself, so the two steps chain.
decision_tree_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Keep the fitted estimator as the cell's last expression so it is displayed.
decision_tree_model

In [None]:
# Persist the fitted Decision Tree to disk.
# Note: despite the '.h5' extension, joblib.dump writes a joblib pickle, not HDF5.
joblib.dump(value=decision_tree_model, filename='decision_tree_model.h5')

['decision_tree_model.h5']

# **5. Memenuhi Kriteria Skilled dan Advanced dalam Membangun Model Klasifikasi**



In [None]:
# Train a second scikit-learn classifier besides the Decision Tree: a seeded
# Random Forest fitted on the same training split (fit() returns the estimator).
new_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Keep the fitted estimator as the cell's last expression so it is displayed.
new_model

In [None]:
# Evaluate every model trained so far on the held-out test split.
# classification_report prints per-class precision, recall and F1-score
# together with the overall accuracy.

# Predict the test labels with both models.
y_pred_dt = decision_tree_model.predict(X_test)
y_pred_new = new_model.predict(X_test)

# Title and report for the Decision Tree, one per line.
print("Decision Tree Performance", classification_report(y_test, y_pred_dt), sep="\n")

# Visual separator between the two reports.
print("=" * 50)

# Title and report for the second model (Random Forest).
print("New Model Performance", classification_report(y_test, y_pred_new), sep="\n")

Decision Tree Performance
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       196
           1       1.00      1.00      1.00       193

    accuracy                           1.00       389
   macro avg       1.00      1.00      1.00       389
weighted avg       1.00      1.00      1.00       389

New Model Performance
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       196
           1       1.00      1.00      1.00       193

    accuracy                           1.00       389
   macro avg       1.00      1.00      1.00       389
weighted avg       1.00      1.00      1.00       389



In [None]:
# Persist the fitted Random Forest with joblib (a joblib pickle despite the '.h5' name).
# NOTE(review): the recorded cell output lists a different file name than the
# one written here — re-run the cell so the saved output matches.
joblib.dump(value=new_model, filename='explore_RandomForest_classification.h5')

['explore_RandomForestClassifier_classification.h5']

## **Hyperparameter Tuning Model**

In [None]:
# Hyperparameter tuning: exhaustive grid search over Random Forest settings,
# followed by a refit on the training split.

# Search space: 3 * 3 * 3 * 2 = 54 candidate combinations.
params = {
    'n_estimators': [100, 200, 300],
    'max_depth': [10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'criterion': ['gini', 'entropy'],
}

# 5-fold cross-validation scored by accuracy means 54 * 5 = 270 forest fits.
# n_jobs=-1 spreads those fits over all available CPU cores instead of running
# them one by one; the fixed random_state keeps each fit deterministic, so the
# selected hyperparameters are the same as in a sequential run.
new_model_tuned = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=params,
    cv=5,
    scoring='accuracy',
    n_jobs=-1,
)

# Run the search on the training split (X_train, y_train). The fitted search
# object is the cell's last expression, so it is displayed.
new_model_tuned.fit(X_train, y_train)

In [None]:
# Evaluate the tuned model on the held-out test split: per-class precision,
# recall and F1-score plus overall accuracy.

# predict() on the fitted search object delegates to the refitted best estimator.
y_pred_tuning = new_model_tuned.predict(X_test)

# Title and report, one per line.
print("Tuned Model Performance", classification_report(y_test, y_pred_tuning), sep="\n")

Tuned Model Performance
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       196
           1       1.00      1.00      1.00       193

    accuracy                           1.00       389
   macro avg       1.00      1.00      1.00       389
weighted avg       1.00      1.00      1.00       389



In [None]:
# Persist the tuning result. This stores the whole fitted GridSearchCV object
# (not only its best estimator) as a joblib pickle, despite the '.h5' extension.
joblib.dump(value=new_model_tuned, filename='tuning_classification.h5')

['tuning_classification.h5']