In [23]:
# Import library yang diperlukan
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [24]:
# Set up the two classifiers to compare (Random Forest and Decision Tree),
# keyed by the display name used in the reports; both get the same fixed seed.
models = dict([
    ("Random Forest", RandomForestClassifier(random_state=42)),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
])

In [25]:
# Load the training dataset produced by the previous program; it is used to
# fit the models. Column 'y' is the target, every other column is a feature.
data_train = pd.read_csv('train.csv')
y_train = data_train['y']
X_train = data_train.drop('y', axis=1)

In [26]:
# Load the test dataset.
# Make sure test.csv has the same format as bankpreprocessed_data.csv.
data_test = pd.read_csv('test.csv')
y_test = data_test['y']
X_test = data_test.drop('y', axis=1)

In [27]:
# Evaluation results of both models, keyed by model name
results = {}

# Train each model on the training split and score it on the test split
for model_name, model in models.items():
    # fit() returns the fitted estimator, so training and test-set
    # prediction can be chained in one expression
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Compute the three evaluation metrics in a fixed order:
    # accuracy, confusion matrix, text classification report
    accuracy, conf_matrix, report = (
        metric(y_test, y_pred)
        for metric in (accuracy_score, confusion_matrix, classification_report)
    )

    # Store the metrics under the model's name
    results[model_name] = {
        "Accuracy": accuracy,
        "Confusion Matrix": conf_matrix,
        "Classification Report": report,
    }

In [28]:
# Show the stored evaluation results, one model after another
for model_name, metrics in results.items():
    # Header line followed by the accuracy rounded to two decimals
    print(
        f"\nHasil Evaluasi Model {model_name}:",
        f"Accuracy: {metrics['Accuracy']:.2f}",
        sep="\n",
    )
    # Each remaining metric is printed under its own blank-line-separated heading
    print("\nConfusion Matrix:", metrics['Confusion Matrix'], sep="\n")
    print("\nClassification Report:", metrics['Classification Report'], sep="\n")


Hasil Evaluasi Model Random Forest:
Accuracy: 0.90

Confusion Matrix:
[[11590   376]
 [  945   653]]

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.97      0.95     11966
           1       0.63      0.41      0.50      1598

    accuracy                           0.90     13564
   macro avg       0.78      0.69      0.72     13564
weighted avg       0.89      0.90      0.89     13564


Hasil Evaluasi Model Decision Tree:
Accuracy: 0.87

Confusion Matrix:
[[11107   859]
 [  851   747]]

Classification Report:
              precision    recall  f1-score   support

           0       0.93      0.93      0.93     11966
           1       0.47      0.47      0.47      1598

    accuracy                           0.87     13564
   macro avg       0.70      0.70      0.70     13564
weighted avg       0.87      0.87      0.87     13564



In [29]:
# Report, for every model, the percentage of test rows predicted correctly
# and the percentage predicted incorrectly
for model_name, model in models.items():
    # Predict on the test data
    y_pred = model.predict(X_test)

    # Element-wise hit/miss mask, reused for both counts
    is_correct = y_pred == y_test
    total_predictions = len(y_test)
    correct_predictions = is_correct.sum()
    incorrect_predictions = (~is_correct).sum()

    # Counts expressed as a share of all test rows, in percent
    correct_percentage = correct_predictions / total_predictions * 100
    incorrect_percentage = incorrect_predictions / total_predictions * 100

    print(
        f"\nPersentase Prediksi untuk {model_name}:",
        f"Prediksi Benar: {correct_percentage:.2f}%",
        f"Prediksi Salah: {incorrect_percentage:.2f}%",
        sep="\n",
    )


Persentase Prediksi untuk Random Forest:
Prediksi Benar: 90.26%
Prediksi Salah: 9.74%

Persentase Prediksi untuk Decision Tree:
Prediksi Benar: 87.39%
Prediksi Salah: 12.61%


In [33]:
# Validate both models on a single row of the test set
sample_index = 9  # Change this index to pick a different row

# Select the row with a list indexer so it stays a one-row DataFrame:
#  * the column names travel with it into predict(), matching the DataFrame
#    the models were fitted on (scikit-learn >= 1.0 warns when a bare array
#    is passed to an estimator that was fitted with feature names);
#  * every column keeps its own dtype instead of the whole row being
#    flattened into a single-dtype array.
sample_data = X_test.iloc[[sample_index]]
sample_actual = y_test.iloc[sample_index]

# One prediction per model, keyed by model name
sample_predictions = {
    name: fitted_model.predict(sample_data)[0]
    for name, fitted_model in models.items()
}

# Kept under their original names for any cell that still refers to them
random_forest_prediction = sample_predictions["Random Forest"]
decision_tree_prediction = sample_predictions["Decision Tree"]

# Feature values with their attribute names, re-labelled as row 0 for display
sample_data_df = sample_data.reset_index(drop=True)

print("\nPrediksi untuk satu data:")
print(f"Data ke-{sample_index + 1} (fitur):")
print(sample_data_df)
print(f"Nilai aktual: {sample_actual}")
for model_name, prediction in sample_predictions.items():
    print(f"Prediksi model {model_name}: {prediction}")

# Compare every prediction with the actual label
for model_name, prediction in sample_predictions.items():
    if sample_actual == prediction:
        print(f"Prediksi {model_name} benar ✅")
    else:
        print(f"Prediksi {model_name} salah ❌")


Prediksi untuk satu data:
Data ke-10 (fitur):
       age  job  marital  education  default   balance  housing  loan  \
0  0.25974  9.0      2.0        1.0      0.0  0.075146      0.0   1.0   

   contact   day  month  duration  campaign  pdays  previous  poutcome  
0      2.0  20.0    6.0  0.119357  0.016129    0.0       0.0       3.0  
Nilai aktual: 0
Prediksi model Random Forest: 0
Prediksi model Decision Tree: 1
Prediksi Random Forest benar ✅
Prediksi Decision Tree salah ❌


