In [34]:
# Import library yang diperlukan
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [35]:
# Candidate classifiers keyed by the display name used in later reports.
# Both estimators share one seed so repeated runs give the same trees.
RANDOM_STATE = 42
models = dict(
    [
        ("Random Forest", RandomForestClassifier(random_state=RANDOM_STATE)),
        ("Decision Tree", DecisionTreeClassifier(random_state=RANDOM_STATE)),
    ]
)

In [36]:
# Read the training split written by the earlier preprocessing program
# and separate the target column 'y' from the feature columns.
data_train = pd.read_csv('train.csv')
y_train = data_train['y']
X_train = data_train.drop('y', axis=1)

In [37]:
# Read the held-out test split and separate the target column 'y' from the features.
# Make sure test.csv has the same format as bankpreprocessed_data.csv.
data_test = pd.read_csv('test.csv')
y_test = data_test['y']
X_test = data_test.drop('y', axis=1)

In [38]:
# Fit every candidate model, then score it on both the training and the test split.
def _score_split(y_true, y_pred):
    """Return (accuracy, confusion matrix, classification report) for one split."""
    return (
        accuracy_score(y_true, y_pred),
        confusion_matrix(y_true, y_pred),
        classification_report(y_true, y_pred),
    )


# Evaluation results of all models, keyed by model name
results = {}

for model_name, model in models.items():
    model.fit(X_train, y_train)

    # Predictions on the data the model was fitted on and on the held-out data
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    train_accuracy, train_conf_matrix, train_report = _score_split(y_train, y_train_pred)
    test_accuracy, test_conf_matrix, test_report = _score_split(y_test, y_test_pred)

    # Key order matters: the display cell reads these keys by name
    results[model_name] = {
        "Train Accuracy": train_accuracy,
        "Test Accuracy": test_accuracy,
        "Train Confusion Matrix": train_conf_matrix,
        "Test Confusion Matrix": test_conf_matrix,
        "Train Classification Report": train_report,
        "Test Classification Report": test_report,
    }

In [39]:
# Print the stored evaluation of every model: accuracies first,
# then each confusion matrix / classification report under its own heading.
for model_name, metrics in results.items():
    print(f"\nHasil Evaluasi Model {model_name}:")
    print(f"Train Accuracy: {metrics['Train Accuracy']:.2f}")
    print(f"Test Accuracy: {metrics['Test Accuracy']:.2f}")
    for section in (
        "Train Confusion Matrix",
        "Test Confusion Matrix",
        "Train Classification Report",
        "Test Classification Report",
    ):
        # The heading text is the same string as the dictionary key
        print(f"\n{section}:")
        print(metrics[section])


Hasil Evaluasi Model Random Forest:
Train Accuracy: 1.00
Test Accuracy: 0.90

Train Confusion Matrix:
[[27956     0]
 [    2  3689]]

Test Confusion Matrix:
[[11590   376]
 [  945   653]]

Train Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00     27956
           1       1.00      1.00      1.00      3691

    accuracy                           1.00     31647
   macro avg       1.00      1.00      1.00     31647
weighted avg       1.00      1.00      1.00     31647


Test Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.97      0.95     11966
           1       0.63      0.41      0.50      1598

    accuracy                           0.90     13564
   macro avg       0.78      0.69      0.72     13564
weighted avg       0.89      0.90      0.89     13564


Hasil Evaluasi Model Decision Tree:
Train Accuracy: 1.00
Test Accuracy: 0.87

Train Confusion Mat

In [40]:
# Bias / variance heuristics based on the stored accuracies:
#   - "variance" here is simply the train-minus-test accuracy gap; a gap above 0.10 is flagged
#   - a training accuracy below 0.80 is flagged as high bias
print("\nAnalisis Bias dan Variance:")
for model_name, metrics in results.items():
    print(f"\nModel: {model_name}")
    train_accuracy, test_accuracy = metrics['Train Accuracy'], metrics['Test Accuracy']

    if train_accuracy > test_accuracy:
        variance = train_accuracy - test_accuracy
        print(f"Variance (Perbedaan Train-Test Accuracy): {variance:.2f}")
        print(
            "Model kemungkinan overfitting (high variance)."
            if variance > 0.10
            else "Variance terkendali."
        )
    else:
        # Test accuracy is at least as high as training accuracy
        print("Tidak ada indikasi overfitting.")

    print(
        "Model kemungkinan memiliki bias tinggi (low training accuracy)."
        if train_accuracy < 0.80
        else "Bias rendah."
    )


Analisis Bias dan Variance:

Model: Random Forest
Variance (Perbedaan Train-Test Accuracy): 0.10
Variance terkendali.
Bias rendah.

Model: Decision Tree
Variance (Perbedaan Train-Test Accuracy): 0.13
Model kemungkinan overfitting (high variance).
Bias rendah.


In [41]:
# Share of correct and incorrect test-set predictions for each fitted model
for model_name, model in models.items():
    y_pred = model.predict(X_test)

    # Element-wise match mask; its negation marks the misclassified rows
    is_correct = y_pred == y_test
    total_predictions = len(y_test)
    correct_predictions = is_correct.sum()
    incorrect_predictions = (~is_correct).sum()

    correct_percentage = (correct_predictions / total_predictions) * 100
    incorrect_percentage = (incorrect_predictions / total_predictions) * 100

    print(f"\nPersentase Prediksi untuk {model_name}:")
    print(f"Prediksi Benar: {correct_percentage:.2f}%")
    print(f"Prediksi Salah: {incorrect_percentage:.2f}%")


Persentase Prediksi untuk Random Forest:
Prediksi Benar: 90.26%
Prediksi Salah: 9.74%

Persentase Prediksi untuk Decision Tree:
Prediksi Benar: 87.39%
Prediksi Salah: 12.61%


In [42]:
# Predict one test row with both models as a spot check against the true label.
sample_index = 9  # change this index to pick a different row

# Select the row with a list indexer so it stays a one-row DataFrame.
# The previous `.iloc[i].values.reshape(1, -1)` produced a bare array, which
# drops the column names (scikit-learn then warns that X has no valid feature
# names for a model fitted on a DataFrame) and coerces every column to one dtype.
sample_data_df = X_test.iloc[[sample_index]].reset_index(drop=True)
# Raw (1, n_features) array under the original variable name — retained in case
# a later cell still expects it; it is not used for prediction any more.
sample_data = sample_data_df.to_numpy()
sample_actual = y_test.iloc[sample_index]

# Predict from the DataFrame so features are matched to the fitted column names
random_forest_prediction = models["Random Forest"].predict(sample_data_df)[0]
decision_tree_prediction = models["Decision Tree"].predict(sample_data_df)[0]

# Show the feature values together with their column names
print("\nPrediksi untuk satu data:")
print(f"Data ke-{sample_index + 1} (fitur):")
print(sample_data_df)
print(f"Nilai aktual: {sample_actual}")
print(f"Prediksi model Random Forest: {random_forest_prediction}")
print(f"Prediksi model Decision Tree: {decision_tree_prediction}")

# Compare each model's prediction with the true label
for predictor_name, predicted_label in (
    ("Random Forest", random_forest_prediction),
    ("Decision Tree", decision_tree_prediction),
):
    if sample_actual == predicted_label:
        print(f"Prediksi {predictor_name} benar ✅")
    else:
        print(f"Prediksi {predictor_name} salah ❌")


Prediksi untuk satu data:
Data ke-10 (fitur):
       age  job  marital  education  default   balance  housing  loan  \
0  0.25974  9.0      2.0        1.0      0.0  0.075146      0.0   1.0   

   contact   day  month  duration  campaign  pdays  previous  poutcome  
0      2.0  20.0    6.0  0.119357  0.016129    0.0       0.0       3.0  
Nilai aktual: 0
Prediksi model Random Forest: 0
Prediksi model Decision Tree: 1
Prediksi Random Forest benar ✅
Prediksi Decision Tree salah ❌


