In [1]:
# Instalasi paket yang diperlukan (Jika belum terpasang)
!pip install xgboost
!pip install pandas openpyxl




In [2]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [3]:
# Read the graduation dataset from the Excel workbook
df = pd.read_excel('data_kelulusan.xlsx')

# Sanity check: print the column index first, then the first rows of the frame
for preview in (df.columns, df.head()):
    print(preview)


Index(['KUIS', 'TUGAS', 'UTS', 'UAS', 'JK', 'NA'], dtype='object')
   KUIS  TUGAS  UTS  UAS JK     NA
0    70     85   88   80  L  80.75
1    60     70   77   65  L  68.00
2    80     80   80   80  L  80.00
3    79     80   65   60  L  71.00
4    55     70   80   85  L  72.50


In [4]:
# Fail fast if the workbook lacks any column this notebook relies on
expected_columns = ['KUIS', 'TUGAS', 'UTS', 'UAS', 'JK', 'NA']
for col in expected_columns:
    if col not in df.columns:
        raise ValueError(f"Kolom {col} tidak ditemukan di dalam DataFrame")

# A final grade (NA) strictly greater than this value counts as a pass
PASS_THRESHOLD = 60

# Codes found in the JK column and their numeric encoding
JK_CODES = {'L': 1, 'P': 0}

# Binary target: 1 = pass ('Lulus'), 0 = fail ('Tidak').
# It is derived straight from NA, so re-running this cell gives the same column.
df['Target'] = (df['NA'] > PASS_THRESHOLD).astype('int64')

# Encode JK. Values that are already 0/1 pass through unchanged, which keeps the
# cell idempotent: a plain `.map(JK_CODES)` would turn the whole column into NaN
# on a second run. Anything outside the known codes is rejected instead of
# silently becoming NaN.
jk_encoded = df['JK'].map(lambda value: JK_CODES.get(value, value))
unknown_jk = ~jk_encoded.isin(list(JK_CODES.values()))
if unknown_jk.any():
    raise ValueError(
        f"Nilai JK tidak dikenal: {jk_encoded[unknown_jk].unique().tolist()}"
    )
df['JK'] = jk_encoded.astype('int64')

# Split into features and target. NA is dropped because Target is computed from it.
# NOTE(review): in the preview rows printed above, NA equals the mean of
# KUIS/TUGAS/UTS/UAS, so the target looks like a deterministic function of the
# remaining features -- confirm this is the intended modelling setup.
X = df.drop(columns=['Target', 'NA'])
y = df['Target']

# Hold out 20% of the rows for testing; the fixed seed makes the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [5]:
# Fit a 100-tree random forest with a fixed seed, then predict the test set
rf_model = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)


In [6]:
# Train the XGBoost classifier and predict the held-out test set.
# `use_label_encoder` is no longer passed: it is deprecated, and recent XGBoost
# releases ignore it and emit a "parameters ... are not used" warning. The target
# is already encoded as 0/1 integers, so no label encoding is needed.
# `random_state` is set explicitly to match the random forest cell.
xgb_model = XGBClassifier(eval_metric='logloss', random_state=42)
xgb_model.fit(X_train, y_train)
xgb_predictions = xgb_model.predict(X_test)


In [7]:
# Evaluate both classifiers on the held-out test set
rf_accuracy = accuracy_score(y_test, rf_predictions)
xgb_accuracy = accuracy_score(y_test, xgb_predictions)

# (display name, test-set predictions, accuracy) in reporting order
results = [
    ("Random Forest", rf_predictions, rf_accuracy),
    ("XGBoost", xgb_predictions, xgb_accuracy),
]

# Accuracies
for name, _, accuracy in results:
    print(f"{name} Accuracy: {accuracy:.2f}")

# Confusion matrices
for name, predictions, _ in results:
    print(f"\n{name} Confusion Matrix:")
    print(confusion_matrix(y_test, predictions))

# Classification reports
for name, predictions, _ in results:
    print(f"\n{name} Classification Report:")
    print(classification_report(y_test, predictions))

# Pick the best model by test accuracy. A tie is reported as a tie instead of
# falling through to XGBoost, which the former plain `else` branch did.
if rf_accuracy > xgb_accuracy:
    print("\nModel terbaik adalah Random Forest dengan akurasi {:.2f}".format(rf_accuracy))
elif xgb_accuracy > rf_accuracy:
    print("\nModel terbaik adalah XGBoost dengan akurasi {:.2f}".format(xgb_accuracy))
else:
    print("\nKedua model memiliki akurasi yang sama: {:.2f}".format(rf_accuracy))


Random Forest Accuracy: 0.80
XGBoost Accuracy: 0.70

Random Forest Confusion Matrix:
[[1 2]
 [0 7]]

XGBoost Confusion Matrix:
[[1 2]
 [1 6]]

Random Forest Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.78      1.00      0.88         7

    accuracy                           0.80        10
   macro avg       0.89      0.67      0.69        10
weighted avg       0.84      0.80      0.76        10


XGBoost Classification Report:
              precision    recall  f1-score   support

           0       0.50      0.33      0.40         3
           1       0.75      0.86      0.80         7

    accuracy                           0.70        10
   macro avg       0.62      0.60      0.60        10
weighted avg       0.68      0.70      0.68        10


Model terbaik adalah Random Forest dengan akurasi 0.80
