In [4]:
import pandas as pd
import os
import joblib

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report

# Create the output folder for the persisted models (no-op if it already exists).
os.makedirs("ml_models", exist_ok=True)

# Load the dataset.
df = pd.read_csv("altara_dataset.csv")

# Encode the categorical `konsistensi` column as integer class codes.
le = LabelEncoder()
df['konsistensi_encoded'] = le.fit_transform(df['konsistensi'])

# Persist the fitted encoder next to the models: the konsistensi classifier is
# trained on `konsistensi_encoded`, so anything that loads that model needs this
# same encoder (le.inverse_transform) to turn predicted codes back into labels.
joblib.dump(le, "ml_models/konsistensi_label_encoder.pkl")

# Show the label -> code mapping for reference. LabelEncoder assigns each class
# the index of its position in `classes_`, so enumerate() gives the same mapping
# as le.transform(le.classes_) without a second transform, and as plain ints.
print("Label Encoding untuk konsistensi:", {label: code for code, label in enumerate(le.classes_)})

# Feature columns shared by every model trained in this cell.
X = df[['midterm_score', 'final_score', 'project_score', 'attendance']]

# ====== Model 1: Scholarship eligibility (eligible_beasiswa) ======
# Target column. No test_size is passed, so scikit-learn's default split is
# used; the seed is fixed so the split is repeatable.
y = df['eligible_beasiswa']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Class-weighted logistic regression. fit() returns the estimator itself, so
# construction and training are chained into one statement.
model_beasiswa = LogisticRegression(class_weight='balanced').fit(X_train, y_train)

# Held-out metrics, printed under the section header. A single print with a
# newline separator emits the same text as two consecutive prints.
beasiswa_report = classification_report(y_test, model_beasiswa.predict(X_test))
print("\n[Eligible Beasiswa]", beasiswa_report, sep="\n")

# Save the trained model to disk.
joblib.dump(model_beasiswa, "ml_models/beasiswa_model.pkl")

# ====== Model 2: Non-academic (non_akademik) ======
# Target column. Same features and the same seeded split call as the other
# models in this cell.
y = df['non_akademik']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Class-weighted logistic regression, built and trained in one chained call
# (fit() returns the fitted estimator).
model_non_akademik = LogisticRegression(class_weight='balanced').fit(X_train, y_train)

# Held-out metrics under the section header; one print with sep="\n" produces
# the same output as printing the header and the report separately.
non_akademik_report = classification_report(y_test, model_non_akademik.predict(X_test))
print("\n[Non-Akademik]", non_akademik_report, sep="\n")

# Save the trained model to disk.
joblib.dump(model_non_akademik, "ml_models/non_akademik_model.pkl")

# ====== Model 3: Consistency (konsistensi) ======
# Multi-class target: the integer codes produced by the LabelEncoder `le`
# (the printed "Label Encoding" mapping shows which code is which label).
y = df['konsistensi_encoded']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Random forests are stochastic (bootstrap samples and feature sub-sampling).
# Fix the seed so the saved model and the metrics below are identical on every
# Restart & Run All, matching the seeded train/test split.
model_konsistensi = RandomForestClassifier(random_state=42)
model_konsistensi.fit(X_train, y_train)

# Held-out metrics; the class rows in the report are the encoded integers.
print("\n[Konsistensi]")
print(classification_report(y_test, model_konsistensi.predict(X_test)))

# Save the trained model to disk.
joblib.dump(model_konsistensi, "ml_models/konsistensi_model.pkl")

# ====== Model 4: Early internship (magang_awal) ======
# Target column. Same features and the same seeded split call as the other
# models in this cell.
y = df['magang_awal']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Class-weighted logistic regression, built and trained in one chained call
# (fit() returns the fitted estimator).
model_magang = LogisticRegression(class_weight='balanced').fit(X_train, y_train)

# Held-out metrics under the section header; one print with sep="\n" produces
# the same output as printing the header and the report separately.
magang_report = classification_report(y_test, model_magang.predict(X_test))
print("\n[Magang Awal]", magang_report, sep="\n")

# Save the trained model. This stays the cell's final expression, so the
# notebook still displays the list of written paths returned by joblib.dump.
joblib.dump(model_magang, "ml_models/magang_model.pkl")


Label Encoding untuk konsistensi: {'Konsisten': np.int64(0), 'Naik': np.int64(1), 'Turun': np.int64(2)}

[Eligible Beasiswa]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        24
           1       1.00      1.00      1.00         1

    accuracy                           1.00        25
   macro avg       1.00      1.00      1.00        25
weighted avg       1.00      1.00      1.00        25


[Non-Akademik]
              precision    recall  f1-score   support

           0       1.00      0.96      0.98        24
           1       0.50      1.00      0.67         1

    accuracy                           0.96        25
   macro avg       0.75      0.98      0.82        25
weighted avg       0.98      0.96      0.97        25


[Konsistensi]
              precision    recall  f1-score   support

           0       0.50      0.50      0.50         2
           1       0.92      0.92      0.92        12
           2       1.00    

['ml_models/magang_model.pkl']