In [None]:
# build_final_elm_model_fixed.py
import numpy as np
import pandas as pd
import pickle
import time

def load_cleaned_data(filename='data_cleaned.pkl'):
    """Load the previously cleaned dataset from a pickle file.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        Whatever object was pickled into ``filename``.

    NOTE(review): ``pickle.load`` can execute arbitrary code, so this should
    only ever be pointed at files produced by a trusted pipeline.
    """
    print("Loading cleaned data...")
    with open(filename, 'rb') as handle:
        return pickle.load(handle)

# ================================
# ELM MODEL - SAMA PERSIS DENGAN KODE CV ANDA
# ================================

def elm_model_fixed(X_train, y_train, X_test, n_hidden=50, activation='sigmoid', random_seed=None, reg_lambda=0.01):
    """Train a single-hidden-layer ELM and score ``X_test`` in one call.

    The hidden weights and biases are drawn from a standard normal
    distribution and left untrained; only the output weights are solved for,
    using a ridge-regularised least-squares fit.

    Args:
        X_train: Training features; indexed as 2-D (``shape[1]`` is the
            number of features).
        y_train: Training targets, multiplied directly with the hidden
            activations (assumed numeric 0/1 labels - TODO confirm).
        X_test: Features to score, with the same number of columns as
            ``X_train``.
        n_hidden: Number of hidden units.
        activation: ``'sigmoid'``, ``'tanh'`` or ``'relu'``. Any other value
            leaves the hidden pre-activations unchanged.
        random_seed: When not ``None``, reseeds NumPy's global RNG before the
            weights are drawn.
        reg_lambda: Ridge penalty added to the diagonal of ``H.T @ H``.

    Returns:
        Tuple ``(y_pred_proba, W, b, beta)``: the sigmoid-squashed scores for
        ``X_test``, the hidden weights, the hidden biases and the output
        weights.
    """

    def _logistic(z):
        # Clip before exponentiating so np.exp cannot overflow.
        return 1 / (1 + np.exp(-np.clip(z, -250, 250)))

    def _hidden(features):
        # Project through the random layer, then apply the activation.
        pre = np.dot(features, W) + b
        if activation == 'sigmoid':
            return _logistic(pre)
        if activation == 'tanh':
            return np.tanh(pre)
        if activation == 'relu':
            return np.maximum(0, pre)
        return pre

    if random_seed is not None:
        np.random.seed(random_seed)

    # Draw order (W first, then b) determines the weights for a given seed.
    W = np.random.normal(0, 1, (X_train.shape[1], n_hidden))
    b = np.random.normal(0, 1, n_hidden)

    H = _hidden(X_train)
    gram = H.T @ H + reg_lambda * np.identity(H.shape[1])

    try:
        beta = np.linalg.pinv(gram) @ H.T @ y_train
    except np.linalg.LinAlgError:
        # Fall back to a least-squares solve of the same normal equations.
        beta = np.linalg.lstsq(gram, H.T @ y_train, rcond=None)[0]

    # The raw linear output is squashed to (0, 1) with a sigmoid.
    scores = _logistic(_hidden(X_test) @ beta)

    return scores, W, b, beta

def find_optimal_threshold(y_true, y_pred_proba):
    """Pick the decision threshold that maximises Youden's J statistic.

    Youden's J is ``tpr - fpr`` evaluated at every threshold returned by
    ``sklearn.metrics.roc_curve``.

    Args:
        y_true: True binary labels.
        y_pred_proba: Predicted scores for the positive class.

    Returns:
        The threshold with the largest J. Falls back to ``0.5`` when
        ``roc_curve`` returns no thresholds or when no threshold achieves a
        strictly positive J (scores no better than chance, or J undefined
        because only one class is present).
    """
    from sklearn.metrics import roc_curve

    fpr, tpr, thresholds = roc_curve(y_true, y_pred_proba)
    if len(thresholds) == 0:
        return 0.5

    J = tpr - fpr
    best_idx = np.argmax(J)

    # roc_curve starts the curve at (0, 0) with a sentinel threshold that lies
    # above every score (np.inf in recent scikit-learn releases). That point
    # has J == 0, so argmax lands on it whenever nothing beats chance; using
    # it would classify every sample as negative. The comparison is also
    # False for NaN, which covers the single-class case.
    if not J[best_idx] > 0:
        return 0.5

    return thresholds[best_idx]

class ELMFinalModel:
    """Extreme Learning Machine binary classifier with pickle persistence.

    The hidden layer uses random, untrained weights; only the output weights
    ``beta`` are fitted, via ridge-regularised least squares. Raw outputs are
    squashed with a sigmoid and compared against ``threshold`` to obtain
    class labels.
    """

    def __init__(self, n_hidden=50, activation='sigmoid', random_seed=None, reg_lambda=0.01, threshold=0.5):
        """Store hyper-parameters; weights stay ``None`` until fitted or loaded.

        Args:
            n_hidden: Number of hidden units.
            activation: ``'sigmoid'``, ``'tanh'`` or ``'relu'``. Any other
                value leaves the hidden pre-activations unchanged.
            random_seed: Seed applied to NumPy's global RNG in ``fit``;
                ``None`` leaves the RNG state untouched.
            reg_lambda: Ridge penalty added to the diagonal of ``H.T @ H``.
            threshold: Default probability cut-off used by ``predict``.
        """
        self.n_hidden = n_hidden
        self.activation = activation
        self.random_seed = random_seed
        self.reg_lambda = reg_lambda
        self.threshold = threshold
        self.W = None
        self.b = None
        self.beta = None
        self.is_fitted = False

    def _hidden_layer(self, X):
        """Return the hidden-layer activations for ``X``.

        Shared by ``fit`` and ``predict_proba`` so that training and
        inference cannot drift apart.
        """
        H = np.dot(X, self.W) + self.b

        if self.activation == 'sigmoid':
            # Clip before exponentiating so np.exp cannot overflow.
            return 1 / (1 + np.exp(-np.clip(H, -250, 250)))
        if self.activation == 'tanh':
            return np.tanh(H)
        if self.activation == 'relu':
            return np.maximum(0, H)
        return H

    def fit(self, X_train, y_train):
        """Draw random hidden weights and solve for the output weights.

        Args:
            X_train: Training features; indexed as 2-D (``shape[1]`` is the
                number of features).
            y_train: Training targets, multiplied directly with the hidden
                activations (assumed numeric 0/1 labels - TODO confirm).

        Returns:
            ``self``, to allow call chaining.
        """
        if self.random_seed is not None:
            # Reseeds NumPy's *global* RNG.
            np.random.seed(self.random_seed)

        n_features = X_train.shape[1]

        # Draw order (W first, then b) determines the weights for a given seed.
        self.W = np.random.normal(0, 1, (n_features, self.n_hidden))
        self.b = np.random.normal(0, 1, self.n_hidden)

        H = self._hidden_layer(X_train)

        # Ridge-regularised normal equations: (H'H + lambda*I) beta = H'y.
        gram = H.T @ H + self.reg_lambda * np.identity(H.shape[1])

        try:
            self.beta = np.linalg.pinv(gram) @ H.T @ y_train
        except np.linalg.LinAlgError:
            # Fall back to a least-squares solve of the same system.
            self.beta = np.linalg.lstsq(gram, H.T @ y_train, rcond=None)[0]

        self.is_fitted = True
        return self

    def predict_proba(self, X_test):
        """Return sigmoid-squashed scores for ``X_test``.

        Raises:
            ValueError: If the model has not been fitted or loaded with
                fitted weights.
        """
        if not self.is_fitted:
            raise ValueError("Model belum dilatih.")

        y_pred = self._hidden_layer(X_test) @ self.beta

        # Map the raw linear output onto (0, 1).
        return 1 / (1 + np.exp(-np.clip(y_pred, -250, 250)))

    def predict(self, X_test, threshold=None):
        """Return 0/1 labels: 1 where the score is ``>= threshold``.

        Args:
            X_test: Features to classify.
            threshold: Cut-off to apply; ``None`` uses ``self.threshold``.
        """
        if threshold is None:
            threshold = self.threshold
        y_pred_proba = self.predict_proba(X_test)
        return (y_pred_proba >= threshold).astype(int)

    def save(self, filename):
        """Pickle the weights and hyper-parameters to ``filename``."""
        model_data = {
            'W': self.W,
            'b': self.b,
            'beta': self.beta,
            'n_hidden': self.n_hidden,
            'activation': self.activation,
            'random_seed': self.random_seed,
            'reg_lambda': self.reg_lambda,
            'threshold': self.threshold,
            'is_fitted': self.is_fitted
        }
        with open(filename, 'wb') as f:
            pickle.dump(model_data, f)
        print(f"Model disimpan ke: {filename}")

    @classmethod
    def load(cls, filename):
        """Rebuild a model from a file written by ``save``.

        NOTE(review): ``pickle.load`` can execute arbitrary code; only load
        model files from a trusted source.
        """
        with open(filename, 'rb') as f:
            model_data = pickle.load(f)

        model = cls(
            n_hidden=model_data['n_hidden'],
            activation=model_data['activation'],
            random_seed=model_data['random_seed'],
            reg_lambda=model_data['reg_lambda'],
            threshold=model_data['threshold']
        )

        model.W = model_data['W']
        model.b = model_data['b']
        model.beta = model_data['beta']
        # Honour the persisted flag so that a model saved before training is
        # not reported as fitted. Files without the key keep the previous
        # behaviour (assumed fitted).
        model.is_fitted = bool(model_data.get('is_fitted', True))

        return model

# ================================
# TRAIN MODEL FINAL - SIMPLE VERSION
# ================================

def train_final_model_simple():
    """Train the final ELM on the full dataset, evaluate it, and save it.

    Steps: load ``data_cleaned.pkl``, refit the stored preprocessor on all
    rows, train with ``elm_model_fixed`` using the fixed best configuration,
    pick a threshold with ``find_optimal_threshold``, wrap the weights in an
    ``ELMFinalModel``, print metrics, and write the model and preprocessor to
    timestamped pickle files in the current directory.

    Returns:
        Tuple ``(model, preprocessor, metrics)`` where ``metrics`` maps
        ``'accuracy'``, ``'precision'``, ``'recall'``, ``'f1'`` and ``'auc'``
        to scores.

    NOTE: the metrics and the threshold are computed on the same rows the
    model was trained on, so they are in-sample figures and are expected to
    look better than the cross-validation numbers printed for comparison.
    """

    print("=" * 70)
    print("MEMBANGUN MODEL ELM FINAL - SIMPLE VERSION")
    print("=" * 70)

    # Best configuration; keys match the keyword arguments of both
    # elm_model_fixed and ELMFinalModel so it can be expanded with **.
    BEST_CONFIG = {
        'n_hidden': 50,
        'activation': 'sigmoid',
        'reg_lambda': 1e-8,
        'random_seed': 9011
    }

    print(f"Konfigurasi: {BEST_CONFIG}")

    # 1. Load data
    print("\n1. Memuat data...")
    data_loaded = load_cleaned_data('data_cleaned.pkl')
    data_cleaned = data_loaded['data_cleaned']
    preprocessor = data_loaded['preprocessor']

    # 2. Split features and target
    X = data_cleaned.drop(columns=['diagnosis_lanjutan'])
    y = data_cleaned['diagnosis_lanjutan'].values

    print(f"   • Fitur: {X.shape[1]}, Sampel: {X.shape[0]}")

    # 3. Refit the preprocessor on every row; this fitted instance is the one
    #    saved below and reused at prediction time.
    print("\n2. Preprocessing data...")
    X_processed = preprocessor.fit_transform(X)

    # 4. Train on the whole dataset
    print("3. Melatih model dengan seluruh data...")
    start_time = time.time()

    # The training rows are also passed as the "test" set so that in-sample
    # probabilities are available for threshold selection.
    y_pred_proba, W, b, beta = elm_model_fixed(
        X_processed, y, X_processed,
        **BEST_CONFIG
    )

    # Threshold chosen on the training predictions.
    threshold = find_optimal_threshold(y, y_pred_proba)

    training_time = time.time() - start_time

    print(f"   • Training selesai: {training_time:.4f} detik")
    print(f"   • Threshold optimal: {threshold:.4f}")

    # 5. Wrap the trained weights in a model object
    model = ELMFinalModel(threshold=threshold, **BEST_CONFIG)
    model.W = W
    model.b = b
    model.beta = beta
    model.is_fitted = True

    # 6. Evaluate (in-sample)
    print("\n4. Evaluasi model...")
    from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

    y_pred = (y_pred_proba >= threshold).astype(int)

    metrics = {
        'accuracy': accuracy_score(y, y_pred),
        'precision': precision_score(y, y_pred, zero_division=0),
        'recall': recall_score(y, y_pred, zero_division=0),
        'f1': f1_score(y, y_pred, zero_division=0),
        'auc': roc_auc_score(y, y_pred_proba)
    }

    print(f"   • Accuracy:  {metrics['accuracy']:.4f}")
    print(f"   • Precision: {metrics['precision']:.4f}")
    print(f"   • Recall:    {metrics['recall']:.4f}")
    print(f"   • F1-Score:  {metrics['f1']:.4f}")
    print(f"   • AUC-ROC:   {metrics['auc']:.4f}")

    # 7. Persist model and preprocessor under a shared timestamp so the pair
    #    can be matched up again later.
    print("\n5. Menyimpan model...")
    timestamp = time.strftime("%Y%m%d_%H%M%S")

    model_filename = f'final_elm_model_seed9011_{timestamp}.pkl'
    model.save(model_filename)

    preprocessor_filename = f'elm_preprocessor_{timestamp}.pkl'
    with open(preprocessor_filename, 'wb') as f:
        pickle.dump(preprocessor, f)

    print("\n✅ MODEL BERHASIL DIBANGUN!")
    print("=" * 70)
    print("\n📁 File yang disimpan:")
    print(f"1. {model_filename} - Model ELM")
    print(f"2. {preprocessor_filename} - Preprocessor")

    # 8. Compare against the (hard-coded) cross-validation results
    print("\n" + "=" * 70)
    print("PERBANDINGAN DENGAN HASIL CROSS VALIDATION")
    print("=" * 70)
    print("\nHASIL 10-FOLD CV (Seed 9011):")
    print("  • F1-Score:  0.8911 ± 0.0191")
    print("  • AUC-ROC:   0.9454 ± 0.0273")
    print("  • Accuracy:  0.9102 ± 0.0184")

    print("\nHASIL MODEL FINAL:")
    print(f"  • F1-Score:  {metrics['f1']:.4f}")
    print(f"  • AUC-ROC:   {metrics['auc']:.4f}")
    print(f"  • Accuracy:  {metrics['accuracy']:.4f}")

    return model, preprocessor, metrics

# ================================
# PREDICT WITH MODEL
# ================================

def test_model_prediction():
    """Smoke-test the most recently saved model on the first three rows.

    Finds the newest ``final_elm_model_seed9011_*.pkl`` in the current
    directory, loads it together with the preprocessor saved under the same
    timestamp, and prints actual label, predicted label and probability for
    the first three rows of ``data_cleaned.pkl``.

    Returns:
        ``None``. Prints a message and returns early when no model file or no
        matching preprocessor file is found.
    """

    print("\n" + "=" * 70)
    print("TEST PREDIKSI MODEL")
    print("=" * 70)

    import glob

    model_files = glob.glob('final_elm_model_seed9011_*.pkl')
    if not model_files:
        print("❌ Tidak ada model yang ditemukan.")
        return

    # File names end in a YYYYMMDD_HHMMSS timestamp, so the lexicographically
    # largest name is the newest model.
    model_file = max(model_files)
    preprocessor_file = model_file.replace('final_elm_model_seed9011_', 'elm_preprocessor_')

    print(f"Model: {model_file}")
    print(f"Preprocessor: {preprocessor_file}")

    model = ELMFinalModel.load(model_file)

    # Treat a missing preprocessor like a missing model: report and stop
    # instead of crashing with a traceback.
    try:
        with open(preprocessor_file, 'rb') as f:
            preprocessor = pickle.load(f)
    except FileNotFoundError:
        print(f"❌ Preprocessor tidak ditemukan: {preprocessor_file}")
        return

    # Reuse the training data as a source of example rows.
    data_loaded = load_cleaned_data('data_cleaned.pkl')
    data_cleaned = data_loaded['data_cleaned']

    X_example = data_cleaned.drop(columns=['diagnosis_lanjutan']).head(3)
    y_example = data_cleaned['diagnosis_lanjutan'].head(3).values

    # transform (not fit_transform): the saved preprocessor is already fitted.
    X_example_processed = preprocessor.transform(X_example)

    y_pred_proba = model.predict_proba(X_example_processed)
    y_pred = model.predict(X_example_processed)

    print("\nHasil prediksi 3 sampel pertama:")
    print("-" * 40)
    rows = zip(y_example, y_pred, y_pred_proba)
    for number, (actual, predicted, proba) in enumerate(rows, start=1):
        print(f"Sampel {number}:")
        print(f"  • Actual: {actual}")
        print(f"  • Predicted: {predicted}")
        print(f"  • Probability: {proba:.4f}")
        print(f"  • Status: {'✓' if actual == predicted else '✗'}")
        print()

# ================================
# MAIN EXECUTION
# ================================

if __name__ == "__main__":
    # Script entry point: announce the configuration, train and save the final
    # model, smoke-test the saved artifacts, then print usage instructions.
    print("\n" + "=" * 70)
    print("PEMBENTUKAN MODEL ELM FINAL DENGAN KONFIGURASI TERBAIK")
    print("=" * 70)

    # These values are printed for information only; the configuration that
    # is actually used is defined inside train_final_model_simple().
    print("\nKonfigurasi yang akan digunakan:")
    print("  • n_hidden: 50")
    print("  • activation: sigmoid")
    print("  • reg_lambda: 1e-08")
    print("  • random_seed: 9011")

    # Train model
    model, preprocessor, metrics = train_final_model_simple()

    # Test model
    test_model_prediction()

    print("\n" + "=" * 70)
    print("INSTRUKSI PENGGUNAAN MODEL")
    print("=" * 70)
    print("""
Untuk menggunakan model di masa depan:

1. Load model:
   model = ELMFinalModel.load('final_elm_model_seed9011_YYYYMMDD_HHMMSS.pkl')
   
2. Load preprocessor:
   with open('elm_preprocessor_YYYYMMDD_HHMMSS.pkl', 'rb') as f:
       preprocessor = pickle.load(f)
       
3. Preprocess data baru:
   X_new_processed = preprocessor.transform(X_new)
   
4. Prediksi:
   y_pred_proba = model.predict_proba(X_new_processed)
   y_pred = model.predict(X_new_processed)  # Menggunakan threshold optimal
   
   atau dengan threshold custom:
   y_pred = model.predict(X_new_processed, threshold=0.5)
    """)


PEMBENTUKAN MODEL ELM FINAL DENGAN KONFIGURASI TERBAIK

Konfigurasi yang akan digunakan:
  • n_hidden: 50
  • activation: sigmoid
  • reg_lambda: 1e-08
  • random_seed: 9011
MEMBANGUN MODEL ELM FINAL - SIMPLE VERSION
Konfigurasi: {'n_hidden': 50, 'activation': 'sigmoid', 'reg_lambda': 1e-08, 'random_seed': 9011}

1. Memuat data...
Loading cleaned data...
   • Fitur: 13, Sampel: 457

2. Preprocessing data...
3. Melatih model dengan seluruh data...
   • Training selesai: 0.0081 detik
   • Threshold optimal: 0.6243

4. Evaluasi model...
   • Accuracy:  0.9256
   • Precision: 0.9022
   • Recall:    0.9121
   • F1-Score:  0.9071
   • AUC-ROC:   0.9690

5. Menyimpan model...
Model disimpan ke: final_elm_model_seed9011_20251214_071203.pkl

✅ MODEL BERHASIL DIBANGUN!

📁 File yang disimpan:
1. final_elm_model_seed9011_20251214_071203.pkl - Model ELM
2. elm_preprocessor_20251214_071203.pkl - Preprocessor

PERBANDINGAN DENGAN HASIL CROSS VALIDATION

HASIL 10-FOLD CV 

======================================================================
PEMBENTUKAN MODEL ELM FINAL DENGAN KONFIGURASI TERBAIK
======================================================================

Konfigurasi yang akan digunakan:
  • n_hidden: 50
  • activation: sigmoid
  • reg_lambda: 1e-08
  • random_seed: 9011
======================================================================
MEMBANGUN MODEL ELM FINAL - SIMPLE VERSION
======================================================================
Konfigurasi: {'n_hidden': 50, 'activation': 'sigmoid', 'reg_lambda': 1e-08, 'random_seed': 9011}

1. Memuat data...
Loading cleaned data...
   • Fitur: 13, Sampel: 457

2. Preprocessing data...
3. Melatih model dengan seluruh data...
   • Training selesai: 0.0081 detik
   • Threshold optimal: 0.6243

4. Evaluasi model...
   • Accuracy:  0.9256
   • Precision: 0.9022
   • Recall:    0.9121
   • F1-Score:  0.9071
   • AUC-ROC:   0.9690

5. Menyimpan model...
Model disimpan ke: final_elm_model_seed9011_20251214_071203.pkl

✅ MODEL BERHASIL DIBANGUN!
======================================================================

📁 File yang disimpan:
1. final_elm_model_seed9011_20251214_071203.pkl - Model ELM
2. elm_preprocessor_20251214_071203.pkl - Preprocessor

======================================================================
PERBANDINGAN DENGAN HASIL CROSS VALIDATION
======================================================================

HASIL 10-FOLD CV (Seed 9011):
  • F1-Score:  0.8911 ± 0.0191
  • AUC-ROC:   0.9454 ± 0.0273
  • Accuracy:  0.9102 ± 0.0184

HASIL MODEL FINAL:
  • F1-Score:  0.9071
  • AUC-ROC:   0.9690
  • Accuracy:  0.9256

======================================================================
TEST PREDIKSI MODEL
======================================================================
Model: final_elm_model_seed9011_20251214_071203.pkl
Preprocessor: elm_preprocessor_20251214_071203.pkl
Loading cleaned data...

Hasil prediksi 3 sampel pertama:
----------------------------------------
Sampel 1:
  • Actual: 0
  • Predicted: 0
  • Probability: 0.5490
  • Status: ✓

Sampel 2:
  • Actual: 1
  • Predicted: 1
  • Probability: 0.7896
  • Status: ✓

Sampel 3:
  • Actual: 1
  • Predicted: 1
  • Probability: 0.6856
  • Status: ✓


======================================================================  
INSTRUKSI PENGGUNAAN MODEL
======================================================================

Untuk menggunakan model di masa depan:

1. Load model:
   model = ELMFinalModel.load('final_elm_model_seed9011_YYYYMMDD_HHMMSS.pkl')

2. Load preprocessor:
   with open('elm_preprocessor_YYYYMMDD_HHMMSS.pkl', 'rb') as f:
       preprocessor = pickle.load(f)

3. Preprocess data baru:
   X_new_processed = preprocessor.transform(X_new)

4. Prediksi:
   y_pred_proba = model.predict_proba(X_new_processed)
   y_pred = model.predict(X_new_processed)  # Menggunakan threshold optimal

   atau dengan threshold custom:
   y_pred = model.predict(X_new_processed, threshold=0.5)