In [1]:
import pandas as pd
import os, sys
from sklearn.datasets import make_regression

module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

from noventis.predictor import NoventisAutoML

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd
from sklearn.datasets import make_classification, make_regression

# 1. Membuat Data untuk Klasifikasi (Prediksi Churn Pelanggan)
X_class, y_class = make_classification(
    n_samples=500,
    n_features=15,
    n_informative=5,
    n_redundant=2,
    n_classes=2,
    flip_y=0.05,
    random_state=42
)
df_class = pd.DataFrame(X_class, columns=[f'feature_{i}' for i in range(15)])
df_class['churn'] = y_class
df_class.to_csv('dummy_classification_churn.csv', index=False)
print("✅ File 'dummy_classification_churn.csv' berhasil dibuat.")


# 2. Membuat Data untuk Regresi (Prediksi Harga Rumah)
X_reg, y_reg = make_regression(
    n_samples=300,
    n_features=10,
    n_informative=6,
    noise=15,
    random_state=42
)
df_reg = pd.DataFrame(X_reg, columns=[f'attr_{i}' for i in range(10)])
df_reg['house_price'] = y_reg
df_reg.to_csv('dummy_regression_house_price.csv', index=False)
print("✅ File 'dummy_regression_house_price.csv' berhasil dibuat.")

✅ File 'dummy_classification_churn.csv' berhasil dibuat.
✅ File 'dummy_regression_house_price.csv' berhasil dibuat.


In [3]:
automl_classifier = NoventisAutoML(
    data='dummy_classification_churn.csv',
    target='churn',
    time_budget=30  #
)

results_class = automl_classifier.fit()

✅ Data berhasil dimuat dari file: dummy_classification_churn.csv
📊 Shape data: (500, 16)
📋 Kolom: ['feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5', 'feature_6', 'feature_7', 'feature_8', 'feature_9', 'feature_10', 'feature_11', 'feature_12', 'feature_13', 'feature_14', 'churn']
✅ Tipe tugas terdeteksi: classification
✅ Data berhasil dibagi: Train=400, Test=100
📈 Target distribution: {0: np.int64(254), 1: np.int64(246)}
🚀 Memulai proses AutoML dengan FLAML...
⏳ Melatih model (Metrik: macro_f1, Waktu: 60s)...


INFO:flaml.tune.searcher.blendsearch:No low-cost partial config given to the search algorithm. For cost-frugal search, consider providing low-cost values for cost-related hps via 'low_cost_partial_config'. More info can be found at https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune
INFO:root:Running internal preprocessor to handle data types and missing values...
INFO:root:✅ Internal preprocessing complete. All data is now numeric.
INFO:root:--- Processing model: LOGISTIC_REGRESSION ---
INFO:root:Training finished in 0.02 seconds.
INFO:root:--- Processing model: RANDOM_FOREST ---



🔍 Memulai perbandingan dengan model lain...


INFO:root:Training finished in 0.46 seconds.
INFO:root:--- Processing model: XGBOOST ---
INFO:root:Training finished in 0.34 seconds.
INFO:root:--- Processing model: DECISION_TREE ---
INFO:root:Training finished in 0.02 seconds.
INFO:root:--- Processing model: LIGHTGBM ---
INFO:root:Training finished in 0.37 seconds.
INFO:root:--- Processing model: CATBOOST ---
INFO:root:Training finished in 3.85 seconds.
INFO:root:--- Processing model: GRADIENT_BOOSTING ---
INFO:root:Training finished in 1.00 seconds.
INFO:root:
--- Process Complete ---
INFO:root:🏆 Best Model: LIGHTGBM with f1_score = 0.9400
INFO:root:Saving model 'lightgbm' to Noventis_Results/best_model.pkl...
INFO:root:✅ Model saved successfully.


📄 Comparison report disimpan di: Noventis_Results/model_comparison_report.txt
📊 Hasil perbandingan model disimpan di direktori 'Noventis_Results'.
METRICS = f1_score


KeyError: 'Lightgbm'

In [None]:
manual_regressor = NoventisAutoML(
    data='dummy_regression_house_price.csv',
    target='house_price',
    models=['lightgbm', 'rf'], 
    compare=False,         
    explain=True
)

results_reg = manual_regressor.fit()

print("\n🏆 Proses Regresi Manual Selesai!")

INFO:root:Running internal preprocessor to handle data types and missing values...
INFO:root:✅ Internal preprocessing complete. All data is now numeric.
INFO:root:--- Processing model: LGBM ---
ERROR:root:Failed to process model lgbm: Model 'lgbm' is not recognized for task 'regression'.
INFO:root:--- Processing model: RF ---


ERROR:root:Failed to process model rf: Model 'rf' is not recognized for task 'regression'.


✅ Data berhasil dimuat dari file: dummy_regression_house_price.csv
📊 Shape data: (300, 11)
📋 Kolom: ['attr_0', 'attr_1', 'attr_2', 'attr_3', 'attr_4', 'attr_5', 'attr_6', 'attr_7', 'attr_8', 'attr_9', 'house_price']
✅ Tipe tugas terdeteksi: regression
✅ Data berhasil dibagi: Train=240, Test=60
📈 Target distribution: Range: -477.78 - 515.23
🚀 Memulai proses AutoML dengan FLAML...


RuntimeError: No models were trained successfully. Please check your data or configuration.

In [None]:
display(result_reg)

In [21]:
# import pandas as pd
# import numpy as np

# new_customers = pd.DataFrame(
#     np.random.rand(5, 15), 
#     columns=[f'feature_{i}' for i in range(15)]
# )

# predictor = NoventisAutoML(data='dummy_classification_churn.csv', target='churn')
# best_model = predictor.load_model('/home/ahmadnafim/BCC/LaplaceProject/Noventis-Data/examples/noventis_result_automl+givenModels/best_model.pkl')

# if best_model:
#     predictions = best_model.predict(new_customers)
#     print("\nHasil Prediksi Churn untuk Customer Baru:")
#     print(predictions)

In [22]:
comprehensive_run = NoventisAutoML(
    # --- Parameter Data & Tugas ---
    data='dummy_classification_churn.csv',  # Menggunakan data churn pelanggan
    target='churn',                          # Kolom target yang akan diprediksi
    task='classification',                   # Menetapkan tugas secara eksplisit

    # --- Parameter Kontrol Model & Perbandingan ---
    models=['lgbm', 'xgboost', 'rf'],        # Model manual yang akan jadi pembanding
    compare=True,                            # WAJIB True untuk membandingkan AutoML vs. `models`
    metrics='accuracy',                      # Metrik utama untuk menentukan model terbaik
    time_budget=60,                          # Batas waktu 60 detik untuk pencarian AutoML

    # --- Parameter Konfigurasi Output & Proses ---
    output_dir='Full_Run_Churn_Analysis',    # Menyimpan semua hasil di folder khusus
    explain=True,                            # Membuat semua plot dan laporan secara otomatis
    test_size=0.25,                          # Menggunakan 25% data untuk testing
    random_state=123                         # Menggunakan seed agar hasil bisa direproduksi
)

# Menjalankan seluruh pipeline
results = comprehensive_run.fit()

✅ Data berhasil dimuat dari file: dummy_classification_churn.csv
📊 Shape data: (500, 16)
📋 Kolom: ['feature_0', 'feature_1', 'feature_2', 'feature_3', 'feature_4', 'feature_5', 'feature_6', 'feature_7', 'feature_8', 'feature_9', 'feature_10', 'feature_11', 'feature_12', 'feature_13', 'feature_14', 'churn']
✅ Data berhasil dibagi: Train=375, Test=125
📈 Target distribution: {0: np.int64(254), 1: np.int64(246)}
🚀 Memulai proses AutoML dengan FLAML...
⏳ Melatih model (Metrik: accuracy, Waktu: 60s)...


INFO:flaml.tune.searcher.blendsearch:No low-cost partial config given to the search algorithm. For cost-frugal search, consider providing low-cost values for cost-related hps via 'low_cost_partial_config'. More info can be found at https://microsoft.github.io/FLAML/docs/FAQ#about-low_cost_partial_config-in-tune
INFO:root:--- Memproses model: LGBM ---
ERROR:root:Gagal memproses model lgbm: Model 'lgbm' tidak dikenali untuk task 'classification'.
INFO:root:--- Memproses model: XGBOOST ---


💾 Model berhasil disimpan di: Full_Run_Churn_Analysis/best_automl_model.pkl

🔍 Memulai perbandingan dengan model lain...


INFO:root:✅ Selesai: XGBOOST | Metrik utama: 0.8877
INFO:root:--- Memproses model: RF ---
ERROR:root:Gagal memproses model rf: Model 'rf' tidak dikenali untuk task 'classification'.
INFO:root:
--- Proses Selesai ---
INFO:root:🏆 Model Terbaik: XGBOOST dengan f1_score = 0.8877


⚠️  Peringatan: Melewati model 'lgbm' karena gagal saat training.
⚠️  Peringatan: Melewati model 'rf' karena gagal saat training.


INFO:root:Menyimpan pipeline lengkap 'xgboost' ke Full_Run_Churn_Analysis/best_model_without_automl.pkl...
INFO:root:✅ Pipeline berhasil disimpan.


📄 Comparison report disimpan di: Full_Run_Churn_Analysis/model_comparison_report.txt
📊 Hasil perbandingan model disimpan di direktori 'Full_Run_Churn_Analysis'.

🎉 Proses AutoML Selesai!
📊 Membuat visualisasi...
⚠️ Error saat membuat visualisasi: 'feature_importance'
⚠️ Error saat membuat summary: string indices must be integers
📊 Visualisasi berhasil dibuat dan disimpan di direktori 'Full_Run_Churn_Analysis'!
