In [1]:
import os, sys

In [2]:
import flaml.automl
print(dir(flaml.automl))


['AutoML', 'AutoMLState', 'SearchState', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'automl', 'data', 'logger', 'logger_formatter', 'ml', 'model', 'size', 'spark', 'state', 'task', 'time_series', 'training_log']


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import dataset_for_examples

# from noventis.predictor import NoventisAutoML
# from noventis.predictor import ManualPredictor

from noventis.predictor import ManualPredictor

from noventis.data_cleaner.auto import NoventisDataCleaner

# Manual

In [4]:
import seaborn as sns
import pandas as pd


df_titanic = sns.load_dataset('titanic')

df_titanic = df_titanic.drop(columns=['deck', 'embark_town', 'alive'])

print("Dataset Titanic:")
print(df_titanic.head())
print("\nInfo Dataset:")
df_titanic.info()

Dataset Titanic:
   survived  pclass     sex   age  sibsp  parch     fare embarked  class  \
0         0       3    male  22.0      1      0   7.2500        S  Third   
1         1       1  female  38.0      1      0  71.2833        C  First   
2         1       3  female  26.0      0      0   7.9250        S  Third   
3         1       1  female  35.0      1      0  53.1000        S  First   
4         0       3    male  35.0      0      0   8.0500        S  Third   

     who  adult_male  alone  
0    man        True  False  
1  woman       False  False  
2  woman       False   True  
3  woman       False  False  
4    man        True   True  

Info Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   survived    891 non-null    int64   
 1   pclass      891 non-null    int64   
 2   sex         891 non-null    object  
 3   age        

In [5]:
import seaborn as sns
import pandas as pd

df_tips = sns.load_dataset('tips')

print("Dataset Tips:")
print(df_tips.head())
print("\nInfo Dataset:")
df_tips.info()

df_tips = pd.get_dummies(df_tips, columns=['sex', 'smoker', 'day', 'time'], drop_first=True)
print("\nDataset Tips Setelah di-encode:")
print(df_tips.head())

Dataset Tips:
   total_bill   tip     sex smoker  day    time  size
0       16.99  1.01  Female     No  Sun  Dinner     2
1       10.34  1.66    Male     No  Sun  Dinner     3
2       21.01  3.50    Male     No  Sun  Dinner     3
3       23.68  3.31    Male     No  Sun  Dinner     2
4       24.59  3.61  Female     No  Sun  Dinner     4

Info Dataset:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype   
---  ------      --------------  -----   
 0   total_bill  244 non-null    float64 
 1   tip         244 non-null    float64 
 2   sex         244 non-null    category
 3   smoker      244 non-null    category
 4   day         244 non-null    category
 5   time        244 non-null    category
 6   size        244 non-null    int64   
dtypes: category(4), float64(2), int64(1)
memory usage: 7.4 KB

Dataset Tips Setelah di-encode:
   total_bill   tip  size  sex_Female  smoker_No  day_Fri  day_Sat  d

In [6]:
import seaborn as sns
import pandas as pd

# from manual import ManualPredictor  
# from data_cleaner import NoventisDataCleaner 

print("Tahap 1: Memuat dan mempersiapkan data Titanic...")
df_titanic = sns.load_dataset('titanic')

df_titanic = df_titanic.drop(columns=['deck', 'embark_town', 'alive', 'who', 'adult_male'])
print("Data siap.")
print("-" * 50)

print("Tahap 2: Mengkonfigurasi NoventisDataCleaner...")
cleaner_config = {
    'impute': {'method': 'median'},  
    'encode': {'method': 'auto'},     
    'scale': True                     
}
data_cleaner = NoventisDataCleaner(pipeline_steps=cleaner_config)
print("DataCleaner siap.")
print("-" * 50)

print("Tahap 3: Menjalankan ManualPredictor dengan semua fitur canggih...")
predictor = ManualPredictor(
    model_name=['random_forest', 'lightgbm', 'xgboost'],
    task='classification',
    
    data_cleaner=data_cleaner,              # Integrasikan cleaner yang sudah kita siapkan
    tune_hyperparameters=True,              # Aktifkan tuning dengan Optuna
    n_trials=20,                            # Kita set 20 trial agar tidak terlalu lama untuk tes
    enable_feature_engineering=True,        # Aktifkan pembuatan fitur otomatis
    cv_strategy='stratified',               # Gunakan cross-validation 'stratified' (lebih cepat dari 'repeated' untuk tes)
    show_tuning_plots=False                 # Set ke False agar tidak terlalu banyak plot saat running awal
)

# Jalankan pipeline lengkap
results = predictor.run_pipeline(
    df=df_titanic, 
    target_column='survived',
    compare=True,  # Tampilkan tabel perbandingan hasil
    explain=True   # Tampilkan plot perbandingan metrik
)
print("-" * 50)


# --- 4. Menampilkan Hasil dan Analisis Mendalam ---
print("Tahap 4: Menampilkan hasil akhir dan analisis SHAP...")

# Dapatkan hasil dalam bentuk tabel DataFrame yang rapi
results_df = predictor.get_results_dataframe()
print("\n--- Tabel Hasil Akhir (Diurutkan dari Terbaik) ---")
print(results_df)

# Analisis model terbaik menggunakan SHAP
print("\n--- Analisis SHAP untuk Model Terbaik ---")
# Kita akan lihat bagaimana fitur 'age' dan 'fare' mempengaruhi prediksi
try:
    predictor.explain_model(plot_type='summary')
    predictor.explain_model(plot_type='dependence', feature='age')
    predictor.explain_model(plot_type='dependence', feature='fare')
except Exception as e:
    print(f"Tidak dapat membuat plot SHAP: {e}")

print("\nImplementasi selesai!")

INFO:root:🔧 Menerapkan feature engineering (Polynomial & Interaction)...


Tahap 1: Memuat dan mempersiapkan data Titanic...
Data siap.
--------------------------------------------------
Tahap 2: Mengkonfigurasi NoventisDataCleaner...
DataCleaner siap.
--------------------------------------------------
Tahap 3: Menjalankan ManualPredictor dengan semua fitur canggih...


INFO:root:✅ Feature engineering selesai. Shape data baru: (712, 19)
INFO:root:--- Memproses model: RANDOM_FOREST ---
INFO:root:🔬 Memulai hyperparameter tuning untuk RANDOM_FOREST...
[W 2025-09-17 02:12:06,762] Trial 1 failed with parameters: {'n_estimators': 977, 'max_depth': 19, 'min_samples_split': 15, 'min_samples_leaf': 4, 'class_weight': 'balanced'} because of the following error: ValueError("could not convert string to float: 'male'").
Traceback (most recent call last):
  File "/home/ahmadnafim/.conda/envs/noventis/lib/python3.10/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/home/ahmadnafim/BCC/LaplaceProject/Noventis-Data/noventis/predictor/manual.py", line 256, in objective
    model.fit(X_train_fold, y_train_fold)
  File "/home/ahmadnafim/.conda/envs/noventis/lib/python3.10/site-packages/sklearn/base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/ahmadnafim/.conda/envs/no

[W 2025-09-17 02:12:06,769] Trial 0 failed with parameters: {'n_estimators': 724, 'max_depth': 15, 'min_samples_split': 8, 'min_samples_leaf': 4, 'class_weight': 'balanced'} because of the following error: ValueError("could not convert string to float: 'male'").
Traceback (most recent call last):
  File "/home/ahmadnafim/.conda/envs/noventis/lib/python3.10/site-packages/optuna/study/_optimize.py", line 201, in _run_trial
    value_or_values = func(trial)
  File "/home/ahmadnafim/BCC/LaplaceProject/Noventis-Data/noventis/predictor/manual.py", line 256, in objective
    model.fit(X_train_fold, y_train_fold)
  File "/home/ahmadnafim/.conda/envs/noventis/lib/python3.10/site-packages/sklearn/base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/ahmadnafim/.conda/envs/noventis/lib/python3.10/site-packages/sklearn/ensemble/_forest.py", line 359, in fit
    X, y = validate_data(
  File "/home/ahmadnafim/.conda/envs/noventis/lib/python3.10/site-packages

RuntimeError: Tidak ada model yang berhasil dilatih.