In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import tensorflow as tf
import mlflow
import mlflow.tensorflow
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import os
import psycopg2
from sqlalchemy import create_engine
from dotenv import load_dotenv

In [2]:
env_path = os.path.abspath("../.env")
load_dotenv(dotenv_path=env_path)

DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")

engine = create_engine(
    f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

df = pd.read_sql("SELECT * FROM feature.marketing", engine)
df

Unnamed: 0,customer_id,total_clicks,total_impressions,ctr
0,AA23839,109,1,109.0
1,AA22508,160,1,160.0
2,AA1284,665,1,665.0
3,AA19211,273,1,273.0
4,AA22839,150,2,75.0
...,...,...,...,...
41183,AA28007,647,2,323.5
41184,AA9274,18,2,9.0
41185,AA10943,26,1,26.0
41186,AA2644,262,2,131.0


# Load marketing data from postgreSQL

In [3]:
# Create database connection
engine = create_engine(
    f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}"
)

# Load data from database
df = pd.read_sql("SELECT * FROM feature.marketing", engine)
print(f"‚úÖ Data loaded: {len(df)} rows")
df

‚úÖ Data loaded: 41188 rows


Unnamed: 0,customer_id,total_clicks,total_impressions,ctr
0,AA23839,109,1,109.0
1,AA22508,160,1,160.0
2,AA1284,665,1,665.0
3,AA19211,273,1,273.0
4,AA22839,150,2,75.0
...,...,...,...,...
41183,AA28007,647,2,323.5
41184,AA9274,18,2,9.0
41185,AA10943,26,1,26.0
41186,AA2644,262,2,131.0


In [4]:
df.describe()

Unnamed: 0,total_clicks,total_impressions,ctr
count,41188.0,41188.0,41188.0
mean,258.28501,2.567593,165.464914
std,259.279249,2.770014,199.482442
min,0.0,1.0,0.0
25%,102.0,1.0,47.5
50%,180.0,2.0,104.0
75%,319.0,3.0,207.0
max,4918.0,56.0,4918.0


# Set feature and target

In [5]:
X = df[["ctr"]]  # feature: ctr
y = (df["ctr"] > 0.1).astype(int) # proxy target: 1 if ctr > 0.1 else 0

# Scaling and split data

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X=X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Build Tensorflow model

In [7]:
model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(1,)),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1 , activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')]
)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                128       
                                                                 
 dense_1 (Dense)             (None, 32)                2080      
                                                                 
 dense_2 (Dense)             (None, 1)                 33        
                                                                 
Total params: 2241 (8.75 KB)
Trainable params: 2241 (8.75 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                128       
                                                                 
 dense_1 (Dense)        

2025-12-25 18:49:39.691029: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M4
2025-12-25 18:49:39.691370: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2025-12-25 18:49:39.691374: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.92 GB
2025-12-25 18:49:39.691568: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-12-25 18:49:39.691835: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


# Train and log to mlflow

In [8]:
# Set tracking URI - gunakan path absolut ke project root mlruns
import os
import shutil
project_root = os.path.abspath("..")
mlruns_path = os.path.join(project_root, "mlruns")
mlflow.set_tracking_uri(f"file:{mlruns_path}")
mlflow.set_experiment("marketing_model_notebook")

print("="*60)
print("üöÄ MEMULAI TRAINING MODEL TENSORFLOW")
print("="*60)
print(f"üìÅ MLflow tracking: {mlruns_path}")

with mlflow.start_run() as run:
    print(f"\nüìã Informasi Training:")
    print(f"   - Training samples: {len(X_train)}")
    print(f"   - Test samples: {len(X_test)}")
    print(f"   - Epochs: 10")
    print(f"   - Batch size: 32")
    print(f"   - Validation split: 20%")
    
    print(f"\nüîÑ Mulai training model...")
    print("-"*60)
    
    # Train model dengan verbose=1 untuk melihat progress
    history = model.fit(
        X_train,
        y_train,
        epochs=10,
        batch_size=32,
        validation_split=0.2,
        verbose=1  # Tampilkan progress bar dan metrics per epoch
    )
    
    print("-"*60)
    print(f"‚úÖ Training selesai!\n")
    
    # Tampilkan summary metrics dari training
    print(f"üìä Training History Summary:")
    print(f"   - Final training loss: {history.history['loss'][-1]:.4f}")
    print(f"   - Final training AUC: {history.history['auc'][-1]:.4f}")
    print(f"   - Final validation loss: {history.history['val_loss'][-1]:.4f}")
    print(f"   - Final validation AUC: {history.history['val_auc'][-1]:.4f}")
    
    # Evaluate pada test set
    print(f"\nüß™ Evaluasi pada Test Set...")
    loss, auc = model.evaluate(X_test, y_test, verbose=1)
    
    print(f"\nüìà Test Set Results:")
    print(f"   - Test Loss: {loss:.4f}")
    print(f"   - Test AUC: {auc:.4f}")
    
    # Log metrics ke MLflow
    print(f"\nüíæ Logging metrics ke MLflow...")
    mlflow.log_metric("test_auc", auc)
    mlflow.log_metric("test_loss", loss)
    mlflow.log_metric("final_train_loss", history.history['loss'][-1])
    mlflow.log_metric("final_train_auc", history.history['auc'][-1])
    mlflow.log_metric("final_val_loss", history.history['val_loss'][-1])
    mlflow.log_metric("final_val_auc", history.history['val_auc'][-1])
    
    # Simpan model - manual copy ke artifacts directory
    print(f"\nüíæ Menyimpan model ke MLflow...")
    
    # Get experiment info
    experiment = mlflow.get_experiment_by_name("marketing_model_notebook")
    run_id = run.info.run_id
    
    # Construct artifacts directory path
    artifacts_dir = os.path.join(mlruns_path, experiment.experiment_id, run_id, "artifacts", "model")
    os.makedirs(artifacts_dir, exist_ok=True)
    
    # Simpan model langsung ke artifacts directory
    model_path = os.path.join(artifacts_dir, "marketing_model.keras")
    model.save(model_path)
    print(f"   ‚úÖ Model tersimpan di: {model_path}")
    
    # Summary akhir
    print("\n" + "="*60)
    print("üéâ TRAINING SELESAI!")
    print("="*60)
    print(f"üìä Run ID: {run_id}")
    print(f"üìà Test AUC: {auc:.4f}")
    print(f"üìâ Test Loss: {loss:.4f}")
    print(f"üîó MLflow UI: http://localhost:5399")
    print(f"üìÅ Experiment: marketing_model_notebook")
    print(f"üìÇ MLruns path: {mlruns_path}")
    print(f"üì¶ Model artifact: {experiment.experiment_id}/{run_id}/artifacts/model/marketing_model.keras")
    print("="*60)


üöÄ MEMULAI TRAINING MODEL TENSORFLOW
üìÅ MLflow tracking: /Users/miftahhadiyannoor/Documents/Kredivo-Project/mlruns

üìã Informasi Training:
   - Training samples: 32950
   - Test samples: 8238
   - Epochs: 10
   - Batch size: 32
   - Validation split: 20%

üîÑ Mulai training model...
------------------------------------------------------------
Epoch 1/10
Epoch 1/10


2025-12-25 18:49:40.313900: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/10
Epoch 2/10
Epoch 3/10
Epoch 3/10
Epoch 4/10
Epoch 4/10
Epoch 5/10
Epoch 5/10
Epoch 6/10
Epoch 6/10
Epoch 7/10
Epoch 7/10
Epoch 8/10
Epoch 8/10
Epoch 9/10
Epoch 9/10
Epoch 10/10
Epoch 10/10
------------------------------------------------------------
‚úÖ Training selesai!

üìä Training History Summary:
   - Final training loss: 0.0005
   - Final training AUC: 0.5000
   - Final validation loss: 0.0031
   - Final validation AUC: 0.5000

üß™ Evaluasi pada Test Set...
  1/258 [..............................] - ETA: 5s - loss: 1.3634e-05 - auc: 0.0000e+00------------------------------------------------------------
‚úÖ Training selesai!

üìä Training History Summary:
   - Final training loss: 0.0005
   - Final training AUC: 0.5000
   - Final validation loss: 0.0031
   - Final validation AUC: 0.5000

üß™ Evaluasi pada Test Set...

üìà Test Set Results:
   - Test Loss: 0.0013
   - Test AUC: 0.5000

üíæ Logging metrics ke MLflow...

üíæ Menyimpan model ke MLflow...

üìà Test S

# Access to mlflow

In [9]:
import tensorflow as tf
import os
import glob

print("="*60)
print("üîç MENCARI DAN LOAD MODEL DARI MLFLOW")
print("="*60)

# Set mlflow tracking URI - gunakan path absolut yang sama dengan training
project_root = os.path.abspath("..")
mlruns_path = os.path.join(project_root, "mlruns")
mlflow.set_tracking_uri(f"file:{mlruns_path}")

print(f"üìÅ MLflow tracking: {mlruns_path}")

try:
    experiment = mlflow.get_experiment_by_name("marketing_model_notebook")

    if experiment is None:
        raise ValueError("‚ùå Experiment 'marketing_model_notebook' tidak ditemukan!\n"
                        "   Silakan jalankan cell training terlebih dahulu.")
    
    print(f"\n‚úÖ Experiment ditemukan:")
    print(f"   - Experiment ID: {experiment.experiment_id}")
    print(f"   - Experiment Name: {experiment.name}")
    
    # Cari semua runs
    runs = mlflow.search_runs(
        experiment_ids=[experiment.experiment_id],
        order_by=["start_time DESC"],
        max_results=5
    )

    if len(runs) == 0:
        raise ValueError("‚ùå Tidak ada runs dalam experiment ini!\n"
                        "   Silakan jalankan cell training terlebih dahulu.")
    
    print(f"\nüìä Ditemukan {len(runs)} run(s)")
    print(f"   Mencari run dengan model artifact...")
    
    # Cari run yang memiliki model artifact
    model_loaded = False
    
    for idx, run_row in runs.iterrows():
        run_id = run_row["run_id"]
        run_name = run_row.get("tags.mlflow.runName", "N/A")
        
        # Coba beberapa kemungkinan path
        possible_paths = [
            os.path.join(mlruns_path, experiment.experiment_id, run_id, "artifacts/model/marketing_model.keras"),
            os.path.join(mlruns_path, experiment.experiment_id, run_id, "artifacts/model/temp_marketing_model.keras"),
            os.path.join(mlruns_path, experiment.experiment_id, run_id, "artifacts/model"),
        ]
        
        # Cek juga dengan glob untuk mencari file .keras
        glob_pattern = os.path.join(mlruns_path, experiment.experiment_id, run_id, "artifacts/**/*.keras")
        keras_files = glob.glob(glob_pattern, recursive=True)
        
        if keras_files:
            possible_paths.extend(keras_files)
        
        for model_path in possible_paths:
            if os.path.exists(model_path):
                # Jika path adalah direktori, cari file .keras di dalamnya
                if os.path.isdir(model_path):
                    # Cari file .keras dalam direktori
                    keras_in_dir = glob.glob(os.path.join(model_path, "*.keras"))
                    if keras_in_dir:
                        model_path = keras_in_dir[0]
                    else:
                        continue
                
                try:
                    print(f"\nüîÑ Mencoba load model dari run: {run_name}")
                    print(f"   Path: {model_path}")
                    
                    # Load model
                    loaded_model = tf.keras.models.load_model(model_path)
                    
                    print("\n" + "="*60)
                    print("‚úÖ MODEL BERHASIL DI-LOAD!")
                    print("="*60)
                    print(f"üìä Run ID: {run_id}")
                    print(f"üìõ Run Name: {run_name}")
                    print(f"üìÖ Run Date: {run_row['start_time']}")
                    print(f"üìÅ Model path: {model_path}")
                    
                    # Metrics
                    if 'metrics.test_auc' in run_row:
                        print(f"üìà Test AUC: {run_row['metrics.test_auc']:.4f}")
                    if 'metrics.test_loss' in run_row:
                        print(f"üìâ Test Loss: {run_row['metrics.test_loss']:.4f}")
                    
                    print("\nüèóÔ∏è Model Architecture:")
                    loaded_model.summary()
                    
                    # Test prediction
                    print("\nüîÆ Sample Predictions (5 test samples):")
                    predictions = loaded_model.predict(X_test[:5], verbose=0)
                    for i, pred in enumerate(predictions):
                        actual = y_test.iloc[i] if hasattr(y_test, 'iloc') else y_test[i]
                        print(f"   Sample {i+1}: Prediction={pred[0]:.4f}, Actual={actual}")
                    
                    print("="*60)
                    
                    model_loaded = True
                    break
                    
                except Exception as load_error:
                    print(f"   ‚ö†Ô∏è Gagal load dari path ini: {load_error}")
                    continue
        
        if model_loaded:
            break
    
    if not model_loaded:
        print("\n" + "="*60)
        print("‚ùå TIDAK ADA MODEL YANG TERSIMPAN")
        print("="*60)
        print("\nüìã Runs yang ditemukan:")
        for idx, run_row in runs.iterrows():
            run_id = run_row["run_id"]
            run_name = run_row.get("tags.mlflow.runName", "N/A")
            start_time = run_row["start_time"]
            print(f"\n   Run: {run_name}")
            print(f"   ID: {run_id}")
            print(f"   Date: {start_time}")
            
            # Cek artifacts directory
            artifacts_dir = os.path.join(mlruns_path, experiment.experiment_id, run_id, "artifacts")
            if os.path.exists(artifacts_dir):
                files = os.listdir(artifacts_dir)
                print(f"   Artifacts: {files if files else 'KOSONG ‚ùå'}")
            else:
                print(f"   Artifacts: Direktori tidak ada ‚ùå")
        
        print("\nüí° SOLUSI:")
        print("   1. Jalankan cell training (cell ke-12) untuk train dan save model")
        print("   2. Tunggu sampai muncul pesan '‚úÖ Model saved'")
        print("   3. Jalankan kembali cell ini untuk load model")
        print("="*60)

except Exception as e:
    print(f"\n‚ùå Error: {e}")
    import traceback
    traceback.print_exc()


üîç MENCARI DAN LOAD MODEL DARI MLFLOW
üìÅ MLflow tracking: /Users/miftahhadiyannoor/Documents/Kredivo-Project/mlruns

‚úÖ Experiment ditemukan:
   - Experiment ID: 628625969179089555
   - Experiment Name: marketing_model_notebook

üìä Ditemukan 1 run(s)
   Mencari run dengan model artifact...

üîÑ Mencoba load model dari run: powerful-hen-526
   Path: /Users/miftahhadiyannoor/Documents/Kredivo-Project/mlruns/628625969179089555/8b289fb1a6114e598f70a4b728661255/artifacts/model/marketing_model.keras

‚úÖ MODEL BERHASIL DI-LOAD!
üìä Run ID: 8b289fb1a6114e598f70a4b728661255
üìõ Run Name: powerful-hen-526
üìÖ Run Date: 2025-12-25 10:49:40.074000+00:00
üìÅ Model path: /Users/miftahhadiyannoor/Documents/Kredivo-Project/mlruns/628625969179089555/8b289fb1a6114e598f70a4b728661255/artifacts/model/marketing_model.keras
üìà Test AUC: 0.5000
üìâ Test Loss: 0.0013

üèóÔ∏è Model Architecture:
Model: "sequential"
_________________________________________________________________
 Layer (type)