In [1]:
#Multi-Modal Explainanble customer churn prediction system

In [2]:
!pip install sentence-transformers transformers torch torchvision
!pip install shap

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch)
  Downloading nvidia_curand_cu12-10.3.5

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from transformers import AutoTokenizer, AutoModel
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score
import shap
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings("ignore")


In [4]:
#Data Generation and preprocessing
class DataGenerator:
  #GENERATE SYNTHETIC MULTI-MODAL CUSTOMER DATA FOR DEMO
  def __init__(self, n_customers=10000, sequence_length=30, random_state=42):
    np.random.seed(random_state)
    torch.manual_seed(random_state)
    self.n_customers = n_customers
    self.sequence_length = sequence_length

  def generate_behavioural_sequences(self):
    #Generate behavioural sequences (e.g, daily app usage, clicks, time spent)
    #generate patterns for churned vs non churned users
    behavioural_data = []
    labels = []

    for i in range(self.n_customers):
      # determine if customer will churn (30% churn rate)
      will_churn = np.random.random() < 0.3

      if will_churn:
        #Declining engagement pattern
        base_activity = np.random.exponential(2, self.sequence_length)
        decay_factor = np.linspace(1, 0.1, self.sequence_length)
        activity = base_activity * decay_factor

      else:
        #stable or increasing engagement
        base_activity = np.random.exponential(3, self.sequence_length)
        growth_factor = np.linspace(0.1, 1.2, self.sequence_length)
        activity = base_activity * growth_factor

      #Add noise and multiple behavioural features
      features = np.column_stack([
          activity, #daily usage hours
          np.random.poisson(activity * 5), #daily clicks
          np.random.exponential(activity), # session_duration
          np.random.binomial(1, activity / np.max(activity)),
      ])

      behavioural_data.append(features)
      labels.append(int(will_churn))

    return np.array(behavioural_data), np.array(labels)

  def generate_textual_data(self, labels):
    #generate customer support logs and feedback text
     positive_phrases = [
         "Great service, love the features",
            "Excellent experience, highly recommend",
            "Amazing platform, very satisfied",
            "Outstanding support team",
            "Perfect for my needs"
     ]

     negative_phrases = [
          "Having issues with the platform",
            "Disappointed with recent changes",
            "Considering canceling subscription",
            "Too expensive for what it offers",
            "Technical problems persist"
      ]

     textual_data = []
     for label in labels:
        if label == 1: # churned cus
            text = np.random.choice(negative_phrases)
        else: # Non-Churned
            text = np.random.choice(positive_phrases)

        #add some randomness
        if np.random.random() < 0.2:
           #20% chance of opposite sentiment
           if label == 1:
            text = np.random.choice(positive_phrases)
           else:
            text = np.random.choice(negative_phrases)

        textual_data.append(text)

     return np.array(textual_data)

  def generate_structured_data(self, labels):
    #generate structural customer features
    structured_data = []

    for label in labels:
      #create correlated features with churn
      if label == 1: # churned cus
          tenure = np.random.exponential(5) #shorter tenure
          monthly_changes = np.random.normal(80, 20) #higher charges
          support_tickets = np.random.poisson(3) #more tickets
          plan_changes = np.random.poisson(2) #more plan changes
      else: # Non-Churned
          tenure = np.random.exponential(15) #longer tenure
          monthly_changes = np.random.normal(50, 15) #lower charges
          support_tickets = np.random.poisson(1) #less tickets
          plan_changes = np.random.poisson(0.5) #less plan changes

      features = [
          tenure,
          monthly_changes,
          support_tickets,
          plan_changes,
          np.random.choice([0,1]), #auto pay
          np.random.choice([0, 1, 2]) #contract type
      ]

      structured_data.append(features)

    return np.array(structured_data)

In [5]:
#Self supervised learning component
class Time2Vec(nn.Module):
  #Time2Vec encoding for temporal sequences

  def __init__(self, input_dim, output_dim):
    super(Time2Vec, self).__init__()
    self.linear = nn.Linear(input_dim, output_dim - 1)
    self.periodic = nn.Linear(input_dim, 1)

  def forward(self, x):
    # x shape: (batch_size, seq_len, input_dim)
    linear_out = self.linear(x)
    periodic_out = torch.sin(self.periodic(x))
    return torch.cat([linear_out, periodic_out], dim=-1)

In [6]:
from operator import neg
class SimCLRBehavioural(nn.Module):
  #SimClR- inspired self-supervised learning for behavioural sequences

  def __init__(self, input_dim, hidden_dim=128, projection_dim=64):
    super(SimCLRBehavioural, self).__init__()

    #encoder
    self.encoder = nn.Sequential(
        nn.Linear(input_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, projection_dim)

    )

    #projection head
    self.projection_head = nn.Sequential(
        nn.Linear(projection_dim, hidden_dim),
        nn.ReLU(),
        nn.Linear(hidden_dim, projection_dim)
    )

  def forward(self, x):
    #x shape: (batch_size, seq_len, input_dim)
    #pool over sequence dimension
    x_pooled = torch.mean(x, dim=1)

    #encode
    encoded = self.encoder(x_pooled)

    #project
    projected = self.projection_head(encoded)

    return encoded, projected

  def contrastive_loss(self, z1, z2, temperature=0.5):
    #calculate contrastive loss btw pairs
    batch_size = z1.size[0]

    #normalize embeddings
    z1_norm = F.normalize(z1, dim=1)
    z2_norm = F.normalize(z2, dim=1)

    #compute similarity matrix
    sim_matrix = torch.matmul(z1_norm, z2_norm.T / temperature)

    #create positive mask
    pos_mask = torch.eye(batch_size, device=z1.device).bool()

    #compute loss
    exp_sim = torch.exp(sim_matrix)
    pos_sim = exp_sim[pos_mask]
    neg_sim = exp_sim[pos_mask].view(batch_size, -1).sum(dim=1)

    loss = -torch.log(pos_sim / (pos_sim + neg_sim))

    return loss.mean()

In [7]:
#Temporal transformer architecture
class TemporalTransformer(nn.Module):
  #transformer with Time2Vec for sequential behavioural modeling
  def __init__(self, input_dim, d_model=128, nhead=8, num_layers=3, dropout=0.1):
    super(TemporalTransformer, self).__init__()

    self.d_model = d_model
    self.input_projections = nn.Linear(input_dim, d_model)
    self.time2vec = Time2Vec(input_dim, d_model)

    #Transformer layers
    encoder_layer = nn.TransformerEncoderLayer(
        d_model=d_model,
        nhead=nhead,
        dim_feedforward= d_model * 4,
        dropout=dropout,
        batch_first=True
    )
    self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

    #output projection
    self.output_projection = nn.Linear(d_model, d_model)

  def forward(self, x):
    #x shape: (batch_size, seq_len, input_dim)
    batch_size, seq_len, input_dim = x.shape

    #apply TimeVec encoding
    time_encoded = self.time2vec(x)

    #apply transformer
    transformer_out = self.transformer(time_encoded)

    #global average pooling
    pooled = torch.mean(transformer_out, dim=1)

    #output projection
    output = self.output_projection(pooled)

    return output

In [8]:
#Textual feature extraction
class TextualFeatureExtractor:
  #extract sentiment and intent features from customer text using LLM embeddings

  def __init__(self, model_name = "sentence-transformers/all-MiniLM-L6-v2"):
    self.tokenizer = AutoTokenizer.from_pretrained(model_name)
    self.model = AutoModel.from_pretrained(model_name)
    self.model.eval()

  def extract_features(self, texts, batch_size=32):
    #extract embeddings from text data
    all_embeddings = []

    for i in range(0, len(texts), batch_size):
      batch_text = texts[i:i+batch_size]

      #tokenize
      inputs = self.tokenizer(
          batch_text,
          padding=True,
          truncation=True,
          max_length=512,
          return_tensors="pt"
      )

      #extract embeddings
      with torch.no_grad():
        outputs = self.model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1)
        all_embeddings.append(embeddings)

    return torch.cat(all_embeddings, dim=0)


In [9]:
#Multi-Modal Fusion Model
class MultiModalChurnPredictor(nn.Module):
  #Late-Fusion multi-modal model for churn prediction
  def __init__(self, behavioural_dim=4, textual_dim=384, structured_dim=6,hidden_dim=128, dropout=0.2):
    super(MultiModalChurnPredictor, self).__init__()

    #Behavioural Pathway
    self.behavioural_transformer = TemporalTransformer(input_dim=behavioural_dim, d_model=hidden_dim)

    #Textual Pathway
    self.textual_projection = nn.Sequential(
        nn.Linear(textual_dim, hidden_dim),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim, hidden_dim)
    )

    #Structured Pathway
    self.structured_projection = nn.Sequential(
        nn.Linear(structured_dim, hidden_dim),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim, hidden_dim)
    )

    #fusion and classification
    self.fusion = nn.Sequential(
        nn.Linear(hidden_dim * 3, hidden_dim*2),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim * 2, hidden_dim),
        nn.ReLU(),
        nn.Dropout(dropout),
        nn.Linear(hidden_dim, 1)
    )

  def forward(self, behavioural_seq, textual_emb, structured_features):
    #Process each modality
    behavioural_repr = self.behavioural_transformer(behavioural_seq)
    textual_repr = self.textual_projection(textual_emb)
    structured_repr = self.structured_projection(structured_features)

    #late fusion
    fused_repr = torch.cat([behavioural_repr, textual_repr, structured_repr], dim=1)

    #classification
    output = self.fusion(fused_repr)

    return torch.sigmoid(output)


In [10]:
#Explainability Components
class ChurnExplainer:
  #SHAP-based explainability for churn predictions

  def __init__(self, model, feature_names):
    self.model = model
    self.feature_names = feature_names

  def explain_predictions(self, data_sample, background_data=None):
    #Generate SHAP explainations for predictions

    #create wrapper function for SHAP
   def model_wrapper(x):
    #convert to tensors and make predictions
    behavioural_seq = torch.tensor(x[:, :120].reshape(-1, 30, 4), dtype=torch.float32)
    textual_emb = torch.tensor(x[:, 120:504], dtype=torch.float32)
    structured_features = torch.tensor(x[:, 504:], dtype=torch.float32)


    self.model.eval()
    with torch.no_grad():
      predictions = self.model(behavioural_seq.to(self.model.fusion[0].weight.device), textual_emb.to(self.model.fusion[0].weight.device), structured_features.to(self.model.fusion[0].weight.device))

    return predictions.cpu().numpy()

    #USE SHAP KERNELEXPLAINER
    if background_data is None:
      background_data = data_sample[:100] #use subset as background

    explainer = shap.KernelExplainer(model_wrapper, background_data)
    shap_values = explainer.shap_values(data_sample)

    return shap_values

   def generate_counterfactuals(self, instance, target_prob=0.3, max_iterations=100):
    #Generate counterfactual explanations
    #simple gardient based conterfactual generation
    instance_tensor = torch.tensor(instance, dtype=torch.float32, requires_grad=True)

    #parse instance components
    behavioural_seq_tensor = instance_tensor[:120].reshape(1, 30, 4) # Reshape for single instance
    textual_emb_tensor = instance_tensor[120:504].unsqueeze(0) # Add batch dimension
    structured_features_tensor = instance_tensor[504:].unsqueeze(0) # Add batch dimension


    optimizer = torch.optim.Adam([instance_tensor], lr=0.01)

    for i in range(max_iterations):
      optimizer.zero_grad()

      #forward pass
      behavioural_seq_batch = behavioural_seq_tensor.to(self.model.fusion[0].weight.device)
      textual_emb_batch = textual_emb_tensor.to(self.model.fusion[0].weight.device)
      structured_features_batch = structured_features_tensor.to(self.model.fusion[0].weight.device)


      pred = self.model(behavioural_seq_batch, textual_emb_batch, structured_features_batch)

      #Loss: minimize distance to target probabillity
      loss = (pred - target_prob) ** 2
      loss.backward()
      optimizer.step()

      if abs(pred.item() - target_prob) < 0.05:
        break

    return instance_tensor.detach().numpy()

In [11]:
#Evaluation metrics
class ChurnEvaluator:
  #comprehensive evaluation including business metrics

  def __init__(self, avg_customer_values=100, churn_cost_multiplier=5):
    self.avg_customer_values = avg_customer_values
    self.churn_cost_multiplier = churn_cost_multiplier

  def evaluate_model(self, y_true, y_pred_proba, y_pred_binary=None):
    #Comprehensive model evaluation

    if y_pred_binary is None:
      y_pred_binary = (y_pred_proba > 0.5).astype(int)

    #Traditional metrics
    auc_score = roc_auc_score(y_true, y_pred_proba)
    f1 = f1_score(y_true, y_pred_binary)
    precision = precision_score(y_true, y_pred_binary)
    recall = recall_score(y_true, y_pred_binary)

    #Business metrics
    expected_revenue_loss = self.calculate_expected_revenue_loss(y_true, y_pred_proba)
    cost_reduction = self.calculate_cost_reduction(y_true, y_pred_binary)

    #Precision at different thresholds
    precision_at_k = self.precision_at_k(y_true, y_pred_proba, k_values=[10, 20, 30])

    return{
        'auc': auc_score,
        'f1': f1,
        'precision': precision,
        'recall': recall,
        'expected_revenue_loss': expected_revenue_loss,
        'cost_reduction': cost_reduction,
        'precision_at_k': precision_at_k
    }

  def calculate_expected_revenue_loss(self, y_true, y_pred_proba):
    #Calculate expected revenue loss based on churn probabilities
    expected_loss = np.sum(y_pred_proba * self.avg_customer_values)
    actual_loss = np.sum(y_true * self.avg_customer_values)
    return abs(expected_loss - actual_loss)

  def calculate_cost_reduction(self, y_true, y_pred_binary):
    #calculate cost reduction from intervention
    #assume intervention costs 20% of customer value but saves 80% if successful
    intervention_cost = 0.2 * self.avg_customer_values
    save_rate = 0.8 # 80% of interventions successful

    #True Positives: correctly identified churners
    tp = np.sum((y_true == 1) & (y_pred_binary == 1))

    #False Positives: incorrectly identified churners
    fp = np.sum((y_true == 0) & (y_pred_binary == 1))

    #cost of interventions
    intervention_costs = (tp + fp) * intervention_cost

    #revenue saved from successful interventions
    revenue_saved = tp * save_rate * self.avg_customer_values

    #net cost reduction
    return revenue_saved - intervention_costs

  def precision_at_k(self, y_true, y_pred_proba, k_values):
    #calculate precision at top  k predictions
    results = {}

    #Sort by Prediction probability
    sorted_indices = np.argsort(y_pred_proba)[::-1]

    for k in k_values:
      if k > len(y_true):
        k = len(y_true)


      top_k_indices = sorted_indices[:k]
      top_k_true = y_true[top_k_indices]
      precision_k = np.sum(top_k_true) / k
      results[f'precision_at_{k}'] = precision_k

    return results

In [13]:
from typing import Counter
#TRAINING PIPELINE
class ChurnPredictionPipeline:
  #complete training and evaluation
  def __init__(self, device='cuda' if torch.cuda.is_available() else 'cpu'):
    self.device = device
    self.model = None
    self.scaler = None
    self.textual_extractor = None
    self.explainer = None
    self.evaluator = ChurnEvaluator()

  def prepare_data(self, n_customers=10000, test_size=0.2):
    #Generate and prepare multi-modal data
    print("Generating synthetic data..")

    #Generate data
    data_gen = DataGenerator(n_customers=n_customers)
    behavioural_data, labels = data_gen.generate_behavioural_sequences()
    textual_data = data_gen.generate_textual_data(labels)
    structured_data = data_gen.generate_structured_data(labels)

    #Extract textual features
    print("Extracting textual features...")
    self.textual_extractor = TextualFeatureExtractor()
    # Explicitly convert textual_data to a list of strings
    textual_embeddings = self.textual_extractor.extract_features(textual_data.tolist())

    #scale structured data
    self.scaler = StandardScaler()
    structured_data_scaled = self.scaler.fit_transform(structured_data)

    #Train-test split
    indices = np.arange(len(labels))
    train_idx, test_idx = train_test_split(indices, test_size=test_size, stratify=labels, random_state=42)

    #Prepare Datasets
    self.train_data = {
        'behavioural': torch.tensor(behavioural_data[train_idx], dtype=torch.float32),
        'textual': textual_embeddings[train_idx],
        'structured': torch.tensor(structured_data_scaled[train_idx], dtype=torch.float32),
        'labels': torch.tensor(labels[train_idx], dtype=torch.float32)
    }

    self.test_data = {
        'behavioural': torch.tensor(behavioural_data[test_idx], dtype=torch.float32),
        'textual': textual_embeddings[test_idx],
        'structured': torch.tensor(structured_data_scaled[test_idx], dtype=torch.float32),
        'labels': torch.tensor(labels[test_idx], dtype=torch.float32)
    }

    print(f"Data prepared: {len(train_idx)} train, {len(test_idx)} test samples")
    return self.train_data, self.test_data

  def train_model(self, epochs=50, batch_size=64, learning_rate=0.001):
    #Train the multi-modal churn prediction model
    print("Training model...")
    #initialize model
    self.model = MultiModalChurnPredictor(
        behavioural_dim=4,
        textual_dim=384,
        structured_dim=6
    ).to(self.device)

    #loss and optimizer
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate)

    #Training Loop
    train_losses = []

    for epoch in range(epochs):
      self.model.train()
      total_loss=0

      #create mini-batches
      n_samples = len(self.train_data['labels'])
      indices = torch.randperm(n_samples)

      for i in range(0, n_samples, batch_size):
        batch_indices = indices[i:i+batch_size]

        #get batch data
        Behavioural_batch = self.train_data['behavioural'][batch_indices].to(self.device)
        textual_batch = self.train_data['textual'][batch_indices].to(self.device)
        structured_batch = self.train_data['structured'][batch_indices].to(self.device)
        labels_batch = self.train_data['labels'][batch_indices].to(self.device)

        #forward pass
        optimizer.zero_grad()
        outputs = self.model(Behavioural_batch, textual_batch, structured_batch)
        loss = criterion(outputs.squeeze(), labels_batch)

        #backward pass
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
      avg_loss = total_loss / (n_samples // batch_size)
      train_losses.append(avg_loss)

      if epoch % 10 == 0:
        print(f"Epoch {epoch}, Loss: {avg_loss:.4f}")

    print("Training complete!")
    return train_losses

  def evaluate_model(self):
    #evaluate the trained model
    print("Evaluating model...")

    self.model.eval()
    with torch.no_grad():
      #get predictions
      behavioural_test = self.test_data['behavioural'].to(self.device)
      textual_test = self.test_data['textual'].to(self.device)
      structured_test = self.test_data['structured'].to(self.device)

      predictions = self.model(behavioural_test, textual_test, structured_test)
      predictions = predictions.squeeze().cpu().numpy()

      #true labels
      y_true = self.test_data['labels'].cpu().numpy()

      #evaluate
      results = self.evaluator.evaluate_model(y_true, predictions)

      #print results
      print("\n= Model Evaluation Results =")
      print(f"AUC-ROC: {results['auc']:.4f}")
      print(f"F1 Score: {results['f1']:.4f}")
      print(f"Precision: {results['precision']:.4f}")
      print(f"Recall: {results['recall']:.4f}")
      print(f"Expected Revenue Loss: {results['expected_revenue_loss']:.2f}")
      print(f"Cost Reduction: ${results['cost_reduction']:.2f}")
      print(f"Precision at K: {results['precision_at_k']}")

      return results, predictions

  def setup_explainability(self):
      #Setup explainability components
      print("Setting up explainability...")

      #prepare flattened data for SHAP
      behavioural_flat = self.test_data['behavioural'].reshape(len( self.test_data['behavioural']), -1)
      textual_flat = self.test_data['textual']
      structured_flat = self.test_data['structured']


      #combine all features
      combined_features = torch.cat([behavioural_flat, textual_flat, structured_flat], dim=1)

      #create feature names
      feature_names = []
      for i in range(30): #30 time steps
        for j in range(4): #4 behavioural features
          feature_names.append(f'behavioural_t{i}_f{j}')
      for i in range(384): #384 textual embedding dimension
          feature_names.append(f'textual_emb_{i}')
      for i, name in enumerate(['tenure', 'monthly_charges', 'support_tickets', 'plan_changes', 'auto_pay', 'contract_type']):
        feature_names.append(f'structured_{name}')

      return combined_features.numpy(), feature_names


  def generate_explanations(self, sample_size=10):
    #Generate explanations for sample predictions
    print("Generating explanations..")

    # Get combined features and feature names
    combined_features =  self.setup_explainability()

    #select random samples
    sample_indices = np.random.choice(len(combined_features), sample_size, replace=False)
    sample_data = combined_features[sample_indices]

    #Generate SHAP explanations
    shap_values = self.explainer.explain_predictions(sample_data)


    #generate conterfactual for one sample
    counterfactual = self.explainer.generate_counterfactuals(sample_data[0])

    return shap_values, counterfactual

#MAIN EXECUTION
def main():
  #main execution function
  #initialize pipeline
  pipeline = ChurnPredictionPipeline()
  #prepare data
  train_data, test_data = pipeline.prepare_data(n_customers=5000)
  #train model
  train_losses = pipeline.train_model(epochs=30, batch_size=32)
  #evaluate model
  results, predictions = pipeline.evaluate_model()
  #generate explanations
  shap_values, counterfactual = pipeline.generate_explanations(sample_size=5)
  #plot training history
  plt.figure(figsize=(10, 6))
  plt.plot(train_losses)
  plt.title('Training Loss Over Time')
  plt.xlabel('Epochs')
  plt.ylabel('Loss')
  plt.show()

  #plot prediction distribution
  plt.figure(figsize=(10, 6))
  plt.hist(predictions, bins=50, alpha=0.7, label='Predictions')
  plt.axvline(x=0.5, color='red', linestyle='--', label='Decision Threshold')
  plt.title('Distribution of Churn Predictions')
  plt.xlabel('Churn Probability')
  plt.ylabel('Frequency')
  plt.legend()
  plt.show()

  print("\n== Pipeline Completed Successfully ==")
  print("The multi-modal churn Prediction system is ready for deployment")


if __name__ == "__main__":
  main()

Generating synthetic data..
Extracting textual features...
Data prepared: 4000 train, 1000 test samples
Training model...
Epoch 0, Loss: 0.1715
Epoch 10, Loss: 0.1330
Epoch 20, Loss: 0.1313
Training complete!
Evaluating model...

= Model Evaluation Results =
AUC-ROC: 0.9876
F1 Score: 0.9122
Precision: 0.9278
Recall: 0.8970
Expected Revenue Loss: 835.83
Cost Reduction: $15780.00
Precision at K: {'precision_at_10': np.float32(1.0), 'precision_at_20': np.float32(1.0), 'precision_at_30': np.float32(1.0)}
Generating explanations..
Setting up explainability...


ValueError: Cannot take a larger sample than population when 'replace=False'