In [1]:
pip install torch_geometric



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score, precision_recall_curve
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
import wandb

In [9]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score
import wandb
import warnings
warnings.filterwarnings('ignore')

In [16]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data
from torch_geometric.nn import SAGEConv
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score
import wandb
import warnings
warnings.filterwarnings('ignore')

# Baseline

In [3]:
try:
    from google.colab import userdata
    WANDB_API_KEY = userdata.get('WANDB')
    wandb.login(key=WANDB_API_KEY)
    print("Logged in to wandb using Colab secret")
except:
    print("Not in Colab or secret not set, using wandb login")

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkatrinpochtar[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Logged in to wandb using Colab secret


In [4]:
SWEEP_CONFIG = {
    'method': 'bayes',
    'metric': {
        'name': 'test_auc',
        'goal': 'maximize'
    },
    'parameters': {
        'model_type': {
            'values': ['xgboost', 'lightgbm']
        },
        'n_estimators': {
            'values': [100, 200, 300, 500]
        },
        'max_depth': {
            'values': [3, 5, 6, 8, 10]
        },
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 0.01,
            'max': 0.3
        }
    }
}

In [5]:
def create_temporal_dataset(df, observation_days=31, prediction_days=30):
    max_date = df['event_dt'].max()
    min_date = df['event_dt'].min()

    total_days = (max_date - min_date).days
    print(f"Total data span: {total_days} days ({min_date.date()} to {max_date.date()})")

    observation_end = max_date - pd.Timedelta(days=prediction_days)
    observation_start = observation_end - pd.Timedelta(days=observation_days)

    if observation_start < min_date:
        observation_start = min_date
        print(f"Warning: Adjusted observation start to {observation_start.date()}")

    print(f"Observation window: {observation_start.date()} to {observation_end.date()} ({observation_days} days)")
    print(f"Prediction window: {observation_end.date()} to {max_date.date()} ({prediction_days} days)")

    observation_data = df[
        (df['event_dt'] >= observation_start) &
        (df['event_dt'] < observation_end)
    ]

    prediction_data = df[df['event_dt'] >= observation_end]

    active_users_observation = set(observation_data['device_id'].unique())
    active_users_prediction = set(prediction_data['device_id'].unique())

    churned_users = active_users_observation - active_users_prediction

    print(f"Users in observation: {len(active_users_observation)}")
    print(f"Users in prediction: {len(active_users_prediction)}")
    print(f"Churned users: {len(churned_users)}")

    churn_labels = {}
    for user in active_users_observation:
        churn_labels[user] = 1 if user in churned_users else 0

    return observation_data, churn_labels

def extract_features(df):
    features = df.groupby('device_id').agg({
        'event_dt': ['min', 'max', 'count'],
        'session_id': 'nunique',
        'screen': 'nunique',
        'feature': 'nunique',
        'action': 'count'
    })

    features.columns = ['_'.join(col).strip() for col in features.columns]

    features['days_in_window'] = (
        features['event_dt_max'] - features['event_dt_min']
    ).dt.total_seconds() / 86400

    features['events_per_day'] = features['event_dt_count'] / features['days_in_window'].clip(lower=1)
    features['sessions_per_day'] = features['session_id_nunique'] / features['days_in_window'].clip(lower=1)
    features['events_per_session'] = features['event_dt_count'] / features['session_id_nunique'].clip(lower=1)
    features['screen_diversity'] = features['screen_nunique'] / features['event_dt_count']
    features['feature_diversity'] = features['feature_nunique'] / features['event_dt_count']

    first_day = features['event_dt_min'].min()
    features['days_since_first_seen'] = (
        features['event_dt_min'] - first_day
    ).dt.total_seconds() / 86400

    last_day = features['event_dt_max'].max()
    features['days_until_window_end'] = (
        last_day - features['event_dt_max']
    ).dt.total_seconds() / 86400

    features['recency_in_window'] = features['days_until_window_end'] / features['days_in_window'].clip(lower=1)

    return features

def prepare_data(df, observation_days=31, prediction_days=30):
    observation_data, churn_labels = create_temporal_dataset(
        df, observation_days, prediction_days
    )

    features = extract_features(observation_data)

    features = features.drop(columns=['event_dt_min', 'event_dt_max'])
    features['churn'] = features.index.map(churn_labels)
    features = features.dropna()

    X = features.drop(columns=['churn'])
    y = features['churn']

    return X, y

def calculate_metrics(y_true, y_pred, y_prob):
    metrics = {
        'auc_roc': roc_auc_score(y_true, y_prob),
        'precision': precision_score(y_true, y_pred),
        'recall': recall_score(y_true, y_pred),
        'f1': f1_score(y_true, y_pred)
    }

    precision_vals, recall_vals, _ = precision_recall_curve(y_true, y_prob)

    top_10_pct = int(0.1 * len(y_prob))
    top_indices = np.argsort(y_prob)[-top_10_pct:]
    metrics['precision_at_10'] = np.mean(y_true.iloc[top_indices])

    return metrics

def train_with_wandb(X_train, X_test, y_train, y_test, config=None):
    run = wandb.init(config=config)
    config = wandb.config

    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    if config.model_type == 'xgboost':
        model = XGBClassifier(
            n_estimators=config.n_estimators,
            max_depth=config.max_depth,
            learning_rate=config.learning_rate,
            scale_pos_weight=len(y_train[y_train==0]) / len(y_train[y_train==1]),
            random_state=42
        )
    else:
        model = LGBMClassifier(
            n_estimators=config.n_estimators,
            max_depth=config.max_depth,
            learning_rate=config.learning_rate,
            scale_pos_weight=len(y_train[y_train==0]) / len(y_train[y_train==1]),
            random_state=42,
            verbose=-1
        )

    model.fit(X_train_scaled, y_train)

    y_train_prob = model.predict_proba(X_train_scaled)[:, 1]
    y_test_prob = model.predict_proba(X_test_scaled)[:, 1]

    y_train_pred = (y_train_prob > 0.5).astype(int)
    y_test_pred = (y_test_prob > 0.5).astype(int)

    train_metrics = calculate_metrics(y_train, y_train_pred, y_train_prob)
    test_metrics = calculate_metrics(y_test, y_test_pred, y_test_prob)

    wandb.log({
        'train_auc': train_metrics['auc_roc'],
        'train_precision': train_metrics['precision'],
        'train_recall': train_metrics['recall'],
        'train_f1': train_metrics['f1'],
        'train_precision_at_10': train_metrics['precision_at_10'],
        'test_auc': test_metrics['auc_roc'],
        'test_precision': test_metrics['precision'],
        'test_recall': test_metrics['recall'],
        'test_f1': test_metrics['f1'],
        'test_precision_at_10': test_metrics['precision_at_10']
    })

    if config.model_type == 'xgboost':
        importances = model.feature_importances_
        feature_importance = pd.DataFrame({
            'feature': X_train.columns,
            'importance': importances
        }).sort_values('importance', ascending=False).head(10)

        wandb.log({'feature_importance': wandb.Table(dataframe=feature_importance)})

    import joblib
    model_filename = f'best_{config.model_type}_model.pkl'
    joblib.dump({'model': model, 'scaler': scaler}, model_filename)

    artifact = wandb.Artifact(f'{config.model_type}_churn_model', type='model')
    artifact.add_file(model_filename)
    wandb.log_artifact(artifact)

    print(f"\n{config.model_type.upper()}")
    print(f"Train AUC: {train_metrics['auc_roc']:.4f} | Test AUC: {test_metrics['auc_roc']:.4f}")
    print(f"Test Precision: {test_metrics['precision']:.4f} | Recall: {test_metrics['recall']:.4f} | F1: {test_metrics['f1']:.4f}")
    print(f"Precision@10%: {test_metrics['precision_at_10']:.4f}")
    print(f"Model saved: {model_filename}")

    run.finish()
    return model

def main(config=None):
    df = pd.read_csv("/content/group_project/clean_data.csv")
    df['event_dt'] = pd.to_datetime(df['event_dt'])

    X, y = prepare_data(df, observation_days=31, prediction_days=30)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=y
    )

    print(f"\nDataset: {len(X)} users")
    print(f"Churn rate: {y.mean():.2%}")
    print(f"Train: {len(X_train)} | Test: {len(X_test)}")

    if config is None:
        config = {
            'model_type': 'xgboost',
            'n_estimators': 200,
            'max_depth': 6,
            'learning_rate': 0.1
        }

    model = train_with_wandb(X_train, X_test, y_train, y_test, config=config)
    return model

In [7]:
def run_sweep(count=10):
    sweep_id = wandb.sweep(SWEEP_CONFIG, project="churn-prediction")
    print(f"Sweep ID: {sweep_id}")
    print(f"Running {count} sweep iterations...")
    wandb.agent(sweep_id, function=main, count=count)

In [8]:
run_sweep(count=10)

Create sweep with ID: 0i16oavp
Sweep URL: https://wandb.ai/katrinpochtar/churn-prediction/sweeps/0i16oavp
Sweep ID: 0i16oavp
Running 10 sweep iterations...


[34m[1mwandb[0m: Agent Starting Run: 5ocncfcn with config:
[34m[1mwandb[0m: 	learning_rate: 0.03460340809611186
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	model_type: xgboost
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712



XGBOOST
Train AUC: 0.7409 | Test AUC: 0.7205
Test Precision: 0.6026 | Recall: 0.7365 | F1: 0.6629
Precision@10%: 0.6958
Model saved: best_xgboost_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.72049
test_f1,0.66286
test_precision,0.60258
test_precision_at_10,0.69578
test_recall,0.73654
train_auc,0.74086
train_f1,0.67501
train_precision,0.61283
train_precision_at_10,0.73376
train_recall,0.75123


[34m[1mwandb[0m: Agent Starting Run: ryygdoox with config:
[34m[1mwandb[0m: 	learning_rate: 0.16406396294762263
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	model_type: xgboost
[34m[1mwandb[0m: 	n_estimators: 200


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712



XGBOOST
Train AUC: 0.8789 | Test AUC: 0.7027
Test Precision: 0.5939 | Recall: 0.7102 | F1: 0.6469
Precision@10%: 0.6665
Model saved: best_xgboost_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.70268
test_f1,0.64687
test_precision,0.59393
test_precision_at_10,0.66655
test_recall,0.71016
train_auc,0.8789
train_f1,0.79908
train_precision,0.72175
train_precision_at_10,0.93224
train_recall,0.89497


[34m[1mwandb[0m: Agent Starting Run: v01c6rg9 with config:
[34m[1mwandb[0m: 	learning_rate: 0.28385102750891167
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	model_type: lightgbm
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712





LIGHTGBM
Train AUC: 0.7497 | Test AUC: 0.7172
Test Precision: 0.6000 | Recall: 0.7336 | F1: 0.6601
Precision@10%: 0.7016
Model saved: best_lightgbm_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.71716
test_f1,0.66008
test_precision,0.59996
test_precision_at_10,0.70155
test_recall,0.73358
train_auc,0.74972
train_f1,0.68303
train_precision,0.61959
train_precision_at_10,0.7445
train_recall,0.76094


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: kxc9tr6r with config:
[34m[1mwandb[0m: 	learning_rate: 0.04512285627983675
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	model_type: xgboost
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712



XGBOOST
Train AUC: 0.7452 | Test AUC: 0.7201
Test Precision: 0.6022 | Recall: 0.7337 | F1: 0.6615
Precision@10%: 0.6954
Model saved: best_xgboost_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.72015
test_f1,0.66148
test_precision,0.60218
test_precision_at_10,0.69542
test_recall,0.73374
train_auc,0.74525
train_f1,0.6782
train_precision,0.6165
train_precision_at_10,0.74323
train_recall,0.75363


[34m[1mwandb[0m: Agent Starting Run: j1q03znf with config:
[34m[1mwandb[0m: 	learning_rate: 0.027522525582145577
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	model_type: lightgbm
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712





LIGHTGBM
Train AUC: 0.7268 | Test AUC: 0.7201
Test Precision: 0.5996 | Recall: 0.7445 | F1: 0.6642
Precision@10%: 0.6868
Model saved: best_lightgbm_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.7201
test_f1,0.66424
test_precision,0.59959
test_precision_at_10,0.68676
test_recall,0.74453
train_auc,0.72676
train_f1,0.66696
train_precision,0.60196
train_precision_at_10,0.70453
train_recall,0.74771


[34m[1mwandb[0m: Agent Starting Run: 98fneooe with config:
[34m[1mwandb[0m: 	learning_rate: 0.01013750867545708
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	model_type: xgboost
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712



XGBOOST
Train AUC: 0.7220 | Test AUC: 0.7184
Test Precision: 0.5983 | Recall: 0.7345 | F1: 0.6594
Precision@10%: 0.6886
Model saved: best_xgboost_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.7184
test_f1,0.6594
test_precision,0.59826
test_precision_at_10,0.68856
test_recall,0.73446
train_auc,0.72197
train_f1,0.66097
train_precision,0.59873
train_precision_at_10,0.69442
train_recall,0.73766


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 2onowvj6 with config:
[34m[1mwandb[0m: 	learning_rate: 0.037819432800965454
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	model_type: lightgbm
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712





LIGHTGBM
Train AUC: 0.7285 | Test AUC: 0.7206
Test Precision: 0.6023 | Recall: 0.7430 | F1: 0.6653
Precision@10%: 0.6940
Model saved: best_lightgbm_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.72065
test_f1,0.66528
test_precision,0.60228
test_precision_at_10,0.69397
test_recall,0.74301
train_auc,0.72847
train_f1,0.66798
train_precision,0.60404
train_precision_at_10,0.70967
train_recall,0.74705


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pwdfm5xd with config:
[34m[1mwandb[0m: 	learning_rate: 0.057619656774927135
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	model_type: lightgbm
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712





LIGHTGBM
Train AUC: 0.7313 | Test AUC: 0.7206
Test Precision: 0.6017 | Recall: 0.7409 | F1: 0.6641
Precision@10%: 0.6972
Model saved: best_lightgbm_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.72061
test_f1,0.66409
test_precision,0.60174
test_precision_at_10,0.69722
test_recall,0.74085
train_auc,0.73128
train_f1,0.66938
train_precision,0.60561
train_precision_at_10,0.71671
train_recall,0.74815


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ljj8unxl with config:
[34m[1mwandb[0m: 	learning_rate: 0.02507264838839407
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	model_type: xgboost
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712



XGBOOST
Train AUC: 0.7262 | Test AUC: 0.7201
Test Precision: 0.6007 | Recall: 0.7443 | F1: 0.6648
Precision@10%: 0.6857
Model saved: best_xgboost_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.72011
test_f1,0.66483
test_precision,0.60071
test_precision_at_10,0.68567
test_recall,0.74429
train_auc,0.72621
train_f1,0.66671
train_precision,0.6019
train_precision_at_10,0.7011
train_recall,0.74717


[34m[1mwandb[0m: Agent Starting Run: lv91ocgn with config:
[34m[1mwandb[0m: 	learning_rate: 0.04588261588485509
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	model_type: lightgbm
[34m[1mwandb[0m: 	n_estimators: 500


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Dataset: 138559 users
Churn rate: 45.16%
Train: 110847 | Test: 27712





LIGHTGBM
Train AUC: 0.7296 | Test AUC: 0.7207
Test Precision: 0.6022 | Recall: 0.7431 | F1: 0.6653
Precision@10%: 0.6918
Model saved: best_lightgbm_model.pkl


0,1
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁
train_f1,▁
train_precision,▁
train_precision_at_10,▁
train_recall,▁

0,1
test_auc,0.72069
test_f1,0.66526
test_precision,0.60219
test_precision_at_10,0.69181
test_recall,0.74309
train_auc,0.72956
train_f1,0.66846
train_precision,0.60467
train_precision_at_10,0.71445
train_recall,0.74729


# Lstm attention churn

In [13]:
SWEEP_CONFIG = {
    'method': 'bayes',
    'metric': {
        'name': 'test_auc',
        'goal': 'maximize'
    },
    'parameters': {
        'hidden_dim': {
            'values': [64, 128, 256]
        },
        'num_layers': {
            'values': [1, 2, 3]
        },
        'dropout': {
            'distribution': 'uniform',
            'min': 0.1,
            'max': 0.5
        },
        'batch_size': {
            'values': [16, 32, 64]
        },
        'max_len': {
            'values': [30, 50, 100]
        },
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 0.0001,
            'max': 0.01
        },
        'epochs': {
            'value': 30
        }
    }
}

In [14]:
class SequenceDataset(Dataset):
    def __init__(self, sequences, labels, max_len=50):
        self.sequences = sequences
        self.labels = labels
        self.max_len = max_len

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        seq = self.sequences[idx]
        label = self.labels[idx]

        if len(seq) > self.max_len:
            seq = seq[-self.max_len:]
        else:
            pad = np.zeros((self.max_len - len(seq), seq.shape[1]))
            seq = np.vstack([pad, seq])

        mask = torch.ones(self.max_len)
        if len(self.sequences[idx]) < self.max_len:
            mask[:self.max_len - len(self.sequences[idx])] = 0

        return torch.FloatTensor(seq), torch.FloatTensor([label]), mask

class AttentionLSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim=128, num_layers=2, dropout=0.3):
        super(AttentionLSTM, self).__init__()

        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0,
            bidirectional=True
        )

        self.attention = nn.Sequential(
            nn.Linear(hidden_dim * 2, hidden_dim),
            nn.Tanh(),
            nn.Linear(hidden_dim, 1)
        )

        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim * 2, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(32, 1)
        )

    def forward(self, x, mask):
        lstm_out, _ = self.lstm(x)

        attn_weights = self.attention(lstm_out)
        attn_weights = attn_weights.masked_fill(mask.unsqueeze(-1) == 0, -1e9)
        attn_weights = F.softmax(attn_weights, dim=1)

        context = torch.sum(attn_weights * lstm_out, dim=1)
        output = self.classifier(context)

        return torch.sigmoid(output), attn_weights

def create_temporal_dataset(df, observation_days=31, prediction_days=30):
    max_date = df['event_dt'].max()
    min_date = df['event_dt'].min()

    total_days = (max_date - min_date).days
    print(f"Total data span: {total_days} days ({min_date.date()} to {max_date.date()})")

    observation_end = max_date - pd.Timedelta(days=prediction_days)
    observation_start = observation_end - pd.Timedelta(days=observation_days)

    if observation_start < min_date:
        observation_start = min_date
        print(f"Warning: Adjusted observation start to {observation_start.date()}")

    print(f"Observation window: {observation_start.date()} to {observation_end.date()} ({observation_days} days)")
    print(f"Prediction window: {observation_end.date()} to {max_date.date()} ({prediction_days} days)")

    observation_data = df[
        (df['event_dt'] >= observation_start) &
        (df['event_dt'] < observation_end)
    ]

    prediction_data = df[df['event_dt'] >= observation_end]

    active_users_observation = set(observation_data['device_id'].unique())
    active_users_prediction = set(prediction_data['device_id'].unique())

    churned_users = active_users_observation - active_users_prediction

    print(f"Users in observation: {len(active_users_observation)}")
    print(f"Users in prediction: {len(active_users_prediction)}")
    print(f"Churned users: {len(churned_users)}")

    churn_labels = {}
    for user in active_users_observation:
        churn_labels[user] = 1 if user in churned_users else 0

    return observation_data, churn_labels

def create_sequences(df):
    df = df.sort_values(['device_id', 'event_dt'])

    le_screen = LabelEncoder()
    le_feature = LabelEncoder()
    le_action = LabelEncoder()

    df['screen_enc'] = le_screen.fit_transform(df['screen'].fillna('unknown'))
    df['feature_enc'] = le_feature.fit_transform(df['feature'].fillna('unknown'))
    df['action_enc'] = le_action.fit_transform(df['action'].fillna('unknown'))

    df['hour'] = df['event_dt'].dt.hour / 24.0
    df['day_of_week'] = df['event_dt'].dt.dayofweek / 7.0
    df['is_weekend'] = (df['event_dt'].dt.dayofweek >= 5).astype(float)

    df['time_diff'] = df.groupby('device_id')['event_dt'].diff().dt.total_seconds() / 3600
    df['time_diff'] = df['time_diff'].fillna(0).clip(0, 24) / 24.0

    feature_cols = [
        'screen_enc', 'feature_enc', 'action_enc',
        'hour', 'day_of_week', 'is_weekend', 'time_diff'
    ]

    sequences = []
    device_ids = []

    for device_id, group in df.groupby('device_id'):
        seq = group[feature_cols].values.astype(np.float32)
        sequences.append(seq)
        device_ids.append(device_id)

    return sequences, device_ids, len(feature_cols)

def calculate_metrics(y_true, y_prob, threshold=0.5):
    y_pred = (y_prob > threshold).astype(int)

    metrics = {
        'auc_roc': roc_auc_score(y_true, y_prob),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1': f1_score(y_true, y_pred, zero_division=0)
    }

    top_10_pct = int(0.1 * len(y_prob))
    top_indices = np.argsort(y_prob)[-top_10_pct:]
    metrics['precision_at_10'] = np.mean([y_true[i] for i in top_indices])

    return metrics

def train_lstm(model, train_loader, val_loader, config, device='cuda'):
    model = model.to(device)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=3)

    best_val_auc = 0
    patience = 7
    patience_counter = 0

    for epoch in range(config.epochs):
        model.train()
        train_loss = 0
        train_preds = []
        train_labels = []

        for sequences, labels, masks in train_loader:
            sequences = sequences.to(device)
            labels = labels.to(device)
            masks = masks.to(device)

            optimizer.zero_grad()
            outputs, _ = model(sequences, masks)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()
            train_preds.extend(outputs.detach().cpu().numpy())
            train_labels.extend(labels.detach().cpu().numpy())

        model.eval()
        val_loss = 0
        val_preds = []
        val_labels = []

        with torch.no_grad():
            for sequences, labels, masks in val_loader:
                sequences = sequences.to(device)
                labels = labels.to(device)
                masks = masks.to(device)

                outputs, _ = model(sequences, masks)
                loss = criterion(outputs, labels)

                val_loss += loss.item()
                val_preds.extend(outputs.cpu().numpy())
                val_labels.extend(labels.cpu().numpy())

        train_metrics = calculate_metrics(np.array(train_labels).flatten(), np.array(train_preds).flatten())
        val_metrics = calculate_metrics(np.array(val_labels).flatten(), np.array(val_preds).flatten())

        scheduler.step(val_metrics['auc_roc'])

        wandb.log({
            'epoch': epoch + 1,
            'train_loss': train_loss / len(train_loader),
            'val_loss': val_loss / len(val_loader),
            'train_auc': train_metrics['auc_roc'],
            'val_auc': val_metrics['auc_roc'],
            'val_precision': val_metrics['precision'],
            'val_recall': val_metrics['recall'],
            'val_f1': val_metrics['f1'],
            'val_precision_at_10': val_metrics['precision_at_10'],
            'learning_rate': optimizer.param_groups[0]['lr']
        })

        print(f"Epoch {epoch+1:02d} | Train Loss: {train_loss/len(train_loader):.4f} | Val Loss: {val_loss/len(val_loader):.4f}")
        print(f"  Train AUC: {train_metrics['auc_roc']:.4f} | Val AUC: {val_metrics['auc_roc']:.4f}")
        print(f"  Val Precision: {val_metrics['precision']:.4f} | Recall: {val_metrics['recall']:.4f} | F1: {val_metrics['f1']:.4f}")

        if val_metrics['auc_roc'] > best_val_auc:
            best_val_auc = val_metrics['auc_roc']
            patience_counter = 0
            torch.save(model.state_dict(), 'best_lstm_model.pth')
            torch.save({
                'model_state_dict': model.state_dict(),
                'config': {
                    'input_dim': model.lstm.input_size,
                    'hidden_dim': config.hidden_dim,
                    'num_layers': config.num_layers,
                    'dropout': config.dropout
                }
            }, 'best_lstm_model_full.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    model.load_state_dict(torch.load('best_lstm_model.pth'))
    model = model.to(device)
    return model

def main(config=None):
    with wandb.init(config=config):
        config = wandb.config

        hidden_dim = getattr(config, 'hidden_dim', 128)
        num_layers = getattr(config, 'num_layers', 2)
        dropout = getattr(config, 'dropout', 0.3)
        batch_size = getattr(config, 'batch_size', 32)
        max_len = getattr(config, 'max_len', 50)
        learning_rate = getattr(config, 'learning_rate', 0.001)
        epochs = getattr(config, 'epochs', 30)

        df = pd.read_csv("/content/group_project/clean_data.csv")
        df['event_dt'] = pd.to_datetime(df['event_dt'])

        observation_data, churn_labels = create_temporal_dataset(
            df, observation_days=31, prediction_days=30
        )

        sequences, device_ids, input_dim = create_sequences(observation_data)
        labels = [churn_labels.get(uid, 0) for uid in device_ids]

        print(f"\nTotal users: {len(sequences)}")
        print(f"Churn rate: {np.mean(labels):.2%}")
        print(f"Input dim: {input_dim}")

        train_seqs, val_seqs, train_labels, val_labels = train_test_split(
            sequences, labels, test_size=0.2, random_state=42, stratify=labels
        )

        train_dataset = SequenceDataset(train_seqs, train_labels, max_len=max_len)
        val_dataset = SequenceDataset(val_seqs, val_labels, max_len=max_len)

        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        model = AttentionLSTM(
            input_dim=input_dim,
            hidden_dim=hidden_dim,
            num_layers=num_layers,
            dropout=dropout
        )

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"\nUsing device: {device}")

        class Config:
            pass

        train_config = Config()
        train_config.hidden_dim = hidden_dim
        train_config.num_layers = num_layers
        train_config.dropout = dropout
        train_config.learning_rate = learning_rate
        train_config.epochs = epochs

        trained_model = train_lstm(
            model,
            train_loader,
            val_loader,
            train_config,
            device=device
        )

        print("\nFinal Test Evaluation")
        model.eval()
        test_preds = []
        test_labels = []

        with torch.no_grad():
            for sequences, labels, masks in val_loader:
                sequences = sequences.to(device)
                masks = masks.to(device)
                outputs, _ = model(sequences, masks)
                test_preds.extend(outputs.cpu().numpy())
                test_labels.extend(labels.cpu().numpy())

        final_metrics = calculate_metrics(np.array(test_labels).flatten(), np.array(test_preds).flatten())

        wandb.log({
            'test_auc': final_metrics['auc_roc'],
            'test_precision': final_metrics['precision'],
            'test_recall': final_metrics['recall'],
            'test_f1': final_metrics['f1'],
            'test_precision_at_10': final_metrics['precision_at_10']
        })

        artifact = wandb.Artifact('lstm_churn_model', type='model')
        artifact.add_file('best_lstm_model_full.pth')
        wandb.log_artifact(artifact)

        print(f"Test AUC: {final_metrics['auc_roc']:.4f}")
        print(f"Test Precision: {final_metrics['precision']:.4f} | Recall: {final_metrics['recall']:.4f} | F1: {final_metrics['f1']:.4f}")
        print(f"Precision@10%: {final_metrics['precision_at_10']:.4f}")
        print(f"Model saved: best_lstm_model_full.pth")

def run_sweep(count=10):
    sweep_id = wandb.sweep(SWEEP_CONFIG, project="churn-prediction")
    print(f"Sweep ID: {sweep_id}")
    print(f"Running {count} sweep iterations...")
    wandb.agent(sweep_id, function=main, count=count)

In [15]:
run_sweep(count=10)

Create sweep with ID: kow3mbqb
Sweep URL: https://wandb.ai/katrinpochtar/churn-prediction/sweeps/kow3mbqb
Sweep ID: kow3mbqb
Running 10 sweep iterations...


[34m[1mwandb[0m: Agent Starting Run: 65qsg8ry with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.20693770530920955
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0006321787915948659
[34m[1mwandb[0m: 	max_len: 100
[34m[1mwandb[0m: 	num_layers: 1


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6352 | Val Loss: 0.6212
  Train AUC: 0.6773 | Val AUC: 0.6964
  Val Precision: 0.5996 | Recall: 0.6374 | F1: 0.6180
Epoch 02 | Train Loss: 0.6236 | Val Loss: 0.6205
  Train AUC: 0.6920 | Val AUC: 0.6989
  Val Precision: 0.5893 | Recall: 0.7163 | F1: 0.6466
Epoch 03 | Train Loss: 0.6217 | Val Loss: 0.6200
  Train AUC: 0.6944 | Val AUC: 0.6963
  Val Precision: 0.5841 | Recall: 0.7207 | F1: 0.6453
Epoch 04 | Train Loss: 0.6203 | Val Loss: 0.6170
  Train AUC: 0.6967 | Val AUC: 0.7025
  Val Precision: 0.6090 | Recall: 0.6336 | F1: 0.6211
Epoch 05 | Train Loss: 0.6194 | Val Loss: 0.6207
  Train AUC: 0.6978 | Val AUC: 0.7030
  Val Precision: 0.6174 | Recall: 0.5

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇█▇███████
train_loss,█▅▅▄▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁
val_auc,▁▂▁▃▃▃▄▄▅▃▅▅▅▆▆▆▆▆▅▇▇█▆███████

0,1
epoch,30
learning_rate,0.00063
test_auc,0.71872
test_f1,0.62232
test_precision,0.62455
test_precision_at_10,0.69109
test_recall,0.62011
train_auc,0.71667
train_loss,0.60762
val_auc,0.71792


[34m[1mwandb[0m: Agent Starting Run: 73bf1sd6 with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.37997737549563615
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00010621231418673904
[34m[1mwandb[0m: 	max_len: 100
[34m[1mwandb[0m: 	num_layers: 2


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6439 | Val Loss: 0.6234
  Train AUC: 0.6650 | Val AUC: 0.6926
  Val Precision: 0.5996 | Recall: 0.5976 | F1: 0.5986
Epoch 02 | Train Loss: 0.6282 | Val Loss: 0.6191
  Train AUC: 0.6873 | Val AUC: 0.7012
  Val Precision: 0.6095 | Recall: 0.5921 | F1: 0.6006
Epoch 03 | Train Loss: 0.6241 | Val Loss: 0.6221
  Train AUC: 0.6919 | Val AUC: 0.7045
  Val Precision: 0.5726 | Recall: 0.8013 | F1: 0.6679
Epoch 04 | Train Loss: 0.6225 | Val Loss: 0.6341
  Train AUC: 0.6963 | Val AUC: 0.7020
  Val Precision: 0.6324 | Recall: 0.4787 | F1: 0.5449
Epoch 05 | Train Loss: 0.6203 | Val Loss: 0.6126
  Train AUC: 0.6996 | Val AUC: 0.7084
  Val Precision: 0.6173 | Recall: 0.6

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,███████████████████████████▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████
train_loss,█▅▄▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁
val_auc,▁▃▄▃▅▅▆▆▆▇▇▇▆▇▆▇▇▇▆▇▇▇▇▇▇▇▇▇██

0,1
epoch,30
learning_rate,1e-05
test_auc,0.72176
test_f1,0.64131
test_precision,0.61246
test_precision_at_10,0.69794
test_recall,0.67301
train_auc,0.72
train_loss,0.6057
val_auc,0.72176


[34m[1mwandb[0m: Agent Starting Run: 3lt6hjw5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.3626147525939415
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00041048872468466496
[34m[1mwandb[0m: 	max_len: 30
[34m[1mwandb[0m: 	num_layers: 3


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6449 | Val Loss: 0.6286
  Train AUC: 0.6644 | Val AUC: 0.6908
  Val Precision: 0.5789 | Recall: 0.7372 | F1: 0.6485
Epoch 02 | Train Loss: 0.6298 | Val Loss: 0.6286
  Train AUC: 0.6855 | Val AUC: 0.6986
  Val Precision: 0.6120 | Recall: 0.5706 | F1: 0.5905
Epoch 03 | Train Loss: 0.6263 | Val Loss: 0.6205
  Train AUC: 0.6934 | Val AUC: 0.7052
  Val Precision: 0.5820 | Recall: 0.7726 | F1: 0.6639
Epoch 04 | Train Loss: 0.6231 | Val Loss: 0.6205
  Train AUC: 0.6974 | Val AUC: 0.7087
  Val Precision: 0.6016 | Recall: 0.6812 | F1: 0.6389
Epoch 05 | Train Loss: 0.6216 | Val Loss: 0.6152
  Train AUC: 0.6999 | Val AUC: 0.7105
  Val Precision: 0.5950 | Recall: 0.7

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,████████▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▅▆▆▆▆▆▇▇████████████████████
train_loss,█▅▄▃▃▃▃▃▃▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▃▅▆▆▆▅▆▄▇▇▇██████████████████

0,1
epoch,30
learning_rate,0.0
test_auc,0.71657
test_f1,0.64819
test_precision,0.60723
test_precision_at_10,0.68603
test_recall,0.69506
train_auc,0.714
train_loss,0.61128
val_auc,0.71657


[34m[1mwandb[0m: Agent Starting Run: p1oa4cvu with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.32240472378314045
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0004961656046055474
[34m[1mwandb[0m: 	max_len: 50
[34m[1mwandb[0m: 	num_layers: 3


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6419 | Val Loss: 0.6306
  Train AUC: 0.6680 | Val AUC: 0.6926
  Val Precision: 0.6213 | Recall: 0.4424 | F1: 0.5168
Epoch 02 | Train Loss: 0.6270 | Val Loss: 0.6180
  Train AUC: 0.6899 | Val AUC: 0.7013
  Val Precision: 0.6037 | Recall: 0.6264 | F1: 0.6148
Epoch 03 | Train Loss: 0.6240 | Val Loss: 0.6219
  Train AUC: 0.6948 | Val AUC: 0.7037
  Val Precision: 0.6283 | Recall: 0.5288 | F1: 0.5743
Epoch 04 | Train Loss: 0.6213 | Val Loss: 0.6136
  Train AUC: 0.6990 | Val AUC: 0.7098
  Val Precision: 0.6290 | Recall: 0.5511 | F1: 0.5874
Epoch 05 | Train Loss: 0.6191 | Val Loss: 0.6114
  Train AUC: 0.7024 | Val AUC: 0.7114
  Val Precision: 0.6162 | Recall: 0.6

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,█████████████████████▁▁▁▁▁▁▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████
train_loss,█▅▅▄▄▄▃▃▃▃▃▃▂▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁
val_auc,▁▃▄▅▅▆▆▆▇▇▇▇▇▇▇▇▇██▇▆▇████████

0,1
epoch,30
learning_rate,5e-05
test_auc,0.72194
test_f1,0.64305
test_precision,0.6109
test_precision_at_10,0.69903
test_recall,0.67876
train_auc,0.72262
train_loss,0.60364
val_auc,0.72176


[34m[1mwandb[0m: Agent Starting Run: lvds1ghz with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4842821620353661
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.004200517988259345
[34m[1mwandb[0m: 	max_len: 50
[34m[1mwandb[0m: 	num_layers: 2


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6556 | Val Loss: 0.6547
  Train AUC: 0.6478 | Val AUC: 0.6859
  Val Precision: 0.5375 | Recall: 0.8801 | F1: 0.6674
Epoch 02 | Train Loss: 0.6448 | Val Loss: 0.6271
  Train AUC: 0.6642 | Val AUC: 0.6905
  Val Precision: 0.5830 | Recall: 0.7341 | F1: 0.6499
Epoch 03 | Train Loss: 0.6435 | Val Loss: 0.6311
  Train AUC: 0.6657 | Val AUC: 0.6954
  Val Precision: 0.6071 | Recall: 0.6206 | F1: 0.6138
Epoch 04 | Train Loss: 0.6419 | Val Loss: 0.6333
  Train AUC: 0.6667 | Val AUC: 0.6938
  Val Precision: 0.5854 | Recall: 0.7297 | F1: 0.6496
Epoch 05 | Train Loss: 0.6425 | Val Loss: 0.6507
  Train AUC: 0.6644 | Val AUC: 0.6818
  Val Precision: 0.5631 | Recall: 0.8

0,1
epoch,▁▂▃▃▄▅▆▆▇█
learning_rate,██████▁▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▅▅▆▅▅▃▇▇█
train_loss,█▅▄▄▄▄▅▂▁▁
val_auc,▄▆█▇▂▅▁▆▅▇

0,1
epoch,10
learning_rate,0.00042
test_auc,0.69538
test_f1,0.61377
test_precision,0.6071
test_precision_at_10,0.65464
test_recall,0.62058
train_auc,0.67678
train_loss,0.63368
val_auc,0.69368


[34m[1mwandb[0m: Agent Starting Run: 2zj6ml1t with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.26638500784680225
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	learning_rate: 0.0003127232706249167
[34m[1mwandb[0m: 	max_len: 100
[34m[1mwandb[0m: 	num_layers: 3


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6379 | Val Loss: 0.6194
  Train AUC: 0.6733 | Val AUC: 0.6984
  Val Precision: 0.5965 | Recall: 0.6684 | F1: 0.6304
Epoch 02 | Train Loss: 0.6244 | Val Loss: 0.6185
  Train AUC: 0.6931 | Val AUC: 0.7040
  Val Precision: 0.6247 | Recall: 0.5530 | F1: 0.5867
Epoch 03 | Train Loss: 0.6205 | Val Loss: 0.6132
  Train AUC: 0.6996 | Val AUC: 0.7099
  Val Precision: 0.6015 | Recall: 0.6846 | F1: 0.6404
Epoch 04 | Train Loss: 0.6174 | Val Loss: 0.6111
  Train AUC: 0.7040 | Val AUC: 0.7127
  Val Precision: 0.6009 | Recall: 0.7078 | F1: 0.6500
Epoch 05 | Train Loss: 0.6163 | Val Loss: 0.6118
  Train AUC: 0.7051 | Val AUC: 0.7099
  Val Precision: 0.6194 | Recall: 0.5

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,█████████████████████████▂▂▂▂▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▃▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████
train_loss,█▆▅▅▄▄▄▄▄▄▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_auc,▁▃▅▅▅▆▇▅▆▆▇▄▇▇▇▇▇█████▇▇▇█████

0,1
epoch,30
learning_rate,0.0
test_auc,0.72119
test_f1,0.63765
test_precision,0.6145
test_precision_at_10,0.70155
test_recall,0.66262
train_auc,0.7289
train_loss,0.59641
val_auc,0.72052


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 5t5bffzw with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.19266170332071175
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00010238939728687592
[34m[1mwandb[0m: 	max_len: 100
[34m[1mwandb[0m: 	num_layers: 3


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6476 | Val Loss: 0.6299
  Train AUC: 0.6557 | Val AUC: 0.6841
  Val Precision: 0.6001 | Recall: 0.5360 | F1: 0.5663
Epoch 02 | Train Loss: 0.6315 | Val Loss: 0.6244
  Train AUC: 0.6780 | Val AUC: 0.6886
  Val Precision: 0.5821 | Recall: 0.6943 | F1: 0.6333
Epoch 03 | Train Loss: 0.6277 | Val Loss: 0.6241
  Train AUC: 0.6847 | Val AUC: 0.6934
  Val Precision: 0.5740 | Recall: 0.7643 | F1: 0.6556
Epoch 04 | Train Loss: 0.6250 | Val Loss: 0.6199
  Train AUC: 0.6889 | Val AUC: 0.6965
  Val Precision: 0.5910 | Recall: 0.6742 | F1: 0.6299
Epoch 05 | Train Loss: 0.6214 | Val Loss: 0.6153
  Train AUC: 0.6947 | Val AUC: 0.7048
  Val Precision: 0.5900 | Recall: 0.7

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,██████████████████████████▁▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▃▄▅▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████
train_loss,█▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁
val_auc,▁▂▃▃▅▅▆▅▆▆▇▇▇▇▇▇▇█▇▇███▇██▇███

0,1
epoch,30
learning_rate,1e-05
test_auc,0.71951
test_f1,0.63602
test_precision,0.61464
test_precision_at_10,0.69
test_recall,0.65894
train_auc,0.71822
train_loss,0.60615
val_auc,0.71951


[34m[1mwandb[0m: Agent Starting Run: 1v4jeo4t with config:
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dropout: 0.3406863534394951
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0001110140781223414
[34m[1mwandb[0m: 	max_len: 100
[34m[1mwandb[0m: 	num_layers: 3


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6438 | Val Loss: 0.6226
  Train AUC: 0.6649 | Val AUC: 0.6919
  Val Precision: 0.5903 | Recall: 0.6655 | F1: 0.6256
Epoch 02 | Train Loss: 0.6279 | Val Loss: 0.6281
  Train AUC: 0.6874 | Val AUC: 0.7022
  Val Precision: 0.5716 | Recall: 0.7937 | F1: 0.6645
Epoch 03 | Train Loss: 0.6223 | Val Loss: 0.6143
  Train AUC: 0.6972 | Val AUC: 0.7101
  Val Precision: 0.5895 | Recall: 0.7458 | F1: 0.6585
Epoch 04 | Train Loss: 0.6203 | Val Loss: 0.6168
  Train AUC: 0.7004 | Val AUC: 0.7067
  Val Precision: 0.5919 | Recall: 0.7020 | F1: 0.6423
Epoch 05 | Train Loss: 0.6188 | Val Loss: 0.6125
  Train AUC: 0.7027 | Val AUC: 0.7102
  Val Precision: 0.6217 | Recall: 0.5

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇████████████
train_loss,█▅▄▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▄▆▅▆▆▆▆▆▇▇▇▇▇▆▇▇▇██▇█████▇███

0,1
epoch,30
learning_rate,0.00011
test_auc,0.71917
test_f1,0.60773
test_precision,0.62876
test_precision_at_10,0.68856
test_recall,0.58806
train_auc,0.71571
train_loss,0.60857
val_auc,0.71745


[34m[1mwandb[0m: Agent Starting Run: nr92grp8 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.3532227716241445
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.00015432970414197675
[34m[1mwandb[0m: 	max_len: 100
[34m[1mwandb[0m: 	num_layers: 3


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6467 | Val Loss: 0.6264
  Train AUC: 0.6596 | Val AUC: 0.6878
  Val Precision: 0.5963 | Recall: 0.5897 | F1: 0.5930
Epoch 02 | Train Loss: 0.6289 | Val Loss: 0.6178
  Train AUC: 0.6856 | Val AUC: 0.7000
  Val Precision: 0.6096 | Recall: 0.5780 | F1: 0.5934
Epoch 03 | Train Loss: 0.6232 | Val Loss: 0.6223
  Train AUC: 0.6950 | Val AUC: 0.7068
  Val Precision: 0.5735 | Recall: 0.7847 | F1: 0.6627
Epoch 04 | Train Loss: 0.6201 | Val Loss: 0.6164
  Train AUC: 0.6999 | Val AUC: 0.7097
  Val Precision: 0.5932 | Recall: 0.7114 | F1: 0.6469
Epoch 05 | Train Loss: 0.6192 | Val Loss: 0.6137
  Train AUC: 0.7019 | Val AUC: 0.7091
  Val Precision: 0.6260 | Recall: 0.5

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,██████████████████▁▁▁▁▁▁▁▁▁▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▅▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇▇███████████
train_loss,█▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▄▅▆▆▆▇▇▇▇▇▇▆▇████▇███████████

0,1
epoch,30
learning_rate,2e-05
test_auc,0.7179
test_f1,0.63205
test_precision,0.61615
test_precision_at_10,0.69
test_recall,0.64879
train_auc,0.71614
train_loss,0.60851
val_auc,0.7179


[34m[1mwandb[0m: Agent Starting Run: 2wud7r12 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	dropout: 0.1342060598911996
[34m[1mwandb[0m: 	epochs: 30
[34m[1mwandb[0m: 	hidden_dim: 64
[34m[1mwandb[0m: 	learning_rate: 0.0005614417465814516
[34m[1mwandb[0m: 	max_len: 50
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Total users: 138559
Churn rate: 45.16%
Input dim: 7

Using device: cuda
Epoch 01 | Train Loss: 0.6395 | Val Loss: 0.6266
  Train AUC: 0.6700 | Val AUC: 0.6908
  Val Precision: 0.5986 | Recall: 0.5851 | F1: 0.5918
Epoch 02 | Train Loss: 0.6253 | Val Loss: 0.6191
  Train AUC: 0.6900 | Val AUC: 0.6982
  Val Precision: 0.6020 | Recall: 0.6085 | F1: 0.6053
Epoch 03 | Train Loss: 0.6216 | Val Loss: 0.6169
  Train AUC: 0.6964 | Val AUC: 0.7035
  Val Precision: 0.6011 | Recall: 0.6523 | F1: 0.6257
Epoch 04 | Train Loss: 0.6195 | Val Loss: 0.6246
  Train AUC: 0.7001 | Val AUC: 0.7019
  Val Precision: 0.6205 | Recall: 0.4640 | F1: 0.5310
Epoch 05 | Train Loss: 0.6182 | Val Loss: 0.6130
  Train AUC: 0.7019 | Val AUC: 0.7099
  Val Precision: 0.6116 | Recall: 0.6

0,1
epoch,▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇███
learning_rate,█████████████████████████▁▁▁▁▁
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▃▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇████
train_loss,█▆▅▅▅▄▄▄▄▃▃▃▃▃▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
val_auc,▁▃▄▄▅▆▆▆▇▇▇▇▇▇▆▇▇▆██▇██▆██████

0,1
epoch,30
learning_rate,6e-05
test_auc,0.72096
test_f1,0.64162
test_precision,0.61198
test_precision_at_10,0.70696
test_recall,0.67428
train_auc,0.72923
train_loss,0.59675
val_auc,0.72078


# GNN

In [17]:
SWEEP_CONFIG = {
    'method': 'bayes',
    'metric': {
        'name': 'test_auc',
        'goal': 'maximize'
    },
    'parameters': {
        'hidden_channels': {
            'values': [64, 128, 256]
        },
        'num_layers': {
            'values': [1, 2, 3]
        },
        'dropout': {
            'distribution': 'uniform',
            'min': 0.1,
            'max': 0.5
        },
        'edges_per_node': {
            'values': [3, 5, 10]
        },
        'learning_rate': {
            'distribution': 'log_uniform_values',
            'min': 0.001,
            'max': 0.1
        },
        'weight_decay': {
            'distribution': 'log_uniform_values',
            'min': 1e-5,
            'max': 1e-3
        },
        'epochs': {
            'value': 100
        }
    }
}

In [18]:
class GraphSAGEChurn(nn.Module):
    def __init__(self, in_channels, hidden_channels=128, num_layers=2, dropout=0.3):
        super(GraphSAGEChurn, self).__init__()

        self.convs = nn.ModuleList()
        self.convs.append(SAGEConv(in_channels, hidden_channels))

        for _ in range(num_layers - 1):
            self.convs.append(SAGEConv(hidden_channels, hidden_channels))

        self.dropout = dropout

        self.classifier = nn.Sequential(
            nn.Linear(hidden_channels, 64),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(32, 1)
        )

    def forward(self, x, edge_index):
        for i, conv in enumerate(self.convs):
            x = conv(x, edge_index)
            if i < len(self.convs) - 1:
                x = F.relu(x)
                x = F.dropout(x, p=self.dropout, training=self.training)

        out = self.classifier(x)
        return torch.sigmoid(out)

def create_temporal_dataset(df, observation_days=31, prediction_days=30):
    max_date = df['event_dt'].max()
    min_date = df['event_dt'].min()

    total_days = (max_date - min_date).days
    print(f"Total data span: {total_days} days ({min_date.date()} to {max_date.date()})")

    observation_end = max_date - pd.Timedelta(days=prediction_days)
    observation_start = observation_end - pd.Timedelta(days=observation_days)

    if observation_start < min_date:
        observation_start = min_date
        print(f"Warning: Adjusted observation start to {observation_start.date()}")

    print(f"Observation window: {observation_start.date()} to {observation_end.date()} ({observation_days} days)")
    print(f"Prediction window: {observation_end.date()} to {max_date.date()} ({prediction_days} days)")

    observation_data = df[
        (df['event_dt'] >= observation_start) &
        (df['event_dt'] < observation_end)
    ]

    prediction_data = df[df['event_dt'] >= observation_end]

    active_users_observation = set(observation_data['device_id'].unique())
    active_users_prediction = set(prediction_data['device_id'].unique())

    churned_users = active_users_observation - active_users_prediction

    print(f"Users in observation: {len(active_users_observation)}")
    print(f"Users in prediction: {len(active_users_prediction)}")
    print(f"Churned users: {len(churned_users)}")

    churn_labels = {}
    for user in active_users_observation:
        churn_labels[user] = 1 if user in churned_users else 0

    return observation_data, churn_labels

def extract_user_features(df):
    features = df.groupby('device_id').agg({
        'event_dt': ['count', 'min', 'max'],
        'session_id': 'nunique',
        'screen': 'nunique',
        'feature': 'nunique'
    })

    features.columns = ['_'.join(col) for col in features.columns]

    features['days_in_window'] = (
        features['event_dt_max'] - features['event_dt_min']
    ).dt.total_seconds() / 86400

    features['events_per_day'] = features['event_dt_count'] / features['days_in_window'].clip(lower=1)
    features['sessions_per_day'] = features['session_id_nunique'] / features['days_in_window'].clip(lower=1)
    features['events_per_session'] = features['event_dt_count'] / features['session_id_nunique'].clip(lower=1)
    features['screen_diversity'] = features['screen_nunique'] / features['event_dt_count']

    first_day = features['event_dt_min'].min()
    features['days_since_first_seen'] = (
        features['event_dt_min'] - first_day
    ).dt.total_seconds() / 86400

    last_day = features['event_dt_max'].max()
    features['days_until_window_end'] = (
        last_day - features['event_dt_max']
    ).dt.total_seconds() / 86400

    features['recency_in_window'] = features['days_until_window_end'] / features['days_in_window'].clip(lower=1)

    return features

def build_random_graph(n_users, edges_per_node=5):
    print(f"Building random graph with {edges_per_node} edges per node...")

    edge_list = []

    for i in range(n_users):
        neighbors = np.random.choice(n_users, size=min(edges_per_node, n_users-1), replace=False)
        neighbors = neighbors[neighbors != i]

        for j in neighbors:
            edge_list.append([i, j])

    edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()

    print(f"Graph: {n_users} nodes, {edge_index.size(1)} edges")

    return edge_index

def calculate_metrics(y_true, y_prob, threshold=0.5):
    y_pred = (y_prob > threshold).astype(int)

    metrics = {
        'auc_roc': roc_auc_score(y_true, y_prob),
        'precision': precision_score(y_true, y_pred, zero_division=0),
        'recall': recall_score(y_true, y_pred, zero_division=0),
        'f1': f1_score(y_true, y_pred, zero_division=0)
    }

    top_10_pct = int(0.1 * len(y_prob))
    top_indices = np.argsort(y_prob)[-top_10_pct:]
    metrics['precision_at_10'] = np.mean(y_true[top_indices])

    return metrics

def train_gnn(model, data, train_mask, val_mask, config, device='cuda'):
    model = model.to(device)
    data = data.to(device)

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate, weight_decay=config.weight_decay)

    best_val_auc = 0
    patience = 15
    patience_counter = 0

    for epoch in range(config.epochs):
        model.train()
        optimizer.zero_grad()

        out = model(data.x, data.edge_index).squeeze()
        loss = criterion(out[train_mask], data.y[train_mask])

        loss.backward()
        optimizer.step()

        model.eval()
        with torch.no_grad():
            out = model(data.x, data.edge_index).squeeze()

            train_pred = out[train_mask].cpu().numpy()
            train_true = data.y[train_mask].cpu().numpy()

            val_pred = out[val_mask].cpu().numpy()
            val_true = data.y[val_mask].cpu().numpy()

            train_metrics = calculate_metrics(train_true, train_pred)
            val_metrics = calculate_metrics(val_true, val_pred)

        wandb.log({
            'epoch': epoch + 1,
            'train_loss': loss.item(),
            'train_auc': train_metrics['auc_roc'],
            'val_auc': val_metrics['auc_roc'],
            'val_precision': val_metrics['precision'],
            'val_recall': val_metrics['recall'],
            'val_f1': val_metrics['f1'],
            'val_precision_at_10': val_metrics['precision_at_10']
        })

        if (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1:03d} | Train Loss: {loss.item():.4f}")
            print(f"  Train AUC: {train_metrics['auc_roc']:.4f} | Val AUC: {val_metrics['auc_roc']:.4f}")
            print(f"  Val Precision: {val_metrics['precision']:.4f} | Recall: {val_metrics['recall']:.4f} | F1: {val_metrics['f1']:.4f}")

        if val_metrics['auc_roc'] > best_val_auc:
            best_val_auc = val_metrics['auc_roc']
            patience_counter = 0
            torch.save(model.state_dict(), 'best_gnn_model.pth')
            torch.save({
                'model_state_dict': model.state_dict(),
                'config': {
                    'in_channels': data.x.size(1),
                    'hidden_channels': config.hidden_channels,
                    'num_layers': config.num_layers,
                    'dropout': config.dropout
                }
            }, 'best_gnn_model_full.pth')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    model.load_state_dict(torch.load('best_gnn_model.pth'))
    model = model.to(device)
    return model

def main(config=None):
    with wandb.init(config=config):
        config = wandb.config

        hidden_channels = getattr(config, 'hidden_channels', 128)
        num_layers = getattr(config, 'num_layers', 2)
        dropout = getattr(config, 'dropout', 0.3)
        edges_per_node = getattr(config, 'edges_per_node', 5)
        learning_rate = getattr(config, 'learning_rate', 0.01)
        weight_decay = getattr(config, 'weight_decay', 5e-4)
        epochs = getattr(config, 'epochs', 100)

        df = pd.read_csv("/content/group_project/clean_data.csv")
        df['event_dt'] = pd.to_datetime(df['event_dt'])

        print("Creating temporal dataset...")
        observation_data, churn_labels = create_temporal_dataset(
            df, observation_days=31, prediction_days=30
        )

        print("\nExtracting user features...")
        user_features = extract_user_features(observation_data)

        user_features = user_features.join(
            pd.Series(churn_labels, name='churn')
        )
        user_features = user_features.dropna()

        print(f"Total users: {len(user_features)}")
        print(f"Churn rate: {user_features['churn'].mean():.2%}")

        feature_cols = [col for col in user_features.columns
                       if col not in ['event_dt_min', 'event_dt_max', 'churn']]

        X = user_features[feature_cols].fillna(0).values
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        print("\nBuilding random graph...")
        edge_index = build_random_graph(len(user_features), edges_per_node=edges_per_node)

        x = torch.FloatTensor(X_scaled)
        y = torch.FloatTensor(user_features['churn'].values)

        data = Data(x=x, edge_index=edge_index, y=y)

        n_users = len(user_features)
        indices = np.arange(n_users)

        train_idx, test_idx = train_test_split(
            indices,
            test_size=0.2,
            random_state=42,
            stratify=user_features['churn']
        )

        train_mask = torch.zeros(n_users, dtype=torch.bool)
        test_mask = torch.zeros(n_users, dtype=torch.bool)
        train_mask[train_idx] = True
        test_mask[test_idx] = True

        print(f"\nTrain users: {train_mask.sum()} | Test users: {test_mask.sum()}")

        model = GraphSAGEChurn(
            in_channels=x.size(1),
            hidden_channels=hidden_channels,
            num_layers=num_layers,
            dropout=dropout
        )

        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"\nUsing device: {device}")

        class Config:
            pass

        train_config = Config()
        train_config.hidden_channels = hidden_channels
        train_config.num_layers = num_layers
        train_config.dropout = dropout
        train_config.learning_rate = learning_rate
        train_config.weight_decay = weight_decay
        train_config.epochs = epochs

        print("\nTraining GNN model...")
        trained_model = train_gnn(
            model,
            data,
            train_mask,
            test_mask,
            train_config,
            device=device
        )

        print("\nFinal Test Evaluation")
        model.eval()
        with torch.no_grad():
            out = model(data.x.to(device), data.edge_index.to(device)).squeeze()
            test_pred = out[test_mask].cpu().numpy()
            test_true = data.y[test_mask].cpu().numpy()

        final_metrics = calculate_metrics(test_true, test_pred)

        wandb.log({
            'test_auc': final_metrics['auc_roc'],
            'test_precision': final_metrics['precision'],
            'test_recall': final_metrics['recall'],
            'test_f1': final_metrics['f1'],
            'test_precision_at_10': final_metrics['precision_at_10']
        })

        artifact = wandb.Artifact('gnn_churn_model', type='model')
        artifact.add_file('best_gnn_model_full.pth')
        wandb.log_artifact(artifact)

        print(f"Test AUC: {final_metrics['auc_roc']:.4f}")
        print(f"Test Precision: {final_metrics['precision']:.4f} | Recall: {final_metrics['recall']:.4f} | F1: {final_metrics['f1']:.4f}")
        print(f"Precision@10%: {final_metrics['precision_at_10']:.4f}")
        print(f"Model saved: best_gnn_model_full.pth")

def run_sweep(count=10):
    sweep_id = wandb.sweep(SWEEP_CONFIG, project="churn-prediction")
    print(f"Sweep ID: {sweep_id}")
    print(f"Running {count} sweep iterations...")
    wandb.agent(sweep_id, function=main, count=count)

In [19]:
run_sweep(count=10)

Create sweep with ID: yxu5j1ls
Sweep URL: https://wandb.ai/katrinpochtar/churn-prediction/sweeps/yxu5j1ls
Sweep ID: yxu5j1ls
Running 10 sweep iterations...


[34m[1mwandb[0m: Agent Starting Run: kf9rvycm with config:
[34m[1mwandb[0m: 	dropout: 0.4838883117059879
[34m[1mwandb[0m: 	edges_per_node: 3
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 256
[34m[1mwandb[0m: 	learning_rate: 0.04505624043899638
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	weight_decay: 7.341154789519326e-05


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 3 edges per node...
Graph: 138559 nodes, 415674 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.7957
  Train AUC: 0.4614 | Val AUC: 0.4616
  Val Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000
Epoch 020 | Train Loss: 0.6883
  Train AUC: 0.5000 | Val AUC: 0.5000
  Val Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000
Early stopping at epoch 27

Final Test Evaluation
Test AUC: 0.5963
Test Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000
Precision@10%: 0.8181
Model saved: best_gnn_model_full.pth


0,1
epoch,▁▁▂▂▂▂▃▃▃▃▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇██
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▇▄▅▃▇▁▆▄▁▂▄█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
train_loss,▁▅█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▇▄▅▃▆▁▆▄▁▂▄█▄▄▄▄▄▄▄▄▄▄▄▄▄▄▄
val_f1,▁▁▁██▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,27
test_auc,0.5963
test_f1,0
test_precision,0
test_precision_at_10,0.81812
test_recall,0
train_auc,0.5
train_loss,0.6886
val_auc,0.5
val_f1,0


[34m[1mwandb[0m: Agent Starting Run: x3f7zl2x with config:
[34m[1mwandb[0m: 	dropout: 0.15554012474815426
[34m[1mwandb[0m: 	edges_per_node: 5
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 64
[34m[1mwandb[0m: 	learning_rate: 0.019270528270531
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	weight_decay: 3.1688706580295574e-05


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 5 edges per node...
Graph: 138559 nodes, 692793 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6351
  Train AUC: 0.7016 | Val AUC: 0.6996
  Val Precision: 0.5929 | Recall: 0.7153 | F1: 0.6483
Epoch 020 | Train Loss: 0.6208
  Train AUC: 0.7086 | Val AUC: 0.7058
  Val Precision: 0.6172 | Recall: 0.5866 | F1: 0.6015
Epoch 030 | Train Loss: 0.6185
  Train AUC: 0.7109 | Val AUC: 0.7079
  Val Precision: 0.6054 | Recall: 0.6711 | F1: 0.6366
Epoch 040 | Train Loss: 0.6154
  Train AUC: 0.7132 | Val AUC: 0.7096
  Val Precision: 0.6049 | Recall: 0

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇███
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▅▇▇▇▇▇▇▇▇▇█████████████████████████████
train_loss,█▇▆▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▅▅▅▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████████████
val_f1,▁▁▂█████████▇███████████████████████████

0,1
epoch,100
test_auc,0.71422
test_f1,0.64133
test_precision,0.60923
test_precision_at_10,0.6651
test_recall,0.677
train_auc,0.72213
train_loss,0.60772
val_auc,0.71394
val_f1,0.63954


[34m[1mwandb[0m: Agent Starting Run: cz19mg14 with config:
[34m[1mwandb[0m: 	dropout: 0.15503378146982816
[34m[1mwandb[0m: 	edges_per_node: 5
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 128
[34m[1mwandb[0m: 	learning_rate: 0.02391278699016154
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.00025004881556633354


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 5 edges per node...
Graph: 138559 nodes, 692790 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6267
  Train AUC: 0.7035 | Val AUC: 0.7011
  Val Precision: 0.6089 | Recall: 0.5917 | F1: 0.6002
Epoch 020 | Train Loss: 0.6175
  Train AUC: 0.7086 | Val AUC: 0.7067
  Val Precision: 0.6084 | Recall: 0.6541 | F1: 0.6305
Epoch 030 | Train Loss: 0.6159
  Train AUC: 0.7099 | Val AUC: 0.7072
  Val Precision: 0.6029 | Recall: 0.6838 | F1: 0.6408
Epoch 040 | Train Loss: 0.6142
  Train AUC: 0.7114 | Val AUC: 0.7088
  Val Precision: 0.6068 | Recall: 0

0,1
epoch,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇███
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▄▁▅▃▄▅▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇███████████████
train_loss,█▇▅▃▄▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▄▁▂▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇████████████████
val_f1,▁▁███▇██▇▇▇▇▇▇▇▇▇██▇█▇█▇███▇▇█▇██▇██▇█▇▇

0,1
epoch,100
test_auc,0.71458
test_f1,0.64075
test_precision,0.61144
test_precision_at_10,0.66402
test_recall,0.67301
train_auc,0.71792
train_loss,0.60863
val_auc,0.71437
val_f1,0.63162


[34m[1mwandb[0m: Agent Starting Run: xp13a0jq with config:
[34m[1mwandb[0m: 	dropout: 0.1900091211815353
[34m[1mwandb[0m: 	edges_per_node: 5
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 128
[34m[1mwandb[0m: 	learning_rate: 0.02602153821011336
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	weight_decay: 5.801981644398389e-05


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 5 edges per node...
Graph: 138559 nodes, 692789 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6897
  Train AUC: 0.5740 | Val AUC: 0.5782
  Val Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000
Epoch 020 | Train Loss: 0.6888
  Train AUC: 0.6783 | Val AUC: 0.6779
  Val Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000
Epoch 030 | Train Loss: 0.6430
  Train AUC: 0.6803 | Val AUC: 0.6778
  Val Precision: 0.5926 | Recall: 0.1202 | F1: 0.1998
Epoch 040 | Train Loss: 0.6272
  Train AUC: 0.7080 | Val AUC: 0.7067
  Val Precision: 0.6119 | Recall: 0

0,1
epoch,▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇██
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▆▄▄▃▃▁▆▆▇▇▇▇▇▇██████████████████████████
train_loss,▃█▃▃▃▃▃▃▄▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▅▅▆▇▁▇▇▇▇▇██████████████████████████████
val_f1,▁█▁▁▁▁▁▁▁▁██████████████████████████████

0,1
epoch,100
test_auc,0.71475
test_f1,0.62756
test_precision,0.62216
test_precision_at_10,0.66149
test_recall,0.63305
train_auc,0.72163
train_loss,0.61252
val_auc,0.71446
val_f1,0.64696


[34m[1mwandb[0m: Agent Starting Run: b9zmqsje with config:
[34m[1mwandb[0m: 	dropout: 0.10257250506495748
[34m[1mwandb[0m: 	edges_per_node: 10
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 128
[34m[1mwandb[0m: 	learning_rate: 0.021181370904751768
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.0002478497046318255


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 10 edges per node...
Graph: 138559 nodes, 1385581 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6802
  Train AUC: 0.6960 | Val AUC: 0.6939
  Val Precision: 0.4633 | Recall: 0.9963 | F1: 0.6325
Epoch 020 | Train Loss: 0.6479
  Train AUC: 0.6989 | Val AUC: 0.6965
  Val Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000
Epoch 030 | Train Loss: 0.6401
  Train AUC: 0.6948 | Val AUC: 0.6936
  Val Precision: 0.0000 | Recall: 0.0000 | F1: 0.0000
Early stopping at epoch 34

Final Test Evaluation
Test AUC: 0.6995
Test Precision: 0.0000 | Recall: 0.

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,█▇▁▁▇▇█▆██▅█▇▇███▇███▇▇▇██████████
train_loss,▂█▅▂▂▂▂▂▂▂▄▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,█▇▁▁▆▇█▆██▅██▇███▇███▇▇▇██████████
val_f1,▁█▁▁█▂▁▁▇█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,34
test_auc,0.69946
test_f1,0
test_precision,0
test_precision_at_10,0.68603
test_recall,0
train_auc,0.69038
train_loss,0.63664
val_auc,0.6885
val_f1,0


[34m[1mwandb[0m: Agent Starting Run: we86sngk with config:
[34m[1mwandb[0m: 	dropout: 0.13598296602090737
[34m[1mwandb[0m: 	edges_per_node: 5
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 128
[34m[1mwandb[0m: 	learning_rate: 0.028628421981113664
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	weight_decay: 3.112311512677688e-05


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 5 edges per node...
Graph: 138559 nodes, 692791 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6839
  Train AUC: 0.6095 | Val AUC: 0.6130
  Val Precision: 0.4556 | Recall: 0.9879 | F1: 0.6236
Early stopping at epoch 19

Final Test Evaluation
Test AUC: 0.6888
Test Precision: 0.5741 | Recall: 0.7271 | F1: 0.6416
Precision@10%: 0.6593
Model saved: best_gnn_model_full.pth


0,1
epoch,▁▁▂▂▃▃▃▄▄▅▅▅▆▆▆▇▇██
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,█▁▆█▃▄▆▇▇▆▇▇▇▇▇████
train_loss,▁█▂▅▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
val_auc,█▁▆█▃▄▆▇▇▇▇▇▇██████
val_f1,▁█▁████▁▇███▃▁▁▂▆▇▅

0,1
epoch,19
test_auc,0.68882
test_f1,0.64163
test_precision,0.57414
test_precision_at_10,0.65933
test_recall,0.72711
train_auc,0.66957
train_loss,0.66831
val_auc,0.67151
val_f1,0.40685


[34m[1mwandb[0m: Agent Starting Run: l9527ne3 with config:
[34m[1mwandb[0m: 	dropout: 0.1355150106138162
[34m[1mwandb[0m: 	edges_per_node: 10
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 64
[34m[1mwandb[0m: 	learning_rate: 0.019282604324632812
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.00026454269112602033


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 10 edges per node...
Graph: 138559 nodes, 1385580 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6239
  Train AUC: 0.7060 | Val AUC: 0.7042
  Val Precision: 0.6071 | Recall: 0.6616 | F1: 0.6332
Epoch 020 | Train Loss: 0.6167
  Train AUC: 0.7093 | Val AUC: 0.7069
  Val Precision: 0.5993 | Recall: 0.7102 | F1: 0.6500
Epoch 030 | Train Loss: 0.6153
  Train AUC: 0.7107 | Val AUC: 0.7079
  Val Precision: 0.6052 | Recall: 0.6712 | F1: 0.6365
Epoch 040 | Train Loss: 0.6138
  Train AUC: 0.7119 | Val AUC: 0.7087
  Val Precision: 0.6049 | Recall:

0,1
epoch,▁▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇████
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████████████████████
train_loss,█▇▄▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▃▄▄▄▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇█████████████████
val_f1,▁▁▆█████████████████████████████████████

0,1
epoch,100
test_auc,0.7149
test_f1,0.64593
test_precision,0.6114
test_precision_at_10,0.66366
test_recall,0.68459
train_auc,0.71958
train_loss,0.60787
val_auc,0.71455
val_f1,0.65253


[34m[1mwandb[0m: Agent Starting Run: ri9fn2il with config:
[34m[1mwandb[0m: 	dropout: 0.1116380036063378
[34m[1mwandb[0m: 	edges_per_node: 5
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 64
[34m[1mwandb[0m: 	learning_rate: 0.012748709124462463
[34m[1mwandb[0m: 	num_layers: 3
[34m[1mwandb[0m: 	weight_decay: 0.00032187366695235947


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 5 edges per node...
Graph: 138559 nodes, 692789 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6248
  Train AUC: 0.7037 | Val AUC: 0.7015
  Val Precision: 0.5855 | Recall: 0.7562 | F1: 0.6600
Epoch 020 | Train Loss: 0.6184
  Train AUC: 0.7078 | Val AUC: 0.7049
  Val Precision: 0.6016 | Recall: 0.6764 | F1: 0.6368
Epoch 030 | Train Loss: 0.6156
  Train AUC: 0.7096 | Val AUC: 0.7067
  Val Precision: 0.6048 | Recall: 0.6728 | F1: 0.6370
Epoch 040 | Train Loss: 0.6147
  Train AUC: 0.7105 | Val AUC: 0.7077
  Val Precision: 0.6056 | Recall: 0

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇▇███
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇████████████
train_loss,█▂▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▃▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇██████████
val_f1,▁▆██████▇▇██████████████████████████████

0,1
epoch,100
test_auc,0.71528
test_f1,0.64387
test_precision,0.61436
test_precision_at_10,0.66366
test_recall,0.67636
train_auc,0.71981
train_loss,0.60723
val_auc,0.71497
val_f1,0.64372


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: pts81huc with config:
[34m[1mwandb[0m: 	dropout: 0.11215420933060366
[34m[1mwandb[0m: 	edges_per_node: 10
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 64
[34m[1mwandb[0m: 	learning_rate: 0.0033222223845730545
[34m[1mwandb[0m: 	num_layers: 2
[34m[1mwandb[0m: 	weight_decay: 0.0005440010178265623


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 10 edges per node...
Graph: 138559 nodes, 1385578 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6308
  Train AUC: 0.7032 | Val AUC: 0.7004
  Val Precision: 0.5949 | Recall: 0.7122 | F1: 0.6483
Epoch 020 | Train Loss: 0.6185
  Train AUC: 0.7064 | Val AUC: 0.7036
  Val Precision: 0.5962 | Recall: 0.6939 | F1: 0.6413
Epoch 030 | Train Loss: 0.6166
  Train AUC: 0.7091 | Val AUC: 0.7066
  Val Precision: 0.6006 | Recall: 0.6979 | F1: 0.6456
Epoch 040 | Train Loss: 0.6148
  Train AUC: 0.7099 | Val AUC: 0.7073
  Val Precision: 0.6020 | Recall:

0,1
epoch,▁▁▁▂▃▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▂▃▄▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████
train_loss,█▇▇▆▃▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▂▂▂▂▄▅▆▆▆▅▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇█████
val_f1,▁▁▄▇████████████████████████████████████

0,1
epoch,100
test_auc,0.71095
test_f1,0.64248
test_precision,0.60494
test_precision_at_10,0.66582
test_recall,0.68499
train_auc,0.71446
train_loss,0.61064
val_auc,0.71095
val_f1,0.64248


[34m[1mwandb[0m: Agent Starting Run: esuvr9fd with config:
[34m[1mwandb[0m: 	dropout: 0.1617701909175199
[34m[1mwandb[0m: 	edges_per_node: 10
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hidden_channels: 64
[34m[1mwandb[0m: 	learning_rate: 0.011909109433808438
[34m[1mwandb[0m: 	num_layers: 1
[34m[1mwandb[0m: 	weight_decay: 0.00035709394216824143


Creating temporal dataset...
Total data span: 60 days (2025-09-01 to 2025-10-31)
Observation window: 2025-09-01 to 2025-10-01 (31 days)
Prediction window: 2025-10-01 to 2025-10-31 (30 days)
Users in observation: 138559
Users in prediction: 117408
Churned users: 62568

Extracting user features...
Total users: 138559
Churn rate: 45.16%

Building random graph...
Building random graph with 10 edges per node...
Graph: 138559 nodes, 1385579 edges

Train users: 110847 | Test users: 27712

Using device: cuda

Training GNN model...
Epoch 010 | Train Loss: 0.6186
  Train AUC: 0.7076 | Val AUC: 0.7054
  Val Precision: 0.5965 | Recall: 0.6976 | F1: 0.6431
Epoch 020 | Train Loss: 0.6162
  Train AUC: 0.7094 | Val AUC: 0.7068
  Val Precision: 0.6045 | Recall: 0.6805 | F1: 0.6403
Epoch 030 | Train Loss: 0.6139
  Train AUC: 0.7107 | Val AUC: 0.7083
  Val Precision: 0.6028 | Recall: 0.6912 | F1: 0.6440
Epoch 040 | Train Loss: 0.6131
  Train AUC: 0.7128 | Val AUC: 0.7099
  Val Precision: 0.6059 | Recall:

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇████
test_auc,▁
test_f1,▁
test_precision,▁
test_precision_at_10,▁
test_recall,▁
train_auc,▁▄▅▅▅▅▆▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇███████████████
train_loss,█▆▅▄▄▃▃▃▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_auc,▁▄▄▄▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇███████████████
val_f1,▁▇██▇▇▇▇▆▇▆▇▇▇▇▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▆▇▇▆▇█▆▇▆▇

0,1
epoch,100
test_auc,0.71366
test_f1,0.64266
test_precision,0.60853
test_precision_at_10,0.66474
test_recall,0.68084
train_auc,0.71751
train_loss,0.6087
val_auc,0.71366
val_f1,0.64266
