### Init Context

In [2]:
from thetaray.api.context import init_context
from datetime import datetime  


import yaml

import logging
# Configure the root logger: DEBUG threshold, records rendered as the bare message text.
logging.basicConfig(format='%(message)s', level=logging.DEBUG)

# Spark settings live in the solution's YAML config under the 'spark_config_a' key.
# NOTE(review): yaml.load with FullLoader is kept unchanged; if this file could ever
# carry untrusted content, consider yaml.safe_load instead.
spark_config_path = '/thetaray/git/solutions/domains/demo_digital_wallets/config/spark_config.yaml'
with open(spark_config_path) as spark_config_file:
    spark_config_document = yaml.load(spark_config_file, Loader=yaml.FullLoader)
spark_config = spark_config_document['spark_config_a']

# Create the context object that the later cells pass to the dataset reader and
# the histogram writer, and that the final cell closes.
context = init_context(spark_conf=spark_config, execution_date=datetime(1970, 2, 1))

2025-08-19 14:55:09,175:INFO:thetaray.common.logging:start loading solution.....[ load_risks=True , solution_path=/thetaray/git/solutions/domains , settings_path=/thetaray/git/solutions/settings ]
2025-08-19 14:55:09,550:INFO:thetaray.common.logging:load_risks took: 0.17912769317626953
2025-08-19 14:55:10,153:INFO:thetaray.common.logging:=== Started updating schema ===


### Imports

In [3]:
from thetaray.api.dataset import dataset_functions
from thetaray.api.evaluation import fit_on_worker
from thetaray.api.histograms import save_histograms
from thetaray.api.evaluation.preprocess.features_extractor import FeaturesExtractor
from thetaray.api.models import save_model
from thetaray.api.anomaly_detection import ThetaRayDetector
from thetaray.api.evaluation.preprocess.numeric_features import NumericFeaturesTransformer

import mlflow
from pyspark.sql import functions as f

from domains.demo_digital_wallets.datasets.customer_monthly import customer_monthly_dataset as customer_monthly_dataset
from domains.demo_digital_wallets.evaluation_flows.ef import evaluation_flow as ef
from thetaray.common.data_environment import DataEnvironment

### Load data

In [4]:
# Read the customer-monthly dataset from the PUBLIC data environment.
# NOTE(review): from_job_ts=1970-01-01 presumably means "no lower bound" — confirm.
customer_monthly_identifier = customer_monthly_dataset().identifier
dataset = dataset_functions.read(
    context,
    customer_monthly_identifier,
    from_job_ts=datetime(1970, 1, 1),
    data_environment=DataEnvironment.PUBLIC,
)

# Materialise the result via toPandas() so the training cell can consume it.
data_pd = dataset.toPandas()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
25/08/19 14:55:14 WARN MetricsConfig: Cannot locate configuration: tried hadoop-metrics2-s3a-file-system.properties,hadoop-metrics2.properties
Hive Session ID = 26fb7254-9422-40d2-8cd2-ada283c9b848
25/08/19 14:55:18 INFO SessionState: Hive Session ID = 26fb7254-9422-40d2-8cd2-ada283c9b848
                                                                                

In [5]:
# Report how many rows were loaded for training.
training_row_count = len(data_pd)
print('NUMBER OF TRX TO TRAIN THE MODEL:', training_row_count)

NUMBER OF TRX TO TRAIN THE MODEL: 18000


### Model Training

In [5]:
# Feature columns fed to the feature extractor and reused for the histograms.
requested_features = [
    'struct_score',
    'rapid_spend',
    'crypto_score',
    'mto_score',
    'act_spike',
    'pct_domestic',
    'rev_ratio',
]

# Feature-extraction pipeline: one numeric transformer over the requested features.
# NOTE(review): strategy='constant' with fill_value=0.0 presumably fills missing
# values with 0.0 — confirm against the NumericFeaturesTransformer docs.
nft = NumericFeaturesTransformer(features=requested_features, strategy='constant', fill_value=0.0)
fu = FeaturesExtractor([nft])

# Detector configuration.
# NOTE(review): the captured output of this cell reports Fusion_threshold 0.3 and
# Rating_percentile 5.0, which does not match the arguments below; re-run the cell
# and confirm which values the detector actually uses.
trd = ThetaRayDetector(
    algo_type=['Ny', 'RL', 'NF'],
    learning_method=1,
    normalization_type=1,
    Fusion_threshold=0.5,
    Rating_percentile=1.0,
    set_zero_rating=1,
)

# Resolve the first evaluation step once, before any fitting or MLflow run is
# started: a broken flow definition then fails immediately instead of after the
# feature extractor has already been fitted, and the flow is not rebuilt for
# every name/tags lookup.
evaluation_step = ef().evaluation_steps[0]
feature_extraction_spec = evaluation_step.feature_extraction_model
detection_spec = evaluation_step.detection_model

with mlflow.start_run(nested=True):
    # Fit the feature extractor and save it under the flow's declared name and tags.
    feature_extraction_model = fit_on_worker(fu.fit, X=data_pd)
    save_model(feature_extraction_spec.name, feature_extraction_model, tags=feature_extraction_spec.tags)

    # The detector is trained on the extractor's output, not on the raw frame.
    extracted_features = feature_extraction_model.transform(data_pd)
    detection_model = fit_on_worker(trd.fit, X=extracted_features)
    save_model(detection_spec.name, detection_model, tags=detection_spec.tags)

    # Histograms are saved from the raw (untransformed) frame for the requested features.
    save_histograms(context, data_pd, requested_features)

2025-08-19 13:55:54.796039: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-19 13:55:54.797837: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-19 13:55:54.800835: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-19 13:55:54.808344: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1755611754.822374   35506 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1755611754.82

##################################################
## Migrating to ModularSystem (AnomalyDetector) ##
##################################################
User Parameters:
Algos_to_run: ['Ny', 'RL', 'NF']
normalization_type: 1
Fusion_threshold: 0.3
Rating_percentile: 5.0
thread_mode: 1
max_wrk: None
nucset_sampling: False


2025-08-19 13:56:05,136:INFO:botocore.credentials:Found credentials in environment variables.
2025/08/19 13:56:05 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: manager_demo_dwallets_fe, version 4
2025-08-19 13:56:18.701449: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
2025/08/19 13:56:26 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: manager_demo_dwallets_ad, version 4


🏃 View run angry-fish-48 at: https://mlflow:5000/#/experiments/0/runs/4251bc65c8844f68a19566e900e7c6af
🧪 View experiment at: https://mlflow:5000/#/experiments/0


In [None]:
# Close the context created by init_context at the top of the notebook.
# NOTE(review): presumably this releases the underlying Spark session — confirm.
context.close()