In [1]:
import os
import sys
sys.path.append(os.path.abspath('../src'))

import spyt
import pandas as pd
import pyspark.sql.functions as F

from clan_tools.utils.spark import SPARK_CONF_SMALL
from clan_tools.data_adapters.YTAdapter import YTAdapter
from clan_tools.data_adapters.YQLAdapter import YQLAdapter
from clan_tools.secrets.Vault import Vault

from ml_flows.targets.onboarding_targets import PaymentsTarget
from ml_flows.utils.features import ConfigLoader

In [2]:
# system settings
# NOTE(review): numexpr reads NUMEXPR_MAX_THREADS when it is first imported,
# and pandas is already imported in the cell above - confirm this assignment
# still takes effect, or move it before the pandas import.
os.environ['NUMEXPR_MAX_THREADS'] = '32'

# widen the pandas display limits so wide feature tables render in full
for display_option, display_limit in (
    ('display.max_rows', 200),
    ('display.max_columns', 250),
):
    pd.set_option(display_option, display_limit)

# loading secrets
# (kept before the adapters are created; presumably they rely on the
# credentials fetched here - TODO confirm)
vault_client = Vault()
vault_client.get_secrets(secret_name='robot-clanalytics-yt')

# initialization of sessions
yql_adapter = YQLAdapter()
yt_adapter = YTAdapter()
spark = spyt.connect(spark_conf_args=SPARK_CONF_SMALL)
spyt.info(spark)

2022-07-27 15:19:38,430 - INFO - spyt.client - SPYT Cluster version: 1.41.0
2022-07-27 15:19:38,433 - INFO - spyt.client - SPYT library version: 1.36.0
2022-07-27 15:19:38,661 - INFO - spyt.client - SHS link: http://sas7-5151-node-hahn.sas.yp-c.yandex.net:27005/history/app-20220727151937-0264/jobs/


In [3]:
# loading test target
custom_date_from = '2022-04-01'
custom_date_to = '2022-04-30'

payment_target = PaymentsTarget(
    spark, yt_adapter, date_from=custom_date_from, date_to=custom_date_to,
    days_in_future=10, amount_for_positive=10)

spdf_name = payment_target.generate_spark_dataframe_name()
print(spdf_name)
spdf = payment_target.generate_spark_dataframe()

# Class balance of the target: total rows and the sum of the `target` column.
# Both aggregates are computed in a single Spark job instead of two separate
# actions (count + agg), so the target frame is evaluated only once.
target_stats = spdf.agg(
    F.count(F.lit(1)).alias('obs_all'),
    F.sum('target').alias('obs_pos'),
).first()
obs_all = target_stats['obs_all']
obs_pos = target_stats['obs_pos']
if obs_pos is None:
    # F.sum yields NULL when there are no (non-null) target values
    obs_pos = 0

# Guard against an empty target frame: avoid ZeroDivisionError on the share
pos_share = round(obs_pos / obs_all, 2) if obs_all else float('nan')
print(f'{obs_pos}/{obs_all}  ({pos_share})')

ONB_PaymentsTarget_score_30_future_10_amount_10
241/5643  (0.04)


In [4]:
# example config
# Nesting is: outer key -> source name -> aggregation name -> list of columns.
# NOTE(review): the outer keys ("30", "14") look like look-back windows in
# days - TODO confirm against ConfigLoader.

# consumption columns shared by both outer keys
consumption_columns = [
    "billing_record_cost_rub",
    "billing_record_credit_rub",
    "billing_record_total_rub",
    "billing_record_credit_trial_rub",
    "billing_record_expense_rub",
    "billing_record_total_redistribution_rub",
    "billing_record_var_reward_rub",
    "billing_record_real_consumption_rub",
    "sku_lazy_total_rub",
    "sku_non_lazy_total_rub",
]

# per-service-group consumption columns, requested only under "14"
service_group_columns = [
    "sku_service_group_Adjustments",
    "sku_service_group_Business & Dev Tools",
    "sku_service_group_Data Storage and Analytics",
    "sku_service_group_Infrastructure",
    "sku_service_group_Kubernetes",
    "sku_service_group_ML and AI",
    "sku_service_group_Marketplace",
    "sku_service_group_Professional Services",
    "sku_service_group_Serverless",
    "sku_service_group_Support",
]

crypta_columns = [
    "pr_additional_education_and_courses",
    "sd_proba_inc_a",
    "sd_proba_inc_b1",
    "sd_proba_inc_b2",
    "sd_proba_inc_c1",
    "sd_proba_inc_c2",
]

features_config = {
    "30": {
        # list(...) keeps this entry independent of the shared list object
        "consumption": {"SUM": list(consumption_columns)},
        "payments": {"SUM": ["paid_amount"]},
    },
    "14": {
        "crypta": {"MAX": list(crypta_columns)},
        "consumption": {"SLP": consumption_columns + service_group_columns},
        "visits": {
            "SUM": ["visit_duration_sum"],
            "AVG": ["visits_day_count"],
        },
        "hits": {"SUM": ["services/compute"]},
    },
}

In [5]:
# Build the features described by `features_config` on top of the target
# frame; 'date' is passed positionally as the fourth ConfigLoader argument.
CL = ConfigLoader(spark, yt_adapter, spdf, 'date')
dts = CL.load_config(features_config)

# Pull only the first 25 rows into pandas for a quick visual check
dts_preview = dts.limit(25)
ts = dts_preview.toPandas()
ts

Preparing 30 - consumption...
Preparing 30 - payments...
Preparing 14 - crypta...
Preparing 14 - consumption...
Preparing 14 - visits...
Preparing 14 - hits...


Unnamed: 0,billing_account_id,date,billing_record_cost_rub_30_sum,billing_record_credit_rub_30_sum,billing_record_total_rub_30_sum,billing_record_credit_trial_rub_30_sum,billing_record_expense_rub_30_sum,billing_record_total_redistribution_rub_30_sum,billing_record_var_reward_rub_30_sum,billing_record_real_consumption_rub_30_sum,sku_lazy_total_rub_30_sum,sku_non_lazy_total_rub_30_sum,paid_amount_30_sum,pr_additional_education_and_courses_14_max,sd_proba_inc_a_14_max,sd_proba_inc_b1_14_max,sd_proba_inc_b2_14_max,sd_proba_inc_c1_14_max,sd_proba_inc_c2_14_max,billing_record_cost_rub_14_slp,billing_record_credit_rub_14_slp,billing_record_total_rub_14_slp,billing_record_credit_trial_rub_14_slp,billing_record_expense_rub_14_slp,billing_record_total_redistribution_rub_14_slp,billing_record_var_reward_rub_14_slp,billing_record_real_consumption_rub_14_slp,sku_lazy_total_rub_14_slp,sku_non_lazy_total_rub_14_slp,sku_service_group_Adjustments_14_slp,sku_service_group_Business & Dev Tools_14_slp,sku_service_group_Data Storage and Analytics_14_slp,sku_service_group_Infrastructure_14_slp,sku_service_group_Kubernetes_14_slp,sku_service_group_ML and AI_14_slp,sku_service_group_Marketplace_14_slp,sku_service_group_Professional Services_14_slp,sku_service_group_Serverless_14_slp,sku_service_group_Support_14_slp,visit_duration_sum_14_sum,visits_day_count_14_avg,services/compute_14_sum
0,dn2009fgmovdhqf1solk,2022-04-06,1336.003645,-1085.127311,250.876333,0.0,250.876333,250.876333,0.0,250.876333,2.120417,248.755917,45.139999,,,,,,,4.552323e-16,4.349603,4.349603,0.0,4.349603,4.349603,0.0,4.349603,0.0367326,4.31287,0.0,0.0,0.0,4.349603,0.0,0.0,0.0,0.0,0.0,0.0,,,
1,dn200er2eicgue8s1lkh,2022-04-29,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,dn200u6ksldn95c1t2ca,2022-04-17,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,dn201tg4ld4e3io2tvfh,2022-04-21,7752.602413,-3906.132185,3846.470229,0.0,3846.470229,3846.470229,0.0,3846.470229,324.17638,3522.293849,3662.630014,0.0,0.012586,0.145188,0.296652,0.538867,0.322275,10.09374,16.81309,26.90683,0.0,26.90683,26.90683,0.0,26.90683,2.051669,24.85516,0.0,0.0,19.592421,7.31441,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0
4,dn204735dcd6vmck6dgo,2022-04-01,366.716607,-366.716607,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.159225,0.564768,0.216926,0.489276,0.178333,,,,,,,,,,,,,,,,,,,,,7432.0,5.5,0.0
5,dn20559ntvadfn7kdv2h,2022-04-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.143995,0.510368,0.365042,0.51619,0.100074,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
6,dn209uivr8i61a2bemvp,2022-04-10,765.086746,-765.086746,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.081222,0.498194,0.271115,0.264544,0.099488,0.3333059,-0.3333059,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,0.0
7,dn20dcqd40qd4i5ti477,2022-04-26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,1.0,0.024211,0.152479,0.252543,0.547776,0.312171,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2939.0,1.444444,0.0
8,dn20eldc9rcbus8f4a3t,2022-04-30,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,dn20jdh7pfm123jh15at,2022-04-09,268.400873,-268.400873,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0273,0.280253,0.279931,0.42564,0.130871,4.28436e-16,-4.28436e-16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
