In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from os.path import join as path_join
from clan_tools.data_adapters.YTAdapter import YTAdapter
from clan_tools.data_adapters.YQLAdapter import YQLAdapter
import spyt
import pyspark.sql.functions as F
import pyspark.sql.types as T
from pyspark.sql.functions import col, lit
from pyspark.sql.window import Window
from clan_tools.utils.spark import SPARK_CONF_MEDIUM

pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 250)
os.environ['NUMEXPR_MAX_THREADS'] = '32'

In [2]:
from clan_tools.secrets.Vault import Vault
Vault().get_secrets('sec-01fm06fw1zsqp08cxtyd247tm5')
yt_adapter = YTAdapter()

spark = spyt.connect(spark_conf_args=SPARK_CONF_MEDIUM)
spyt.info(spark)

2022-01-28 11:28:23,976 - INFO - spyt.client - SPYT Cluster version: 3.0.1-1.23.1+yandex
2022-01-28 11:28:23,978 - INFO - spyt.client - SPYT library version: 1.3.5


In [3]:
def max_by(colname_max, colname_by):
    return F.expr(f"Max_by(`{colname_max}`, `{colname_by}`)")

def min_by(colname_min, colname_by):
    return F.expr(f"Min_by(`{colname_min}`, `{colname_by}`)")

def min_where(colname_min, colname_where, value_where):
    return F.min(F.when(col(colname_where)==value_where, col(colname_min)))

def max_where(colname_max, colname_where, value_where):
    return F.max(F.when(col(colname_where)==value_where, col(colname_max)))

def nvl(colname, value_if_null):
    return F.when(col(colname).isNull(), value_if_null).otherwise(col(colname)).alias(colname)

def str_today():
    return datetime.now().strftime('%Y-%m-%d')

def add_days(n):
    return (datetime.now()+timedelta(days=n)).strftime('%Y-%m-%d')

In [4]:
crm_company_accs_path = '//home/cloud_analytics/import/crm/business_accounts/data'
dwh_bas_prod_path = '//home/cloud-dwh/data/prod/ods/billing/billing_accounts'
yc_cons_prod_path = '//home/cloud-dwh/data/prod/cdm/dm_yc_consumption'
crm_tag_prod_path = '//home/cloud-dwh/data/prod/cdm/dm_ba_crm_tags'

crm_calls_path = '//home/cloud-dwh/data/prod/raw/mysql/crm-cloud/cloud8_calls'
leads_cube_path = '//home/cloud_analytics/kulaga/leads_cube'
oppty_cube_path = '//home/cloud_analytics/kulaga/oppty_cube'
sales_cube_path = '//home/cloud_analytics/kulaga/acc_sales_ba_cube'

dwh_accs = (
    spark.read.yt(dwh_bas_prod_path)
    .select(
        'billing_account_id',
        'person_type',
        'state',
        'usage_status',
        'is_suspended_by_antifraud',
        'block_reason',
        F.to_date(F.to_timestamp('created_at')).alias('date_created')
    )
    .withColumn('tech_date', F.date_add('date_created', 45))
    .join(
        spark.read.yt(crm_tag_prod_path).select('billing_account_id', 'segment_current').distinct(),
        on='billing_account_id', how='left'
    )
    .cache()
)

dwh_accs.limit(10).toPandas()

Unnamed: 0,billing_account_id,person_type,state,usage_status,is_suspended_by_antifraud,block_reason,date_created,tech_date,segment_current
0,dn200bv4qsjb0fec9sbh,individual,suspended,trial,False,trial_expired,2020-04-26,2020-06-10,Mass
1,dn2013u8biata15k2fl0,individual,suspended,trial,False,trial_expired,2019-09-18,2019-11-02,Mass
2,dn201banbibsg37ihit9,individual,suspended,trial,False,trial_expired,2020-09-22,2020-11-06,Mass
3,dn201v2m3ae57drbp08q,individual,suspended,paid,False,unbound_card,2020-01-26,2020-03-11,Mass
4,dn202b0kplcdmrfjbvf8,company,active,paid,False,,2019-04-15,2019-05-30,Mass
5,dn202bihmmoecv31i6bg,individual,suspended,trial,True,manual,2021-02-20,2021-04-06,Mass
6,dn202nk7gs5smf1q71do,individual,suspended,trial,False,trial_expired,2021-09-05,2021-10-20,Mass
7,dn203incdkndainhl85s,individual,suspended,trial,True,antifraud,2021-10-11,2021-11-25,Mass
8,dn2055uk17oict9rin1t,individual,suspended,paid,True,manual,2021-06-30,2021-08-14,Mass
9,dn2056up18j1q6vhp0ee,individual,suspended,trial,False,trial_expired,2021-01-04,2021-02-18,Mass


In [5]:
crm_company_accs = (
    spark.read.yt(crm_company_accs_path)
    .select('billing_account_id')
    .distinct()
)

dwh_company_accs = (
    dwh_accs
    .filter(col('person_type').isin([
        'company',
        'kazakhstan_company',
        'switzerland_nonresident_company'
    ]))
    .select('billing_account_id')
    .distinct()
)

all_company_accs = (
    crm_company_accs
    .union(dwh_company_accs)
    .join(dwh_accs, on='billing_account_id', how='inner')
    .distinct()
    .filter(~col('block_reason').isin(['manual', 'mining']))
    .filter(col('usage_status')=='paid')
    .filter(col('is_suspended_by_antifraud')==False)
    .filter(~col('state').isin(['inactive', 'payment_not_confirmed', 'deleted']))
    .filter(col('segment_current')=='Mass')
)

tt = all_company_accs.toPandas()
tt

Unnamed: 0,billing_account_id,person_type,state,usage_status,is_suspended_by_antifraud,block_reason,date_created,tech_date,segment_current
0,dn25moooa0morhnir4fg,individual,suspended,paid,False,debt,2019-10-04,2019-11-18,Mass
1,dn25v42i0vm2t99llf28,company,suspended,paid,False,debt,2019-07-22,2019-09-05,Mass
2,dn29ol2kieog4otqb6uu,company,active,paid,False,trial_expired,2020-10-15,2020-11-29,Mass
3,dn2b9d01hsi21qbe6rf7,company,active,paid,False,trial_expired,2020-01-10,2020-02-24,Mass
4,dn2c096hiroj7nq99s54,company,active,paid,False,trial_expired,2021-11-10,2021-12-25,Mass
...,...,...,...,...,...,...,...,...,...
2593,dn2ndffs3iinkru5s9eg,company,active,paid,False,trial_expired,2020-12-04,2021-01-18,Mass
2594,dn2qo3vl45ah8inoof3d,company,active,paid,False,trial_expired,2021-04-26,2021-06-10,Mass
2595,dn2qq8be5aggb6gs8ouu,company,suspended,paid,False,debt,2021-03-02,2021-04-16,Mass
2596,dn2u2dojhsmqka305tgo,company,active,paid,False,trial_expired,2020-10-28,2020-12-12,Mass


In [6]:
pd_accs = all_company_accs.select('billing_account_id').distinct().toPandas()
pd_accs['key'] = '0'

pd_dates = pd.Series(pd.date_range(add_days(-14), add_days(-1))).dt.strftime('%Y-%m-%d')
pd_dates = pd.DataFrame(pd_dates, columns=['billing_record_msk_date'])
pd_dates['key'] = '0'

pd_accs_dates = pd_accs.merge(pd_dates, on='key', how='outer')
pd_accs_dates = pd_accs_dates[['billing_account_id', 'billing_record_msk_date']]
spdf_accs_dates = spark.createDataFrame(pd_accs_dates)

ba_cons = (
    spark.read.yt(yc_cons_prod_path)
    .filter(col('billing_record_msk_date')<str_today())
    .filter(col('billing_record_msk_date')>=add_days(-14))
    .groupby('billing_account_id', 'billing_record_msk_date')
    .agg(
        F.sum('billing_record_real_consumption_rub').alias('billing_record_real_consumption_rub'),
        F.sum((col('sku_service_name')!='cloud_ai').astype('decimal')).alias('no_ml_cons'),
    )
    .join(spdf_accs_dates, on=['billing_account_id', 'billing_record_msk_date'], how='right')
    .select(
        'billing_account_id',
        'billing_record_msk_date',
        nvl('billing_record_real_consumption_rub', 0),
        nvl('no_ml_cons', 0)
    )
    .groupby('billing_account_id')
    .agg(
        (F.sum('no_ml_cons')>0).astype('decimal').alias('no_ml_cons'),
        F.greatest(F.mean('billing_record_real_consumption_rub'), lit(0.01)).alias('avg_consumption'),
        F.greatest(F.stddev('billing_record_real_consumption_rub'), lit(0.01)).alias('std_consumption')
    )
    .withColumn('is_plateau', (1.000*col('std_consumption')/col('avg_consumption')<=0.1).astype('decimal'))
    .select('billing_account_id', 'no_ml_cons', 'is_plateau')
    .cache()
)

ba_cons.limit(10).toPandas()

Unnamed: 0,billing_account_id,no_ml_cons,is_plateau
0,dn2s4hknn3l7r8hd5pih,1,1
1,dn2p5qehd37rvi8sh1o5,1,1
2,dn245t02o8rt6bh9emuv,1,1
3,dn28nrfdokovbhnnql2r,0,0
4,dn2iue3d0f30l7sihh0k,1,1
5,dn2nu92425o7v0jdo8os,0,0
6,dn213jc4mq90jicgolql,1,0
7,dn29qak94or9p8fktu2p,0,0
8,dn2md3doqn6om4jl01i6,0,0
9,dn27i8cfpo42acgn4qgr,0,0


In [7]:
current_staff = (
    spark.read.yt('//home/cloud-dwh/data/prod/ods/staff/persons')
    .join(
        spark.read.yt('//home/cloud-dwh/data/prod/ods/staff/PII/persons'),
        on='staff_user_id', how='left'
    )
    .filter(col('department_id')==10701)
    .filter(col('official_is_dismissed')==False)
    .select('staff_user_login')
)

stf_list = current_staff.toPandas()['staff_user_login'].tolist()

date_of_last_lead = (
    spark.read.yt(leads_cube_path)
    .filter(col('lead_source')=='upsell')
    .filter(~col('billing_account_id').isNull())
    .filter(
        col('lead_source_description').isin([
            "Upsell", "upsell", "contact more then 70 days",
            "Consumed more than 40k over last 30 days",
        ]) |
        (F.substring('lead_source_description', 0, 19)=='Potential candidate')
    )
    .withColumn('user_name', F.when(col('user_name').isin(stf_list), col('user_name')))
    .withColumn('user_name', F.last('user_name', ignorenulls=True).over(
        Window
        .partitionBy('billing_account_id')
        .orderBy('date_entered')
        .rowsBetween(Window.unboundedPreceding, Window.currentRow)
    ))
    .groupby('billing_account_id')
    .agg(
        F.max(F.to_date('date_entered')).alias('last_lead_date'),
        max_by('user_name', 'date_entered').alias('last_actual_manager')
    )
)

date_of_last_lead.limit(10).toPandas()

Unnamed: 0,billing_account_id,last_lead_date,last_actual_manager
0,dn20019hmsotshd3h00m,2021-08-21,moiseeva-m
1,dn2002tb4so3qiuv5pug,2020-12-16,moiseeva-m
2,dn2003veg3dhst66v202,2021-05-30,gingerkote
3,dn2005cbd7mvmnqq9umb,2021-10-12,
4,dn2008o4f5a257jlva1j,2021-04-08,gingerkote
5,dn20092p4c9ad4t9ut98,2020-08-01,
6,dn200aim29ftg1mvs525,2021-10-12,
7,dn200gfvhov6bb06vic1,2021-10-12,moiseeva-m
8,dn200jvrdepehoosl1g6,2021-06-11,
9,dn200kfe3vgf6n6aq6tk,2020-02-05,gingerkote


In [8]:
spdf_leads = (
    spark.read.yt(leads_cube_path)
    .select(col('billing_account_id').alias('leads_ba_id'), col('lead_id').alias('parent_id'))
    .distinct()
)

spdf_oppty = (
    spark.read.yt(oppty_cube_path)
    .select(col('ba_id').alias('oppty_ba_id'), col('opp_id').alias('parent_id'))
    .distinct()
)

spdf_sales = (
    spark.read.yt(sales_cube_path)
    .select(col('ba_id').alias('sales_ba_id'), col('acc_id').alias('parent_id'))
    .distinct()
)

date_of_last_held_call = (
    spark.read.yt(crm_calls_path).alias('calls')
    .filter(col('deleted')==0)
    .filter(col('status')=='Held')
    .filter(col('parent_type').isin(['Accounts', 'Leads', 'Opportunities']))
    .join(spdf_leads, on='parent_id', how='left')
    .join(spdf_oppty, on='parent_id', how='left')
    .join(spdf_sales, on='parent_id', how='left')
    .select(
        'id',
        'parent_type',
        F.to_date(F.to_timestamp(col('date_start')/1000000)).alias('call_date'),
        'status',
        'leads_ba_id',
        'oppty_ba_id',
        'sales_ba_id',
        F.coalesce('leads_ba_id', 'oppty_ba_id', 'sales_ba_id').alias('billing_account_id')
    )
    .distinct()
    .filter(~col('billing_account_id').isNull())
    .groupby('billing_account_id')
    .agg(F.max('call_date').alias('last_held_call_date'))
    .cache()
)

print(date_of_last_held_call.count())
date_of_last_held_call.limit(10).toPandas()

23021


Unnamed: 0,billing_account_id,last_held_call_date
0,dn24qbie9257ikmo66gm,2019-07-08
1,dn2tn03v0unji215ltoj,2020-11-27
2,dn2083nldm21nm6jcojo,2019-04-18
3,dn220sd1g2focm6o31oq,2019-04-22
4,dn2tmnooc200usdtgo0v,2021-07-09
5,dn26hf5vrjbvtrjso7v5,2019-04-18
6,dn28babda192gsq6tbru,2021-07-20
7,dn28t8k4jhf8ag71c2c0,2019-04-18
8,dn25juk3qbihksh29dgm,2021-10-21
9,dn2plcfroihbg6cs71mf,2022-01-21


In [9]:
person_data_path = '//home/cloud_analytics/import/crm/leads/contact_info'

contact_info = spark.read.yt(person_data_path)

contact_info.toPandas()

Unnamed: 0,billing_account_id,display_name,email,first_name,last_name,phone
0,dn200017upuhslibj3i4,Евгения Леонидовна Ульдина,Uldina0vrn7@yandex.ru,Евгения,Ульдина,79510297261
1,dn200028bphsvjlgqvrj,Вячеслав Романович Машичев,Mashichev0n1ax@yandex.ru,Вячеслав,Машичев,79089712526
2,dn200044hmorna15pjap,Vadim Andreevich Kiselev,fortyfack76@yandex.ru,Vadim,Kiselev,79772886560
3,dn200086ohv8mj7r444s,Илья Валерьевич Смит,smit.illya@yandex.ru,Илья,Смит,79641389015
4,dn2000jkhome4jed828a,Радик Фаритович Мингажев,radick.mingazhev@yandex.ru,Радик,Мингажев,79914702594
...,...,...,...,...,...,...
273500,dn2vvv5v29n4v39nk92c,Варвара Дмитриевна Ковригина,Onlimusik@yandex.ru,Варвара,Ковригина,79217578285
273501,dn2vvv9h240ugjstsgem,Андрей Дмитриевич Иванов,m4rrras@yandex.ru,Андрей,Иванов,79610230500
273502,dn2vvvhdfj02lamfm70d,Петр Иванович Иванов,sdfhdfshsdh@yandex.ru,Петр,Иванов,79677105568
273503,dn2vvvsam1kcabhqkli0,Михаил Владимирович Бак,michaelbak@yandex.ru,Михаил,Бак,79991216496


In [10]:
spdf_mdb = (
    spark.read.yt(yc_cons_prod_path)
    .filter(col('sku_service_name')=='mdb')
    .groupby('billing_account_id')
    .agg((F.sum('billing_record_cost_rub')>0).astype('decimal').alias('use_mdb'))
)

spdf_mdb.limit(10).toPandas()

Unnamed: 0,billing_account_id,use_mdb
0,dn2cqs65j4ko95ndmnae,1
1,dn2hnuh5ijte2755t899,1
2,dn2ncin3k2vaju3m0a8t,1
3,dn2si0vorjs7irnc83p9,1
4,dn204o23s4l9jajhq6v3,1
5,dn26r5fg7edsoaki2epq,1
6,dn2j34hc2cu4p8mvd2bn,1
7,dn2d76mm2l18hnp7n14a,1
8,dn2decccj6g8mruq939l,1
9,dn2g9hioevncb3ot3e5a,1


In [11]:
spdf_db_on_vm = (
    spark.read.yt('//home/cloud_analytics/import/network-logs/db-on-vm/data')
    .filter(col('billing_account_id')!='')
    .groupby('billing_account_id')
    .agg(F.array_join(F.collect_set('db'), ', ').alias('db_on_vm'))
)

spdf_db_on_vm.limit(10).toPandas()

Unnamed: 0,billing_account_id,db_on_vm
0,dn200017upuhslibj3i4,"mysql, postgresql, clickhouse"
1,dn200028bphsvjlgqvrj,"mysql, postgresql, clickhouse"
2,dn200086ohv8mj7r444s,"mysql, redis, postgresql, clickhouse"
3,dn2000mso2sk0dqp60c6,"mysql, postgresql, clickhouse"
4,dn20019hmsotshd3h00m,"mysql, redis, postgresql, clickhouse"
5,dn2002dnc3fh79gh2s32,postgresql
6,dn2002uqiske4gmnm4t8,"mysql, postgresql, clickhouse"
7,dn2003crp0li9o8npa7m,"mysql, redis, postgresql, clickhouse"
8,dn2003cu9hr6ljmf9nda,"mysql, redis, postgresql, clickhouse"
9,dn2003g4ddtk99obdaat,"mysql, postgresql, clickhouse"


In [12]:
spdf_main = (
    all_company_accs
    .join(ba_cons, on='billing_account_id', how='left')
    .join(date_of_last_lead, on='billing_account_id', how='left')
    .join(date_of_last_held_call, on='billing_account_id', how='left')
    .join(contact_info, on='billing_account_id', how='left')
    .join(spdf_mdb, on='billing_account_id', how='left')
    .join(spdf_db_on_vm, on='billing_account_id', how='left')
    .withColumn('last_lead_date', F.coalesce('last_lead_date', 'tech_date'))
    .withColumn('last_held_call_date', F.coalesce('last_held_call_date', 'tech_date'))
    .select(
        '*',
        F.datediff(lit(str_today()), col('last_lead_date')).alias('last_lead_days_ago'),
        F.datediff(lit(str_today()), col('last_held_call_date')).alias('last_call_days_ago'),
    )
    .withColumn('use_mdb', nvl('use_mdb', 0))
    .withColumn(
        'description',
        F.when(
            (col('use_mdb')==0) & col('db_on_vm').isNotNull(),
            F.concat(lit('Client Use BD on VM: '), 'db_on_vm')
        ).otherwise(lit(''))
    )
    .cache()
)

tt = spdf_main.toPandas()
tt

Unnamed: 0,billing_account_id,person_type,state,usage_status,is_suspended_by_antifraud,block_reason,date_created,tech_date,segment_current,no_ml_cons,is_plateau,last_lead_date,last_actual_manager,last_held_call_date,display_name,email,first_name,last_name,phone,use_mdb,db_on_vm,last_lead_days_ago,last_call_days_ago,description
0,dn25moooa0morhnir4fg,individual,suspended,paid,False,debt,2019-10-04,2019-11-18,Mass,0,0,2020-02-03,gingerkote,2019-10-28,Mds __ __,mshare@yandex.ru,Mds,__,79165403339,0,"mysql, redis, postgresql, clickhouse",725,823,"Client Use BD on VM: mysql, redis, postgresql,..."
1,dn25v42i0vm2t99llf28,company,suspended,paid,False,debt,2019-07-22,2019-09-05,Mass,0,0,2021-09-06,gingerkote,2019-09-05,"ООО ""Волга-Тур""",levina@alyanstour.ru,,Волга-Тур,79873955682,0,"mysql, redis, postgresql, clickhouse",144,876,"Client Use BD on VM: mysql, redis, postgresql,..."
2,dn29ol2kieog4otqb6uu,company,active,paid,False,trial_expired,2020-10-15,2020-11-29,Mass,1,1,2021-05-16,nikitagrekhov,2021-10-11,Общество с ограниченной ответственностью «Ю-СРМ»,tcapb@yandex.ru,,ООО «Ю-СРМ»,79112474576,0,,257,109,
3,dn2b9d01hsi21qbe6rf7,company,active,paid,False,trial_expired,2020-01-10,2020-02-24,Mass,1,0,2020-11-15,gingerkote,2020-11-20,ООО Хорум,admin@horum.dev,,Хорум,79217485037,0,"mysql, redis, postgresql, clickhouse",439,434,"Client Use BD on VM: mysql, redis, postgresql,..."
4,dn2c096hiroj7nq99s54,company,active,paid,False,trial_expired,2021-11-10,2021-12-25,Mass,1,1,2021-12-25,,2022-01-21,"Общество с ограниченной ответсвенностью ""Образ...",alex@enjoy-camp.ru,,"ООО ""Образовательные инновации""",79164447737,0,"mysql, redis, postgresql, clickhouse",34,7,"Client Use BD on VM: mysql, redis, postgresql,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2593,dn2ndffs3iinkru5s9eg,company,active,paid,False,trial_expired,2020-12-04,2021-01-18,Mass,0,0,2021-01-18,,2021-01-18,Индивидуальный предприниматель Кочнева Анастас...,check@rukodelov.ru,,ИП Кочнева Анастасия Юрьевна,79165073868,0,,375,375,
2594,dn2qo3vl45ah8inoof3d,company,active,paid,False,trial_expired,2021-04-26,2021-06-10,Mass,0,0,2021-06-10,,2021-06-10,"Общество с ограниченной ответственностью ""Техн...",piv@cce.su,,"ООО ""ТУиС""",79138298719,0,,232,232,
2595,dn2qq8be5aggb6gs8ouu,company,suspended,paid,False,debt,2021-03-02,2021-04-16,Mass,0,0,2021-11-14,moiseeva-m,2021-11-24,Общество с ограниченной ответственностью «Прес...,KardosRiteil@yandex.ru,,"ООО ""Пресса ру""",79268005301,0,,75,65,
2596,dn2u2dojhsmqka305tgo,company,active,paid,False,trial_expired,2020-10-28,2020-12-12,Mass,0,0,2021-02-02,,2021-06-10,ИП Ищенко Олег Анатольевич,cloudbaser@yandex.ru,,ИП Ищенко Олег Анатольевич,79175243178,0,,360,232,


In [13]:
upsell_leads_temp = (
    spdf_main
    .filter(col('is_plateau')==1)
    .filter(col('no_ml_cons')==1)
    .filter(col('last_lead_days_ago')>70)
    .filter(col('last_call_days_ago')>30)
    .sort(nvl('last_lead_days_ago', 1000).desc())
    .limit(40)
)

curr_timestamp = int(datetime.today().timestamp())
upsell_leads = (
    upsell_leads_temp
    .select(
        lit(curr_timestamp).alias('Timestamp'),
        lit(None).astype(T.StringType()).alias('CRM_Lead_ID'),
        F.concat(lit('["'), 'billing_account_id', lit('"]')).alias('Billing_account_id'),
        lit(None).astype(T.StringType()).alias('Status'),
        col('description').alias('Description'),
        F.coalesce('last_actual_manager', lit('admin')).alias('Assigned_to'),
        col('first_name').alias('First_name'),
        col('last_name').alias('Last_name'),
        col('phone').alias('Phone_1'),
        lit(None).astype(T.StringType()).alias('Phone_2'),
        col('email').alias('Email'),
        lit('upsell').alias('Lead_Source'),
        lit('upsell').alias('Lead_Source_Description'),
        lit(None).astype(T.StringType()).alias('Callback_date'),
        lit(None).astype(T.StringType()).alias('Last_communication_date'),
        lit(None).astype(T.StringType()).alias('Promocode'),
        lit(None).astype(T.StringType()).alias('Promocode_sum'),
        lit(None).astype(T.StringType()).alias('Notes'),
        lit(None).astype(T.StringType()).alias('Dimensions'),
        lit(None).astype(T.StringType()).alias('Tags'),
        lit('').alias('Timezone'),
        col('display_name').alias('Account_name')
    )
)

upsell_leads.toPandas()

Unnamed: 0,Timestamp,CRM_Lead_ID,Billing_account_id,Status,Description,Assigned_to,First_name,Last_name,Phone_1,Phone_2,Email,Lead_Source,Lead_Source_Description,Callback_date,Last_communication_date,Promocode,Promocode_sum,Notes,Dimensions,Tags,Timezone,Account_name
0,1643358877,,"[""dn2muo28g4her430cks9""]",,"Client Use BD on VM: mysql, redis, postgresql,...",admin,Иван,Варанкин,79112207276,,varankin54@yandex.ru,upsell,upsell,,,,,,,,,Иван Александрович Варанкин
1,1643358877,,"[""dn2t3ceqst6b6ut9teel""]",,"Client Use BD on VM: mysql, redis, postgresql,...",gingerkote,Максим,Волков,79106631052,,marahal@yandex.ru,upsell,upsell,,,,,,,,,Максим Леондович Волков
2,1643358877,,"[""dn2onlje8gci28lvburl""]",,"Client Use BD on VM: mysql, redis, postgresql,...",dmtroe,Алексей,Прибытков,79276126473,,ladavtor@yandex.ru,upsell,upsell,,,,,,,,,Алексей Андреевич Прибытков
3,1643358877,,"[""dn2kth2vqfuvcq3ra2l4""]",,,gingerkote,Руслан,Сеитханов,77016438757,,r.seitkhanov@solo-interier.kz,upsell,upsell,,,,,,,,,Руслан Сейтканович Сеитханов
4,1643358877,,"[""dn24u79dcdqah89752i4""]",,"Client Use BD on VM: mysql, redis, postgresql,...",moiseeva-m,Евгений,Головинов,79268125031,,evgeny@golovinov.info,upsell,upsell,,,,,,,,,Евгений Эдуардович Головинов
5,1643358877,,"[""dn2k0668h8mgpa9rf3u9""]",,"Client Use BD on VM: mysql, redis, postgresql,...",gingerkote,Максим,Лаптев,79686381277,,maxim.laptev2014@yandex.ru,upsell,upsell,,,,,,,,,Максим Александрович Лаптев
6,1643358877,,"[""dn2aveanh9rephqmpnah""]",,"Client Use BD on VM: mysql, redis, postgresql,...",nikitagrekhov,Александр,Миловидов,79169534164,,manowar@aha.ru,upsell,upsell,,,,,,,,,Александр Николаевич Миловидов
7,1643358877,,"[""dn2flcq7v1cqejo2aitl""]",,,gingerkote,Захар,Давыдов,447455190769,,davydov@studiodart.eu,upsell,upsell,,,,,,,,,Захар Андреевич Давыдов
8,1643358877,,"[""dn2qf0atlki5khrj4462""]",,,gingerkote,,ИП Попков Альберт Михайлович,79255458055,,albertpopkov@yandex.ru,upsell,upsell,,,,,,,,,ИП Попков Альберт Михайлович
9,1643358877,,"[""dn2iuvntil6a189jcfrp""]",,"Client Use BD on VM: mysql, redis, postgresql,...",moiseeva-m,Сергей,Тишинов,79168054161,,tishinovs@gmail.com,upsell,upsell,,,,,,,,,Сергей Григорьевич Тишинов


In [14]:
contact_more_than_70_days = (
    spdf_main
    .join(upsell_leads_temp, on='billing_account_id', how='leftanti')
    .filter(col('last_held_call_date').isNotNull())
    .filter(col('last_lead_days_ago')>70)
    .filter(col('last_call_days_ago')>70)
    .sort(nvl('last_call_days_ago', 1000).desc())
    .limit(40)
    .select(
        lit(curr_timestamp).alias('Timestamp'),
        lit(None).astype(T.StringType()).alias('CRM_Lead_ID'),
        F.concat(lit('["'), 'billing_account_id', lit('"]')).alias('Billing_account_id'),
        lit(None).astype(T.StringType()).alias('Status'),
        col('description').alias('Description'),
        F.coalesce('last_actual_manager', lit('admin')).alias('Assigned_to'),
        col('first_name').alias('First_name'),
        col('last_name').alias('Last_name'),
        col('phone').alias('Phone_1'),
        lit(None).astype(T.StringType()).alias('Phone_2'),
        col('email').alias('Email'),
        lit('upsell').alias('Lead_Source'),
        lit('contact more then 70 days').alias('Lead_Source_Description'),
        lit(None).astype(T.StringType()).alias('Callback_date'),
        lit(None).astype(T.StringType()).alias('Last_communication_date'),
        lit(None).astype(T.StringType()).alias('Promocode'),
        lit(None).astype(T.StringType()).alias('Promocode_sum'),
        lit(None).astype(T.StringType()).alias('Notes'),
        lit(None).astype(T.StringType()).alias('Dimensions'),
        lit(None).astype(T.StringType()).alias('Tags'),
        lit('').alias('Timezone'),
        col('display_name').alias('Account_name')
    )
)

print(contact_more_than_70_days.count())

contact_more_than_70_days.toPandas()

40


Unnamed: 0,Timestamp,CRM_Lead_ID,Billing_account_id,Status,Description,Assigned_to,First_name,Last_name,Phone_1,Phone_2,Email,Lead_Source,Lead_Source_Description,Callback_date,Last_communication_date,Promocode,Promocode_sum,Notes,Dimensions,Tags,Timezone,Account_name
0,1643358877,,"[""dn2tiund0b32idhph3me""]",,,admin,,ООО «МишельАвто»,79251760977,,admmavto@yandex.ru,upsell,contact more then 70 days,,,,,,,,,Общество с ограниченной ответственностью «Мише...
1,1643358877,,"[""dn2vv6a9075kq276hpi2""]",,,admin,,ТИМКА-ТИМКА.РУ,79213290269,,marat@popeye.ws,upsell,contact more then 70 days,,,,,,,,,"ООО ""ТИМКА-ТИМКА.РУ"""
2,1643358877,,"[""dn2nhou3kuqvpvjiono1""]",,,admin,,Олпрайм,78312128981,,olprime@yandex.ru,upsell,contact more then 70 days,,,,,,,,,ООО Олпрайм
3,1643358877,,"[""dn27alahfginpr1o19gp""]",,,admin,,Test Yandex LLC,79645777830,,yc.sag@yandex.ru,upsell,contact more then 70 days,,,,,,,,,Test Yandex LLC
4,1643358877,,"[""dn2nhbgq6g78f065o3md""]",,"Client Use BD on VM: mysql, redis, postgresql,...",nikitagrekhov,Анастасия,Морозова,79095406401,,m3.npk.ett@yandex.ru,upsell,contact more then 70 days,,,,,,,,,Анастасия Викторовна Морозова
5,1643358877,,"[""dn2kmtp654vg0jmjkmv2""]",,,gingerkote,,"ООО ""ФИНПОРТ""",79163245770,,info@finport.ru,upsell,contact more then 70 days,,,,,,,,,"ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ ""ФИНА..."
6,1643358877,,"[""dn2ufei2650d1espulju""]",,,admin,,АО ДКИ,79201911686,,erofeev.org@yandex.ru,upsell,contact more then 70 days,,,,,,,,,"АО ""ДомКом Инвест"""
7,1643358877,,"[""dn22pm653ddlbk8mjsu3""]",,,admin,Евгений,Ожиганов,79524444921,,cloud@vozro.ru,upsell,contact more then 70 days,,,,,,,,,Евгений Николаевич Ожиганов
8,1643358877,,"[""dn23ev2dfipdonjentp2""]",,,admin,Сергей,Бормотов,79265507635,,foxvillage@yandex.ru,upsell,contact more then 70 days,,,,,,,,,Сергей Иванович Бормотов
9,1643358877,,"[""dn29bvbf4jke72or96d9""]",,,dmtroe,,"ООО ""МЕДТРЕЙД СПБ""",79676303378,,medtrade.vm@yandex.ru,upsell,contact more then 70 days,,,,,,,,,"ОБЩЕСТВО С ОГРАНИЧЕННОЙ ОТВЕТСТВЕННОСТЬЮ ""МЕДТ..."


In [15]:
upsell_leads.union(contact_more_than_70_days).toPandas()

Unnamed: 0,Timestamp,CRM_Lead_ID,Billing_account_id,Status,Description,Assigned_to,First_name,Last_name,Phone_1,Phone_2,Email,Lead_Source,Lead_Source_Description,Callback_date,Last_communication_date,Promocode,Promocode_sum,Notes,Dimensions,Tags,Timezone,Account_name
0,1643358877,,"[""dn2muo28g4her430cks9""]",,"Client Use BD on VM: mysql, redis, postgresql,...",admin,Иван,Варанкин,79112207276,,varankin54@yandex.ru,upsell,upsell,,,,,,,,,Иван Александрович Варанкин
1,1643358877,,"[""dn2t3ceqst6b6ut9teel""]",,"Client Use BD on VM: mysql, redis, postgresql,...",gingerkote,Максим,Волков,79106631052,,marahal@yandex.ru,upsell,upsell,,,,,,,,,Максим Леондович Волков
2,1643358877,,"[""dn2onlje8gci28lvburl""]",,"Client Use BD on VM: mysql, redis, postgresql,...",dmtroe,Алексей,Прибытков,79276126473,,ladavtor@yandex.ru,upsell,upsell,,,,,,,,,Алексей Андреевич Прибытков
3,1643358877,,"[""dn2kth2vqfuvcq3ra2l4""]",,,gingerkote,Руслан,Сеитханов,77016438757,,r.seitkhanov@solo-interier.kz,upsell,upsell,,,,,,,,,Руслан Сейтканович Сеитханов
4,1643358877,,"[""dn24u79dcdqah89752i4""]",,"Client Use BD on VM: mysql, redis, postgresql,...",moiseeva-m,Евгений,Головинов,79268125031,,evgeny@golovinov.info,upsell,upsell,,,,,,,,,Евгений Эдуардович Головинов
5,1643358877,,"[""dn2k0668h8mgpa9rf3u9""]",,"Client Use BD on VM: mysql, redis, postgresql,...",gingerkote,Максим,Лаптев,79686381277,,maxim.laptev2014@yandex.ru,upsell,upsell,,,,,,,,,Максим Александрович Лаптев
6,1643358877,,"[""dn2aveanh9rephqmpnah""]",,"Client Use BD on VM: mysql, redis, postgresql,...",nikitagrekhov,Александр,Миловидов,79169534164,,manowar@aha.ru,upsell,upsell,,,,,,,,,Александр Николаевич Миловидов
7,1643358877,,"[""dn2flcq7v1cqejo2aitl""]",,,gingerkote,Захар,Давыдов,447455190769,,davydov@studiodart.eu,upsell,upsell,,,,,,,,,Захар Андреевич Давыдов
8,1643358877,,"[""dn2qf0atlki5khrj4462""]",,,gingerkote,,ИП Попков Альберт Михайлович,79255458055,,albertpopkov@yandex.ru,upsell,upsell,,,,,,,,,ИП Попков Альберт Михайлович
9,1643358877,,"[""dn2iuvntil6a189jcfrp""]",,"Client Use BD on VM: mysql, redis, postgresql,...",moiseeva-m,Сергей,Тишинов,79168054161,,tishinovs@gmail.com,upsell,upsell,,,,,,,,,Сергей Григорьевич Тишинов


In [16]:
spyt.stop(spark)