In [54]:
import scipy.stats as sps
from sklearn.model_selection import train_test_split
# NOTE(review): exact duplicate of the import on the previous line.
from sklearn.model_selection import train_test_split
import gc
# NOTE(review): wildcard import; which dateutil names the notebook relies on
# cannot be told from this cell.
from dateutil.parser import *
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import string
import os
# Change the working directory before importing robot_lib
# (presumably so the local module is importable from there; TODO confirm).
# NOTE(review): absolute, machine-specific path.
os.chdir('/Users/lunin-dv/Desktop/Library/')
import importlib
import robot_lib as lib
import operator
import re
# Reload robot_lib so edits made to it after the first import are picked up.
importlib.reload(lib)
# The remaining cells run with this directory as the working directory.
# NOTE(review): absolute, machine-specific path.
os.chdir('/Users/lunin-dv/Desktop/data')

In [58]:
# Take the maximum of what the helper returns for the CRM leads folder.
# Despite the name this is not a DataFrame: the value is interpolated as a
# table path into the lead-source query further down.
# Presumably table names sort chronologically, so max() is the latest
# snapshot; TODO confirm.
leads_df = max(lib.find_tables_in_hahn_folder("//home/cloud_analytics/dwh/raw/crm/leads"))

In [56]:
# Build one row per promocode offer / ticket grant, with its consumption.
#
# * 'promocode' rows come from monetary_grant_offers_prod; an ANY LEFT JOIN to
#   monetary_grants_prod on offer id == grant source_id attaches the activated
#   grant (is_activated, billing account, start/end time) when there is one.
# * 'grant' rows come from monetary_grants_prod where source is 'st'; the
#   ticket key is the first space-separated token of the upper-cased source_id.
# * consumed_amount is joined from grants_spending by grant id; negative values
#   are clamped to 0.
# * Only rows whose ticket starts with "cloud" (case-insensitive) and contains
#   a "-" are kept.
#
# The regex backslashes are written doubled ("\\d", "\\1", "\\2") so that Python
# passes "\d", "\1" and "\2" to the server unchanged. A bare "\d" in a non-raw
# Python string is an invalid escape sequence and triggers a warning.
df = lib.execute_query("""
SELECT
    DISTINCT
    type,
    id,
    source_id,
    promocode_proposer,
    ticket,
    created_time,
    start_using_grant_time,
    end_time,
    initial_amount,
    is_activated,
    billing_account_id,
    consumed_amount
FROM (
    SELECT
        DISTINCT 
        'promocode' as type,
        ticket_grants.id as source_id,
        is_activated.id as id,
        visitParamExtractRaw(proposed_meta, 'staffLogin') as promocode_proposer,
        replaceRegexpOne(
        visitParamExtractRaw(upper(proposed_meta), 'REASON') || visitParamExtractRaw(upper(proposed_meta), 'TICKET'), 
        '.*CLOUD(.*)-([\\d]*).*', 'CLOUD\\1-\\2') as ticket,
        toDateTime(created_at) as created_time,
        toDateTime(start_time) as start_using_grant_time,
        toFloat32(initial_amount) as initial_amount,
        ifNull(end_time_grant, toDateTime(expiration_time)) as end_time,
        is_activated,
        billing_account_id
    FROM "//home/cloud/billing/exported-billing-tables/monetary_grant_offers_prod" as ticket_grants
    ANY LEFT JOIN (
    SELECT
        DISTINCT
            billing_account_id,
            source_id,
            start_time,
            1 as is_activated,
            id,
            toDateTime(end_time) as end_time_grant
        FROM "//home/cloud/billing/exported-billing-tables/monetary_grants_prod"
    ) as is_activated
    ON ticket_grants.id == is_activated.source_id
    
    UNION ALL
    
    SELECT
        DISTINCT
        'grant' as type,
        source_id,
        id,
        '' as promocode_proposer,
        splitByChar(' ', assumeNotNull(upper(source_id)))[1] as ticket,
        toDateTime(created_at) as created_time,
        toDateTime(start_time) as start_using_grant_time,
        toFloat32(initial_amount) as initial_amount,
        toDateTime(end_time) as end_time,
        1 as is_activated,
        billing_account_id
    FROM "//home/cloud/billing/exported-billing-tables/monetary_grants_prod"
    WHERE lower(source) == 'st'
) as main
LEFT JOIN (
    SELECT
        DISTINCT 
            id,
            if(consumed_amount < 0, 0, consumed_amount) as consumed_amount
    FROM "//home/cloud_analytics/import/billing/grants_spending"
) as consume_info
ON main.id == consume_info.id
WHERE lower(ticket) like 'cloud%'
AND ticket like '%-%'
ORDER BY ticket DESC
FORMAT TabSeparatedWithNames
""")

In [63]:
# One row per promocode from the leads table selected into `leads_df`, exposed
# as `source_id` so it can be merged onto `df`.
# any() keeps a single lead_source / lead_source_description per promocode;
# which one is kept when a promocode has several is not controlled here.
# Presumably `source_id` in the WHERE clause resolves to the `promocode` alias,
# so empty and NULL promocodes are dropped; TODO confirm.
lead_source_df = lib.execute_query(f"""
SELECT
    promocode as source_id,
    any(lead_source) as lead_source,
    any(lead_source_description) as lead_source_description
FROM "{leads_df}"
WHERE source_id != ''
and isNotNull(source_id)
GROUP BY promocode
FORMAT TabSeparatedWithNames
""")

In [65]:
# Attach lead source columns to every grant row; rows whose source_id has no
# match keep NaN in the new columns (left join).
df = df.merge(lead_source_df, how='left', on='source_id')

In [69]:
# Per-billing-account attributes taken from the acquisition cube: contact
# fields, state, segment, sales name, first trial/paid consumption dates and
# the account creation date.
#
# client_type is 'company' when the account appears in the joined sub-select
# (cube rows with a company person type or a corporate card, plus every account
# listed in the CRM business_accounts table) and 'individual' otherwise.
#
# The event filter is written with IN (...) so that the non-empty
# billing_account_id condition applies to both event types. Written as
# `a OR b AND c`, AND binds tighter than OR, so the condition only constrained
# 'cloud_created' rows.
additional_account_info = lib.execute_query("""
SELECT
    DISTINCT
        billing_account_id,
        first_name,
        last_name,
        phone,
        user_settings_email as email,
        ba_state,
        segment,
        account_name,
        if (client_type == 'company', 'company', 'individual') as client_type,
        sales_name,
        if (first_first_paid_consumption_datetime != '0000-00-00 00:00:00',
            toDate(first_first_paid_consumption_datetime),
            null) as go_to_paid_date,
        if (first_first_trial_consumption_datetime != '0000-00-00 00:00:00',
            toDate(first_first_trial_consumption_datetime),
            null) as go_to_trial_date,
        toDate(first_ba_created_datetime) as ba_created_datetime
FROM "//home/cloud_analytics/cubes/acquisition_cube/cube" as cube
ANY LEFT JOIN (
    SELECT
        DISTINCT 
        billing_account_id,
        'company' as client_type
    FROM (
        SELECT
            DISTINCT
            billing_account_id,
            if (ba_person_type like '%company%', 1, is_corporate_card) as is_company
        FROM "//home/cloud_analytics/cubes/acquisition_cube/cube"
        WHERE is_company == 1
    
        UNION ALL
        SELECT 
            billing_account_id,
            1 as is_company
        FROM "//home/cloud_analytics/import/crm/business_accounts/data"
    )
) as is_company
ON cube.billing_account_id == is_company.billing_account_id
WHERE event IN ('ba_created', 'cloud_created')
AND billing_account_id != ''
FORMAT TabSeparatedWithNames
""")

In [70]:
# Each billing account must appear exactly once, otherwise the left merge onto
# `df` would duplicate grant rows.
n_accounts = additional_account_info['billing_account_id'].nunique(dropna=False)
assert n_accounts == len(additional_account_info)

In [71]:
# Enrich grant rows with account attributes; grants without a matching billing
# account keep NaN in the added columns (left join).
df = df.merge(additional_account_info, on='billing_account_id', how='left')

In [72]:
# Strip tabs from ticket keys: both the literal two-character sequence
# backslash + "t" and real tab characters.
# Punctuation is intentionally left alone: the "-" is needed later to split the
# queue prefix from the ticket key. The former `.split(string.punctuation)`
# used the whole punctuation string as a single delimiter, so it never removed
# anything.
df['ticket'] = (
    df['ticket']
    .str.replace('\\t', '', regex=False)
    .str.replace('\t', '', regex=False)
)

In [73]:
# Load the grants information table from the wiki page via the robot_lib helper.
# The following cells read its `ticket`, `grant_company_name`, `direction` and
# `upsell_experiment_names` columns.
grant_information = lib.get_wiki_table('users/lunin-dv/grants-information-table/')

In [74]:
def _first_non_empty(values):
    """Return the first value in ``values`` that is not "", or "" if there is none."""
    return next((value for value in values if value != ""), "")


def _unique_tokens(values):
    """Return the distinct space-separated tokens of ``values``, sorted and re-joined with spaces."""
    tokens = {token for value in values for token in value.split(' ')}
    tokens.discard("")
    return " ".join(sorted(tokens))


# Collapse the wiki table to one row per ticket.
# The pick among several values is deterministic: first non-empty value in row
# order for the single-valued fields, sorted order for the experiment names.
# Iterating a set of strings instead can give a different order on every
# interpreter run, and the result is written back to the wiki.
grouped_grant_information = grant_information.groupby('ticket')
rows = []
for ticket, table in grouped_grant_information:
    rows.append({
        'ticket': ticket,
        # Only the company name is stripped of surrounding whitespace.
        'grant_company_name': _first_non_empty(table['grant_company_name']).strip(),
        'direction': _first_non_empty(table['direction']),
        'upsell_experiment_names': _unique_tokens(table['upsell_experiment_names']),
    })

In [75]:
# Rebuild the per-ticket table from the collapsed rows.
# Columns are listed explicitly so the frame keeps its `ticket` column (needed
# by the merge on 'ticket') even when `rows` is empty.
grant_information = pd.DataFrame(
    rows,
    columns=['ticket', 'grant_company_name', 'direction', 'upsell_experiment_names'],
)

In [76]:
# Sanity check: list the distinct experiment-name strings after collapsing.
grant_information['upsell_experiment_names'].unique()

array(['', 'Upsell-1_mdb_on_vm_1_iteration', 'ML_upsell test_experiment'],
      dtype=object)

In [77]:
# Write the collapsed per-ticket table back to the same wiki page it was read from.
# NOTE(review): presumably this overwrites the page content, so re-running the
# notebook modifies shared state; confirm.
lib.replace_wiki_table('users/lunin-dv/grants-information-table/', grant_information)

In [78]:
# Add the wiki-maintained per-ticket attributes to the grant rows; tickets
# absent from the wiki table get NaN in those columns (left join).
final_table = df.merge(grant_information, how='left', on='ticket')

In [79]:
# Tickets without a wiki row have NaN here after the left merge; label those
# and empty strings as 'unknown'.
final_table['direction'] = final_table['direction'].map(
    lambda value: 'unknown' if pd.isnull(value) or value == '' else value
)
# Missing company names become "", and apostrophes are removed from the rest.
# This is a Series-level map: the row-wise DataFrame.apply(axis=1) it replaces
# read only this column and does not return a Series on an empty frame.
final_table['grant_company_name'] = final_table['grant_company_name'].map(
    lambda value: "" if pd.isnull(value) else value.replace("'", "")
)

In [80]:
# Queue prefix of the ticket key: everything before the first "-".
final_table['ticket_query'] = final_table['ticket'].map(
    lambda ticket_key: ticket_key.partition('-')[0]
)

In [81]:
from startrek_client import Startrek
from startrek_client.settings import VERSION_SERVICE

In [82]:
# Startrek (tracker) API client, authenticated with the token exposed by robot_lib.
# NOTE(review): base_url ends with "/v2/myself", which looks like a specific
# endpoint rather than an API root; confirm this is intended.
client = Startrek(useragent="robot-clanalytics-yt", 
                  base_url="https://st-api.yandex-team.ru/v2/myself", token=lib.startrek_token)

In [83]:
def ticket_assignee(x):
    """Return ``x.assignee.id``, or '-' if reading it raises any exception."""
    try:
        assignee_id = x.assignee.id
    except Exception:
        assignee_id = '-'
    return assignee_id


def ticket_creator(x):
    """Return ``x.createdBy.id``, or '-' if reading it raises any exception."""
    try:
        creator_id = x.createdBy.id
    except Exception:
        creator_id = '-'
    return creator_id


def ticket_tags(x):
    """Return the issue tags as one comma-separated string with all spaces removed.

    Returns '-' when there are no tags or when reading them raises any exception.
    """
    placeholder = '-'
    try:
        if not len(x.tags):
            return placeholder
        joined = ','.join(x.tags)
        return joined.replace(' ', '')
    except Exception:
        return placeholder

def ticket_components(x):
    """Return the component names as one comma-separated string with all spaces removed.

    Returns '-' when there are no components or when reading them raises any exception.
    """
    placeholder = '-'
    try:
        names = []
        for component in x.components:
            names.append(component.name)
        if not names:
            return placeholder
        return ','.join(names).replace(' ', '')
    except Exception:
        return placeholder


def ticket_summary(x):
    """Return ``x.summary``, or '-' if reading it raises any exception."""
    try:
        summary_text = x.summary
    except Exception:
        summary_text = '-'
    return summary_text
    


In [84]:
# Names of the extractor functions defined in this notebook.
# all_ticket_info() looks each one up by name via globals(), and the same
# strings are used as keys of the per-ticket dict and as output column names.
tickets_func = ['ticket_assignee', 'ticket_creator', 'ticket_tags', 'ticket_components', 'ticket_summary']

In [85]:
def all_ticket_info(ticket):
    """Fetch one issue by key and run every extractor listed in ``tickets_func`` on it.

    Returns a dict keyed by extractor name. If anything raises (for example the
    issue cannot be fetched), the ticket key is printed and every field is '-'.
    """
    try:
        issue = client.issues[ticket]
        collected = {}
        for extractor_name in tickets_func:
            # Extractors are module-level functions resolved by their name.
            extractor = globals()[extractor_name]
            collected[extractor_name] = extractor(issue)
        return collected
    except Exception:
        print(ticket)
        return dict.fromkeys(tickets_func, '-')

In [86]:
# Smoke test of the lookup on a single known ticket key.
all_ticket_info('CLOUDPS-851')

{'ticket_assignee': 'megaeee',
 'ticket_creator': 'dabari',
 'ticket_tags': 'ya_dwh',
 'ticket_components': '-',
 'ticket_summary': 'Промокоды для пользователей, кто ушёл'}

Нет доступа к следующим очередям Трекера (запросы к их тикетам ниже завершаются ошибкой 403):

- CLOUDCONTACT
- CLOUDPROJECTS
- CLOUDFRONT
- CLOUDCRM

In [87]:
# Query the tracker once per distinct ticket key (not once per row) and keep
# the results keyed by ticket.
unique_tickets = final_table['ticket'].unique()
ticket_dict = dict(zip(unique_tickets, map(all_ticket_info, unique_tickets)))

ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.


CLOUDPROJECTS-88


ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_track

CLOUDFRONT-632
CLOUDFRONT-600
CLOUDFRONT-566
CLOUDFRONT-554
CLOUDFRONT-543


ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.


CLOUDFRONT-533
CLOUDFRONT-523
CLOUDFRONT-517
CLOUDFRONT-513
CLOUDFRONT-509
CLOUDFRONT-506


ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_track

CLOUDFRONT-504
CLOUDFRONT-492
CLOUDFRONT-489
CLOUDFRONT-470
CLOUDFRONT-455
CLOUDFRONT-450
CLOUDFRONT-2034


ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.


CLOUDFRONT-100


ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.
ERROR:yandex_tracker_client.connection:Tracker errors: 403 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Нет доступа к задаче.


CLOUDCRM-475
CLOUDCONTACT-672
CLOUDCONTACT-1585


ERROR:yandex_tracker_client.connection:Tracker errors: 404 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Задача не существует.


CLOUDBIZ-5424


ERROR:yandex_tracker_client.connection:Tracker errors: 404 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Задача не существует.


CLOUDBIZ-25474


ERROR:yandex_tracker_client.connection:Tracker errors: 404 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Задача не существует.
ERROR:yandex_tracker_client.connection:Tracker errors: 404 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Задача не существует.


CLOUD-2222222
CLOUD-211111


ERROR:yandex_tracker_client.connection:Tracker errors: 404 {}
ERROR:yandex_tracker_client.connection:1 messages follow:
ERROR:yandex_tracker_client.connection: - Задача не существует.


CLOUD-129644


In [88]:
# Spread the per-ticket lookup results into one column per extractor name.
ticket_keys = final_table['ticket']
for name in tickets_func:
    final_table[name] = [ticket_dict[ticket_key][name] for ticket_key in ticket_keys]

In [89]:
# Use '-' as the placeholder wherever a cell is exactly the empty string.
final_table = final_table.replace('', '-')

In [90]:
# Number of distinct billing_account_id values in the final table (displayed as a 1-tuple).
final_table['billing_account_id'].unique().shape

(7781,)

In [91]:
# Persist the final table through the robot_lib helper, passing the table name
# and the target folder.
# NOTE(review): the table includes contact columns (names, phone, email);
# confirm the destination is appropriate for that data.
lib.save_table('offers_grants_information_table', '//home/cloud_analytics/lunin-dv/grants', final_table)