In [6]:
import pandas as pd, datetime, ast, json
from nile.api.v1 import (
    clusters,
    aggregators as na,
    extractors as ne,
    filters as nf,
    Record
)
from vault_client import instances

def get_datetime_from_epoch(epoch):
    """Convert a Unix timestamp into a local-time datetime string.

    Args:
        epoch: Seconds since the Unix epoch, given as anything ``int()``
            accepts (int, float, numeric string).

    Returns:
        ``str(datetime.datetime.fromtimestamp(int(epoch)))`` -- i.e.
        ``'YYYY-MM-DD HH:MM:SS'`` in the machine's local timezone -- or
        ``None`` when ``epoch`` cannot be converted (``None``, non-numeric
        text, NaN/inf, or a value outside the platform's supported range).
    """
    try:
        return str(datetime.datetime.fromtimestamp(int(epoch)))
    except (TypeError, ValueError, OverflowError, OSError):
        # Only conversion failures mean "no date". A bare ``except`` would
        # also swallow KeyboardInterrupt/SystemExit and hide real bugs.
        return None

def date_range_by_days(start_str, end_str):
    """List every calendar day from ``start_str`` through ``end_str``.

    Args:
        start_str: First day, formatted ``'YYYY-MM-DD'``.
        end_str: Last day (inclusive), formatted ``'YYYY-MM-DD'``.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings in ascending order; empty when
        ``end_str`` is earlier than ``start_str``.
    """
    date_format = '%Y-%m-%d'
    first_day = datetime.datetime.strptime(start_str, date_format)
    last_day = datetime.datetime.strptime(end_str, date_format)
    # +1 because the end date itself is part of the range.
    total_days = int((last_day - first_day).days) + 1
    return [
        str((first_day + datetime.timedelta(days=offset)).date())
        for offset in range(total_days)
    ]

def get_last_not_empty_table(folder_path):
    """Pick the table with the most rows among the tables in a folder.

    Table paths are visited in descending name order, and a table replaces
    the current choice only when it has strictly more rows. The result is
    therefore the largest table, with ties going to the name that sorts
    last. Tables that cannot be read are skipped.

    Relies on the notebook-level ``job`` object (``job.driver.list`` and
    ``job.driver.read``), which must be assigned before this is called.

    Args:
        folder_path: Path of the folder whose tables are inspected.

    Returns:
        Full path (``folder_path + '/' + name``) of the table with the
        highest ``row_count``. When every table is empty or unreadable, the
        path that sorts last is returned instead.

    Raises:
        IndexError: If the folder contains no tables at all.
    """
    tables_list = sorted(
        [folder_path + '/' + name for name in job.driver.list(folder_path)],
        reverse=True
    )
    if not tables_list:
        # Same exception type the old ``tables_list[0]`` fallback produced,
        # but with a message that says what actually went wrong.
        raise IndexError('no tables found in %s' % folder_path)

    last_table = ''
    last_table_rows = 0
    for table in tables_list:
        try:
            table_ = job.driver.read(table)
        except Exception:
            # Best effort: an unreadable table is simply not a candidate.
            # ``Exception`` (not a bare except) lets Ctrl-C still interrupt.
            continue

        row_count = table_.row_count
        if row_count > last_table_rows:
            last_table_rows = row_count
            last_table = table

    return last_table or tables_list[0]


def get_table_list(folder_path):
    """Build a brace-delimited, comma-separated list of all tables in a folder.

    Uses the notebook-level ``job`` object to list the folder.

    Args:
        folder_path: Path of the folder to list.

    Returns:
        A string of the form ``'{path_a,path_b,...}'`` with the full table
        paths in descending order; ``'{}'`` for an empty folder.
    """
    table_paths = [folder_path + '/' + name for name in job.driver.list(folder_path)]
    table_paths.sort(reverse=True)
    return '{' + ','.join(table_paths) + '}'




def get_event_details(rec, field_):
    """Read one field from the JSON ``metadata`` of a record.

    The metadata string is taken from ``rec['_rest']['metadata']`` when the
    record has a ``_rest`` entry, otherwise from ``rec['metadata']``.

    Args:
        rec: Mapping-like record (supports ``in`` and item access).
        field_: Key to look up in the decoded metadata.

    Returns:
        The decoded value stored under ``field_``, or ``''`` when the
        metadata is missing, is not valid JSON, or lacks the field.
    """
    has_rest = '_rest' in rec
    try:
        holder = rec['_rest'] if has_rest else rec
        return json.loads(holder['metadata'])[field_]
    except Exception:
        # Deliberate best effort: any lookup/decoding problem means "no
        # value". ``Exception`` instead of a bare except keeps
        # KeyboardInterrupt/SystemExit propagating.
        return ''

def get_event_meta(rec):
    """Return the raw metadata payload carried by a record.

    Args:
        rec: Mapping-like record.

    Returns:
        ``rec['_rest']`` when the record has a ``_rest`` entry, otherwise
        ``rec['metadata']``.

    NOTE(review): the ``_rest`` branch returns the whole ``_rest`` value,
    not ``rec['_rest']['metadata']`` -- confirm that is intended.
    """
    source_field = '_rest' if '_rest' in rec else 'metadata'
    return rec[source_field]

def get_reason(rec):
    """Choose the reason text attached to a record's metadata.

    Priority: ``suspend_reason``, then ``block_reason``, then
    ``fraud_detected_by``. Each value is stringified first; the fraud value
    additionally has ``[``, ``]``, ``u'`` and ``'`` removed so that a
    stringified list reads as plain comma-separated text.

    Args:
        rec: Record passed through to ``get_event_details``.

    Returns:
        The first non-empty reason string, or ``None`` when all three are
        empty.

    NOTE(review): a later cell defines another ``get_reason`` that takes a
    metadata string; whichever cell ran last is the one callers get.
    """
    suspend_text, block_text, fraud_text = [
        str(get_event_details(rec, field_name))
        for field_name in ('suspend_reason', 'block_reason', 'fraud_detected_by')
    ]

    if suspend_text or block_text:
        return suspend_text or block_text
    if not fraud_text:
        return None

    # Strip list-repr punctuation, in this exact order.
    for token in ('[', ']', "u'", "'"):
        fraud_text = fraud_text.replace(token, '')
    return fraud_text
    
def get_status_changes(groups):
    """Reducer: emit one ``ba_status_changed`` record per state transition.

    Within each group the records are scanned in the order supplied and a
    record is emitted whenever ``rec['state']`` differs from the previously
    seen state. The previous state starts as ``''``, so the first record of
    a group is emitted unless its state is the empty string.

    Args:
        groups: Iterable of ``(key, records)`` pairs as handed to a reduce
            operation.

    Yields:
        ``Record(key, **fields)`` with the event name, event/creation times,
        the new status, the raw metadata, the individual reason fields and
        an ``event_details`` dict holding the reason and old/new status.
    """
    for key, records in groups:
        status = ''
        for rec in records:
            new_status = rec['state']
            if new_status == status:
                continue

            # Computed once; it feeds both 'block_reason' and event_details.
            reason = get_reason(rec)
            result_dict = {
                'event': 'ba_status_changed',
                'event_time': get_datetime_from_epoch(rec['updated_at']),
                'ba_created_time': get_datetime_from_epoch(rec['created_at']),
                'ba_status': new_status,
                'metadata': get_event_meta(rec),
                'suspend_reason': str(get_event_details(rec, 'suspend_reason')),
                'fraud_detected_by': str(get_event_details(rec, 'fraud_detected_by')),
                # The dict literal used to list 'block_reason' twice; the
                # later entry (the combined reason) always won, so that is
                # the value kept here.
                # NOTE(review): confirm 'block_reason' should carry the
                # combined reason rather than the raw block_reason field.
                'block_reason': reason,
                'event_details': {
                    'reason': reason,
                    'old_status': status,
                    'new_status': new_status
                }
            }

            status = new_status
            yield Record(key, **result_dict)

In [2]:
# Read the YT token and pool from the vault secret version below and use
# them to open a connection to the Hahn cluster.
client = instances.Production()
yt_creds = client.get_version('ver-01d33pgv8pzc7t99s3egm24x47')
cluster = clusters.yt.Hahn(
    token = yt_creds['value']['token'],
    pool = yt_creds['value']['pool']
)
# get_last_not_empty_table reads the notebook-level `job`, so `job` has to
# exist before the two lookups below.
job = cluster.job()
# Source tables: the largest table found in each hourly ("1h") log folder.
ba_table = get_last_not_empty_table('//home/logfeller/logs/yc-billing-export-billing-accounts/1h')
ba_history_table = get_last_not_empty_table('//home/logfeller/logs/yc-billing-export-billing-accounts-history/1h')

In [67]:
def get_reason(metadata_):
    """Extract the block/suspend reason from a metadata JSON document.

    ``block_reason`` takes priority over ``suspend_reason``;
    ``fraud_detected_by`` is deliberately not consulted.

    Args:
        metadata_: JSON text of a metadata object, an already-decoded dict,
            or ``''``/``None`` when there is no metadata.

    Returns:
        The value stored under ``block_reason`` or ``suspend_reason``, or
        ``'Unknown'`` when the metadata is absent, empty, not valid JSON,
        not a JSON object, or contains neither key.

    NOTE(review): this reuses the name of the earlier ``get_reason(rec)``
    helper that ``get_status_changes`` calls. Once this cell has run, that
    reducer gets this version and passes it a record instead of a metadata
    string -- confirm which definition is intended and rename one of them.
    """
    if metadata_ in ('', None):
        return 'Unknown'

    if isinstance(metadata_, dict):
        metadata = metadata_
    else:
        try:
            metadata = json.loads(metadata_)
        except (TypeError, ValueError):
            # Malformed or non-text metadata: treat like missing metadata
            # instead of failing the whole run on one bad row.
            return 'Unknown'

    # Valid JSON that is not an object (number, list, string) has no keys.
    if not isinstance(metadata, dict):
        return 'Unknown'

    for field_name in ('block_reason', 'suspend_reason'):
        if field_name in metadata:
            return metadata[field_name]
    return 'Unknown'

In [None]:
# Build the status-change events: group the history table by billing
# account, order each group by updated_at, reduce with get_status_changes
# and store the result in the temp table.
job = cluster.job()

clouds = (
    job.table(ba_history_table)
    .groupby('billing_account_id')
    .sort('updated_at')
    .reduce(get_status_changes)
    .put('//home/cloud_analytics/events/ba_statuses/temp')
)
job.run()

In [7]:
# Copy the temp table over ba_status_changes as-is (replaces its contents).
job = cluster.job()
temp = (
    job.table('//home/cloud_analytics/events/ba_statuses/temp')
    .put('//home/cloud_analytics/events/ba_statuses/ba_status_changes')
)
job.run()

VBox()

In [8]:
# Merge the freshly computed events (temp) into the accumulated
# ba_status_changes table and drop duplicates.
job = cluster.job()
temp = job.table('//home/cloud_analytics/events/ba_statuses/temp')
data = job.table('//home/cloud_analytics/events/ba_statuses/ba_status_changes')
# An event is identified by account + event time + status. The first key
# must be the `billing_account_id` column (the groupby key carried into the
# reducer output), not an individual account id value.
job.concat(
    temp,
    data,
) \
.unique('billing_account_id', 'event_time', 'ba_status') \
.put('//home/cloud_analytics/events/ba_statuses/ba_status_changes')
job.run()

VBox()

In [None]:
# Unfinished draft of a per-group reducer for a cohort dataset.
# NOTE(review): this cell does not parse as written -- the body of the
# `for key, records in groups:` loop is not indented under it, and the final
# `for rec in records:` has no body. Finish it or remove the cell.
def calc_cohort_dataset(groups):
    for key, records in groups:
    # Zero-initialised flags/counters for the group.
    result_dict = {
        'cloud_created': 0,
        'ba_created': 0,
        'day_use': 0,
        'first_trial_consumption': 0,
        'first_payment': 0,
        'first_paid_consumption': 0,
        'ba_became_paid': 0,
        'is_trial_consumption': 0,
        'is_paid_consumption': 0,
        'trial_consumption': 0,
        'paid_consumption': 0,
    }
    # NOTE(review): if `records` is a one-shot iterator, list(records)
    # consumes it and the loop below would see nothing -- presumably it
    # should iterate over rec_list; confirm.
    rec_list = list(records)
    is_cloud_created = 0
    rec_counter = 0
    for rec in records:

In [None]:
# Open a fresh job on the cluster; nothing is added to it or run in this cell.
job = cluster.job()

In [2]:
# Load 'clouds (1).csv' (path relative to the working directory) into a DataFrame.
# NOTE(review): `croud_history` looks like a typo for `cloud_history`; the
# cells below use this name, so rename them together if at all.
croud_history = pd.read_csv('clouds (1).csv')

In [3]:
# Display the loaded frame to check its columns and sample values.
croud_history

Unnamed: 0,created_at,created_by,deleted_at,description,id,modified_at,name,status,permission_stages
0,1545193709,ajee18qj5es5ncsu7nrr,,,b1g00033e8gkp0e0c3mq,1545193710,cloud-b1g00033e8gkp0,CREATING,
1,1546864344,aje6vilvc8rco7tc78j1,,,b1g0008ac1a1hhnr5e4r,1546864345,cloud-b1g0008ac1a1hh,CREATING,
2,1544444444,ajegjjh1jm4bpf1oshi8,,,b1g000r89p81aec9or25,1544444445,cloud-b1g000r89p81ae,CREATING,
3,1547505792,aje8dk9jltsqiqhtdpa3,,,b1g0039vsiut6ttj1g9q,1547505793,cloud-b1g0039vsiut6t,CREATING,
4,1544202111,ajeu0bki69stqfrqs0hh,,,b1g0049mlfds86ducgth,1544202112,cloud-b1g0049mlfds86,CREATING,
5,1548016539,aje11k7n9srtasi9o2po,,,b1g004li7mf9ra2qrr9a,1548016540,cloud-b1g004li7mf9ra,CREATING,
6,1547504036,ajec77ljhabge2mbfjmv,,,b1g004p6kelapibjurjm,1547504037,cloud-b1g004p6kelapi,CREATING,
7,1544196675,ajeg9dhr2lg1m6g2pvs2,,,b1g0052o6nafa1k1r1t5,1544196676,cloud-b1g0052o6nafa1,CREATING,
8,1549969223,aje6sulou20771ns46fn,,,b1g0057bp8k0plj77jdv,1549969224,cloud-b1g0057bp8k0pl,CREATING,
9,1545936508,ajevukbisghiop840d5q,,,b1g005b2v1jse9ro5j24,1545936509,cloud-b1g005b2v1jse9,CREATING,


In [4]:
# Number of rows per distinct value of the `status` column.
croud_history['status'].value_counts()

CREATING              56440
ACTIVE                 1906
BLOCKED                 120
BLOCKED_BY_BILLING       13
BLOCKED_MANUALLY         10
Name: status, dtype: int64

In [5]:
# Rows whose status is exactly 'BLOCKED'; the equality filter does not match
# the BLOCKED_BY_BILLING / BLOCKED_MANUALLY values.
croud_history[croud_history['status']== 'BLOCKED']

Unnamed: 0,created_at,created_by,deleted_at,description,id,modified_at,name,status,permission_stages
448,1540281841,aje0k90q1inor0ijenpp,,,b1g08346v06lhfnolagd,1542874946,cloud-b1g08346v06lhf,BLOCKED,
854,1539328254,ajegg0hab8c8vbi1ekn2,,,b1g0f8mq0unf5fnuvcql,1542209488,cloud-b1g0f8mq0unf5f,BLOCKED,
1264,1539646140,ajerimd067rrdktj5k3f,,,b1g0m4dcjh5f5hkc39kl,1543029834,cloud-b1g0m4dcjh5f5h,BLOCKED,
1542,1541511038,ajetsbdmlthgir9dqr2q,,,b1g0qqaeukemnk8ev0p2,1542460204,cloud-b1g0qqaeukemnk,BLOCKED,
1771,1540830121,aje9gmh81040n77jao7s,,,b1g0uom0sihmla5no4cv,1542377309,cloud-b1g0uom0sihmla,BLOCKED,
1848,1539473923,ajepqi2e8njj20j62k9o,,,b1g0vtus2ercnhidrgks,1542755558,cloud-b1g0vtus2ercnh,BLOCKED,
1980,1540550653,ajemaf1d8b8c27ijrc56,,,b1g129o0csgi78s5njhn,1541612027,cloud-b1g129o0csgi78,BLOCKED,
2117,1539532024,aje4h5hht3cnprnbv0qm,,,b1g14ll2pcm41t5lujbj,1541121657,cloud-b1g14ll2pcm41t,BLOCKED,
2503,1539324773,aje5e91i600pi70f5j36,,,b1g1bek2uvmp6rs1jdpf,1542680235,cloud-b1g1bek2uvmp6r,BLOCKED,
3124,1541498000,aje7a1mbfgo0kp049280,,,b1g1lusv8hqvr9lq4h1l,1542963602,cloud-b1g1lusv8hqvr9,BLOCKED,


In [10]:
# All rows for a single cloud id.
croud_history[croud_history['id']== 'b1g08346v06lhfnolagd']

Unnamed: 0,created_at,created_by,deleted_at,description,id,modified_at,name,status,permission_stages
448,1540281841,aje0k90q1inor0ijenpp,,,b1g08346v06lhfnolagd,1542874946,cloud-b1g08346v06lhf,BLOCKED,


In [9]:
# Smallest `modified_at` value in the frame (presumably Unix epoch seconds -- confirm).
croud_history['modified_at'].min()

1540907384

In [5]:
import datetime
def date_range_by_weeks(start_str, end_str):
    """List dates from ``start_str`` up to ``end_str`` in 7-day steps.

    Args:
        start_str: First date, formatted ``'YYYY-MM-DD'``.
        end_str: Upper bound (inclusive), formatted ``'YYYY-MM-DD'``.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings: ``start_str``, then every
        seventh day after it that is not later than ``end_str``. Empty when
        ``end_str`` is earlier than ``start_str``.
    """
    date_format = '%Y-%m-%d'
    start = datetime.datetime.strptime(start_str, date_format)
    end = datetime.datetime.strptime(end_str, date_format)
    # Floor division gives the same integer on Python 2 and 3. With true
    # division, an end date 1-6 days before the start truncated to 0 and
    # produced one spurious entry.
    week_count = (end - start).days // 7 + 1

    return [
        str((start + datetime.timedelta(days=week_index * 7)).date())
        for week_index in range(week_count)
    ]

In [6]:
# Weekly dates from 2019-01-28 up to today's date; the output changes with the run date.
date_range_by_weeks('2019-01-28', str(datetime.datetime.now().date()))

['2019-01-28', '2019-02-04', '2019-02-11']

In [12]:
# NOTE(review): leftover probe -- calling write() with no arguments raises the
# TypeError recorded in this cell's output. Supply the required arguments or
# delete the cell so the notebook can run top to bottom.
cluster.write()

TypeError: write() takes at least 3 arguments (2 given)