In [1]:
import pandas as pd, datetime, ast
from nile.api.v1 import (
    clusters,
    aggregators as na,
    extractors as ne,
    filters as nf,
    Record
)
from vault_client import instances

def get_datetime_from_epoch(epoch):
    """Convert a Unix epoch value into a ``'YYYY-MM-DD HH:MM:SS'`` string.

    ``epoch`` is coerced with ``int()`` first, so integers, floats (truncated)
    and numeric strings are all accepted. The conversion uses
    ``datetime.datetime.fromtimestamp``, i.e. the local time zone of the
    machine running the code.

    Returns:
        The formatted timestamp, or ``None`` when ``epoch`` cannot be
        converted (``None``, non-numeric text, out-of-range value, ...).
    """
    try:
        return str(datetime.datetime.fromtimestamp(int(epoch)))
    except (TypeError, ValueError, OverflowError, OSError):
        # Only conversion failures are treated as "no timestamp"; anything
        # else (e.g. KeyboardInterrupt) must still reach the caller.
        return None


def get_last_not_empty_table(folder_path):
    """Pick the table under ``folder_path`` that has the most rows.

    Uses the notebook-level ``job`` (it must be defined before this is
    called) to list the folder and read each table. Table paths are visited
    in descending lexicographic order; a table replaces the current choice
    only when its ``row_count`` is strictly greater, so among equally large
    tables the one that sorts last by path wins.

    NOTE(review): the name suggests "most recent non-empty table", but the
    loop selects the table with the highest row count -- confirm which one
    is intended.

    Returns:
        Full path of the selected table. When no readable table has any
        rows, the first path in descending order is returned instead.

    Raises:
        IndexError: if the folder contains no tables and nothing was selected.
    """
    tables_list = sorted([folder_path + '/' + x for x in job.driver.list(folder_path)], reverse=True)
    last_table_rows = 0
    last_table = ''
    for table in tables_list:
        try:
            table_ = job.driver.read(table)
        except Exception:
            # Best effort: a table that cannot be read is skipped. Catching
            # Exception (not a bare except) keeps a kernel interrupt able to
            # stop this potentially long scan.
            continue

        if table_.row_count > last_table_rows:
            last_table_rows = table_.row_count
            last_table = table
    if last_table:
        return last_table
    else:
        return tables_list[0]


def get_table_list(folder_path):
    """Return every table path under ``folder_path`` as one ``{a,b,c}`` string.

    The folder is listed through the notebook-level ``job``; each entry is
    prefixed with ``folder_path + '/'`` and the resulting paths are ordered
    in descending lexicographic order before being joined with commas and
    wrapped in curly braces.
    """
    table_paths = [folder_path + '/' + name for name in job.driver.list(folder_path)]
    table_paths.sort(reverse=True)
    return '{' + ','.join(table_paths) + '}'




def get_event_details(rest_, field_):
    """Read ``field_`` from the Python-literal text stored in ``rest_['metadata']``.

    ``rest_['metadata']`` is parsed with ``ast.literal_eval`` and the result
    is indexed with ``field_``.

    Returns:
        The value found under ``field_``, or ``''`` when ``rest_`` has no
        usable ``'metadata'`` entry, the text cannot be parsed, or the parsed
        value has no ``field_``.
    """
    try:
        return ast.literal_eval(rest_['metadata'])[field_]
    except Exception:
        # Deliberately best effort: any lookup or parsing problem means
        # "no detail available". Exception (rather than a bare except) lets
        # KeyboardInterrupt / SystemExit propagate.
        return ''

def get_status_changes(groups):
    """Yield a ``ba_status_changed`` record for each change of ``state`` in a group.

    ``groups`` is iterated as ``(key, records)`` pairs. Within one group the
    records are walked in the order they arrive and a record is emitted
    whenever ``rec['state']`` differs from the previously seen state. The
    previous state starts as ``''``, so the first record with a non-empty
    state is always reported, with ``old_status`` equal to ``''``.

    Each input record must provide ``'state'``, ``'updated_at'`` and
    ``'_rest'``.

    NOTE(review): the result is only meaningful if the records of a group
    arrive in chronological order -- confirm that the caller sorts them.

    Yields:
        ``Record(key, ...)`` with the fields ``event``, ``event_time``,
        ``ba_status``, ``reason`` and ``event_details``.
    """
    for key, records in groups:
        status = ''
        for rec in records:
            if rec['state'] == status:
                continue

            event_time = get_datetime_from_epoch(rec['updated_at'])
            # Build the reason once and reuse it for both the top-level field
            # and event_details, instead of parsing the metadata twice.
            rest = rec['_rest']
            reason = str(get_event_details(rest, 'block_reason')) + str(get_event_details(rest, 'suspend_reason'))

            result_dict = {
                'event': 'ba_status_changed',
                'event_time': event_time,
                'ba_status': rec['state'],
                'reason': reason,
                'event_details': {
                    'reason': reason,
                    'old_status': status,
                    'new_status': rec['state']
                }
            }
            status = rec['state']
            yield Record(key, **result_dict)

In [2]:
# Read the stored secret version through the vault client and use its
# 'token' and 'pool' entries to build the cluster handle used by later cells.
client = instances.Production()
yt_creds = client.get_version('ver-01d33pgv8pzc7t99s3egm24x47')
yt_secret = yt_creds['value']
cluster = clusters.yt.Hahn(token=yt_secret['token'], pool=yt_secret['pool'])

In [3]:
# get_last_not_empty_table() reads the notebook-level `job`, so the job has
# to exist before the helper is called.
job = cluster.job()
GRANTS_EXPORT_FOLDER = '//home/logfeller/logs/yc-billing-export-monetary-grants/1h'
path = get_last_not_empty_table(GRANTS_EXPORT_FOLDER)

In [4]:
# Fresh job for the export pipeline: read the table chosen above, apply
# `.unique('id')`, project it into `grant_given` event fields and write the
# result to the temp events table.
job = cluster.job()

clouds = (
    job.table(path)
    .unique('id')
    .project(
        'billing_account_id',
        grant_id='id',
        # initial_amount goes through float() first, then is truncated to int.
        amount=ne.custom(lambda x: int(float(x)), 'initial_amount'),
        event=ne.const('grant_given'),
        event_time=ne.custom(get_datetime_from_epoch, 'created_at'),
        # (end_time - start_time) / 86400 -- presumably a duration in days,
        # assuming both fields are epoch seconds; TODO confirm.
        grant_duration=ne.custom(lambda x,y: (y-x)/86400, 'start_time', 'end_time'),
        grant_source='source'
    )
    .put('//home/cloud_analytics/events/grants/temp')
)
job.run()

VBox()