In [3]:
import datetime as dt
from steemdata import SteemData

import pandas as pd
import numpy as np

# Optional plotting stack. If any of the packages is missing, install it.
try:
    import plotly.plotly as py
    import plotly.graph_objs as go
    import cufflinks as cf
except ImportError:
    # Catch only import failures: a bare `except:` would also swallow
    # KeyboardInterrupt/SystemExit and kick off an install on unrelated errors.
    # The install branch does not bind py/go/cf itself, so re-run this cell
    # once the installation has finished.
    !pip install plotly
    !pip install cufflinks
    
# helpers
from toolz import keyfilter

def keep(d, whitelist):
    """Return the entries of ``d`` whose keys are contained in ``whitelist``."""
    def is_whitelisted(key):
        return key in whitelist

    return keyfilter(is_whitelisted, d)

def omit(d, blacklist):
    """Return the entries of ``d`` whose keys are NOT contained in ``blacklist``."""
    def is_permitted(key):
        return key not in blacklist

    return keyfilter(is_permitted, d)

In [4]:
# Database handle exposed by SteemData; later cells query its 'Accounts' collection.
db = SteemData().db

In [56]:
# app specific imports
from funcy.seqs import first
from toolz.functoolz import pipe
from steem.account import Account
from steem.utils import parse_time

## Eligible Accounts

Rules:
1. The account must hold more than 0.15 MV (150,000 VESTS).
2. The account must have been active within the past 6 months (180 days).

In [58]:
# Eligibility threshold: an account must hold strictly more than this many
# available VESTS (150,000 VESTS, i.e. the 0.15MV rule).
MIN_VESTS = 150000

# Query filter: available vesting balance above the threshold.
conditions = {
    'balances.available.VESTS': {'$gt': MIN_VESTS},
}
# Fields to return: only the account name and its available VESTS; `_id` is excluded.
projection = {
    '_id': 0,
    'name': 1,
    'balances.available.VESTS': 1,
}
# Materialize the result into a list so it can be counted and iterated again.
accounts = list(db['Accounts'].find(conditions, projection=projection))

In [59]:
# Number of accounts held in `accounts` at this point.
len(accounts)

10868

In [60]:
def last_op_time(username):
    """Return the parsed timestamp of the first item in the account's reverse history.

    Takes the first item yielded by ``Account(username).history_reverse``
    and parses its ``'timestamp'`` field with ``parse_time``. Returns
    ``None`` when there is no such item (or it is falsy).
    """
    reverse_history = Account(username).history_reverse(batch_size=10)
    newest = first(reverse_history)
    if not newest:
        return None
    return parse_time(newest['timestamp'])
    
def filter_inactive(accounts, days=180, now=None):
    """Keep only the accounts whose last activity falls inside the activity window.

    Args:
        accounts: iterable of dicts carrying a ``'timestamp'`` entry (a
            ``datetime``, or a falsy value such as ``None`` when unknown).
        days: length of the activity window in days (default: 180).
        now: reference ``datetime`` the window is measured back from;
            defaults to ``dt.datetime.now()``.

    Returns:
        A list of the accounts whose timestamp is strictly later than
        ``now - days``. Accounts with a falsy timestamp are dropped instead
        of raising a ``TypeError`` on comparison.
    """
    if now is None:
        # NOTE(review): this is naive local time; the timestamps produced by
        # parse_time are presumably UTC -- confirm, and pass `now` explicitly
        # if the offset matters.
        now = dt.datetime.now()
    limit = now - dt.timedelta(days=days)
    return [x for x in accounts if x['timestamp'] and x['timestamp'] > limit]

def filter_invalid(accounts):
    """Return a list of the accounts whose ``'timestamp'`` entry is truthy."""
    kept = []
    for entry in accounts:
        if entry['timestamp']:
            kept.append(entry)
    return kept

In [61]:
# Flatten every account record to name / last-operation time / vests.
# last_op_time is called once per account, so this cell can take a while.
# `accounts` is rebound to the flattened records, which no longer carry the
# 'balances' key this comprehension reads.
accounts = [
    dict(
        name=acct['name'],
        timestamp=last_op_time(acct['name']),
        vests=acct['balances']['available']['VESTS'],
    )
    for acct in accounts
]

In [62]:
# First drop accounts without a timestamp, then those outside the activity window.
valid_accounts = filter_inactive(filter_invalid(accounts))

In [92]:
# Tabulate the eligible accounts; the timestamp column is not needed from here on.
df = pd.DataFrame(valid_accounts).drop('timestamp', axis=1)

# ignore steemit account (the remaining rows keep their original index labels)
df = df[df['name'] != 'steemit']

# count the vests, calc % shares
all_vests = df['vests'].sum()
df = df.assign(pct_share=df['vests'] / all_vests * 100)

In [94]:
# Order the holders from largest to smallest vests and preview the first rows.
df_sorted = df.sort_values(by='vests', ascending=False)
df_sorted.head(5)

Unnamed: 0,name,vests,pct_share
2720,freedom,14865430000.0,7.394485
5442,ned,11934230000.0,5.936424
1837,dan,8798098000.0,4.376423
2525,steem,8013334000.0,3.98606
8250,val-a,6219531000.0,3.093771


In [104]:
# Write the name / vests / pct_share columns to raw_dist.json, one record per row.
df_sorted.loc[:, ['name', 'vests', 'pct_share']].to_json(
    path_or_buf='raw_dist.json',
    orient='records',
)

In [106]:
# Pretty-print raw_dist.json into distribution.json with the stdlib json.tool.
!python -m json.tool raw_dist.json > distribution.json