In [1]:
import datetime as dt
from steemdata import SteemData

import pandas as pd
import numpy as np

import plotly.plotly as py
import plotly.graph_objs as go
import cufflinks as cf
    
# helpers
from toolz import keyfilter

def keep(d, whitelist):
    """Filter ``d`` through ``keyfilter``, retaining keys found in ``whitelist``."""
    def _is_listed(key):
        return key in whitelist

    return keyfilter(_is_listed, d)

def omit(d, blacklist):
    """Filter ``d`` through ``keyfilter``, retaining keys absent from ``blacklist``."""
    def _is_not_listed(key):
        return key not in blacklist

    return keyfilter(_is_not_listed, d)

In [2]:
# Database handle exposed by SteemData; the 'Posts' collection is queried from it below.
db = SteemData().db

## Methods

In [3]:
import json
import requests as rs
from steem.post import Post

def get_csrf(url, timeout=10):
    """Fetch ``url`` and extract the CSRF token embedded in the page body.

    The token is located by searching the response text for the marker
    ``"csrf":"`` and taking the 36 characters that follow it.
    NOTE(review): the fixed length of 36 presumably corresponds to a
    UUID-style token -- confirm against the actual page markup.

    Args:
        url: Address of the page to request.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        A ``(csrf, cookies)`` tuple when the response status is 200 and the
        marker is present in the body; ``None`` otherwise.
    """
    marker = '"csrf":"'
    token_length = 36

    resp = rs.get(url, timeout=timeout)
    if resp.status_code != 200:
        return None

    body = resp.text
    start_index = body.find(marker)
    if start_index == -1:
        # str.find reports "not found" as -1; slicing from that offset would
        # return an arbitrary piece of the page rather than a token.
        return None

    token_start = start_index + len(marker)
    return body[token_start:token_start + token_length], resp.cookies

def get_view_count(url_partial, csrf, cookies, timeout=10):
    """POST a page-view request to steemit.com and return the decoded reply.

    Args:
        url_partial: Value sent as the ``page`` field of the payload.
        csrf: Value sent as the ``csrf`` field of the payload.
        cookies: Cookies forwarded with the request.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error instead of blocking indefinitely.

    Returns:
        The decoded JSON object as a dict, or an empty dict when the status
        code is not 200, the body is empty, the body is not valid JSON, or
        the decoded value is not a dict.
    """
    payload = {
        "csrf": csrf,
        "page": url_partial,
        "ref": "",
    }

    url = 'https://steemit.com/api/v1/page_view'
    resp = rs.post(url, json=payload, cookies=cookies, timeout=timeout)

    if resp.status_code != 200 or not resp.text:
        return {}

    try:
        data = json.loads(resp.text)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError; a non-JSON body is
        # treated like any other failed lookup.
        return {}

    # The caller uses .get() on the result, so only a dict is passed through.
    return data if isinstance(data, dict) else {}

def get_views(identifier):
    """Look up the view count of the post named by ``identifier``.

    Builds a ``Post`` from ``identifier``, requests its page under
    ``https://steemit.com`` to obtain a CSRF token and cookies, then asks
    ``get_view_count`` for the page-view data of ``p.url``.

    Returns:
        The ``views`` entry of the page-view reply, or ``-1`` when the entry
        is missing or no CSRF token could be obtained.
    """
    p = Post(identifier)

    session = get_csrf('https://steemit.com' + p.url)
    if session is None:
        # get_csrf yields None on failure; unpacking it would raise TypeError.
        return -1

    csrf, cookies = session
    return get_view_count(p.url, csrf, cookies).get('views', -1)

## Top Viewed Posts

In [4]:
# Tunable query parameters: size of the look-back window and the minimum
# engagement a post needs to be considered.
LOOKBACK_DAYS = 8
MIN_NET_VOTES = 100
MIN_CHILDREN = 3

# time constraints
# A timezone-aware UTC timestamp is used instead of naive local time:
# PyMongo interprets naive datetimes as UTC, so dt.datetime.now() would shift
# the window by the machine's UTC offset.
# NOTE(review): assumes `db` is a PyMongo database and `created` holds UTC -- confirm.
time_constraints = {
    '$gte': dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=LOOKBACK_DAYS),
}
conditions = {
    'created': time_constraints,
    # net_rshares / author_reputation are not filtered in the query;
    # filter_spam checks them after the documents are fetched.
#     'net_rshares': {'$gt': 0},
#     'author_reputation': {'$gt': 0},
    'net_votes': {'$gt': MIN_NET_VOTES},
    'children': {'$gt': MIN_CHILDREN},
}
# Fields to return for each matching post (`_id` is excluded).
projection = {
    '_id': 0,
    'identifier': 1,
    'net_votes': 1,
    'net_rshares': 1,
    'author_reputation': 1,
    'title': 1,
    'author': 1,
    'pending_payout_value': 1,
    'total_payout_value': 1,
}
posts = list(db['Posts'].find(conditions, projection=projection))

In [5]:
def filter_spam(post):
    """Predicate for ``filter``: True when ``post`` should be kept.

    A post is rejected (False) when either its ``net_rshares`` or its
    ``author_reputation``, converted with ``int``, is negative.
    """
    return int(post['net_rshares']) >= 0 and int(post['author_reputation']) >= 0

In [6]:
# Retain only the posts that filter_spam accepts.
posts = [post for post in posts if filter_spam(post)]

In [7]:
# Number of posts left after the spam filter.
len(posts)

2476

In [8]:
# posts2 = []
# posts_gen = iter(posts)

In [None]:
# from contextlib import suppress
# import time

# for p in posts_gen:
#     with suppress(Exception):
#         views = get_views(p['identifier'])
#         time.sleep(0.01)
#         print(views)
        
#         posts2.append({
#             **p,
#             'views': views
#         })
        

In [None]:
from funcy import silent

# Attach a 'views' entry to every post. get_views is wrapped in funcy's
# silent -- presumably so that a failed lookup yields None instead of
# aborting the whole run; verify against the funcy documentation.
posts2 = [
    dict(post, views=silent(get_views)(post['identifier']))
    for post in posts
]

In [None]:
# Replace the two payout sub-documents with their 'amount' entry so each
# payout field holds a single value.
posts3 = [
    dict(
        post,
        pending_payout_value=post['pending_payout_value']['amount'],
        total_payout_value=post['total_payout_value']['amount'],
    )
    for post in posts2
]

In [None]:
# One row per post dict in posts3.
df = pd.DataFrame(posts3)

In [None]:
# Select the display columns, drop rows with any missing value, then format:
# prefix authors with "@", cast views to int, and cut titles to 30 characters.
df2 = (
    df[['author', 'title', 'net_votes', 'pending_payout_value', 'views']]
    .dropna()
    .assign(
        author=lambda frame: frame['author'].apply(lambda name: "@%s" % name),
        views=lambda frame: frame['views'].apply(int),
        title=lambda frame: frame['title'].apply(lambda text: text[:30]),
    )
)

In [None]:
# The 25 rows with the highest view counts, most viewed first.
df3 = (
    df2
    .sort_values(by='views', ascending=False)
    .head(25)
)
# Preview the first five of them.
df3.head(5)

In [None]:
from tabulate import tabulate

# Print the ranking as an HTML table: rows come from the frame's values,
# headers from its column names.
print(
    tabulate(
        df3.values.tolist(),
        headers=df3.columns.tolist(),
        tablefmt="html",
    )
)