Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/top trending posts wo bots #14

Merged
merged 21 commits into from
Apr 2, 2018
Merged
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

* SQLite is now used to store blockchain data
* New Poster class to allow for waiting
* New posting of trending without bots
* BUG FIX: Images and links are now properly filtered
* BUG FIX: Better displaying on busy!

*0.6.1a* - 2018-03-22

Expand Down
51 changes: 39 additions & 12 deletions integration_tests/bchain/postdata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@
from trufflepig.bchain.poster import Poster


@pytest.mark.skipif(config.PASSWORD is None, reason="needs posting key")
def test_test_top10post(steem):

steem.wallet.unlock(config.PASSWORD)
if config.PASSWORD:
steem.wallet.unlock(config.PASSWORD)

poster = Poster(steem=steem,
account=config.ACCOUNT,
waiting_time=0.1)
waiting_time=0.1,
no_posting_key_mode=config.PASSWORD is None)

posts = random_data.create_n_random_posts(10)
df = pd.DataFrame(posts)
Expand All @@ -35,14 +36,15 @@ def test_test_top10post(steem):
tbpd.comment_on_own_top_list(df, poster, permalink)


@pytest.mark.skipif(config.PASSWORD is None, reason="needs posting key")
def test_test_all_top_with_real_data(steem):

steem.wallet.unlock(config.PASSWORD)
if config.PASSWORD:
steem.wallet.unlock(config.PASSWORD)

poster = Poster(steem=steem,
account=config.ACCOUNT,
waiting_time=0.1)
waiting_time=0.1,
no_posting_key_mode=config.PASSWORD is None)

df = tpbg.scrape_hour_data(steem, stop_after=10)

Expand All @@ -62,14 +64,15 @@ def test_test_all_top_with_real_data(steem):
overview_permalink='jjj')


@pytest.mark.skipif(config.PASSWORD is None, reason="needs posting key")
def test_test_top20_vote_and_comment(steem):

steem.wallet.unlock(config.PASSWORD)
if config.PASSWORD:
steem.wallet.unlock(config.PASSWORD)

poster = Poster(steem=steem,
account=config.ACCOUNT,
waiting_time=0.1)
waiting_time=0.1,
no_posting_key_mode=config.PASSWORD is None)

posts = random_data.create_n_random_posts(10)
df = pd.DataFrame(posts)
Expand All @@ -78,12 +81,36 @@ def test_test_top20_vote_and_comment(steem):

df = tppp.preprocess(df, ncores=1)

account = config.ACCOUNT

tbpd.vote_and_comment_on_topK(df, poster, 'laida',
overview_permalink='lll')


@pytest.mark.skipif(config.PASSWORD is None, reason="needs posting key")
def test_create_wallet(steem):
tbpd.create_wallet(steem, config.PASSWORD, config.POSTING_KEY)
tbpd.create_wallet(steem, config.PASSWORD, config.POSTING_KEY)


def test_test_top_trending_post(steem):
    """Integration test: post the top-trending list built from random posts.

    The wallet is only unlocked when ``config.PASSWORD`` is set; when it is
    ``None`` the ``Poster`` is created with ``no_posting_key_mode=True``.

    Parameters
    ----------
    steem: fixture passed on to the wallet, ``Poster`` and posting call
    """
    # ``pd.datetime`` was merely an alias of ``datetime.datetime`` and is gone
    # from current pandas releases, so the stdlib class is used directly.
    from datetime import datetime

    if config.PASSWORD:
        steem.wallet.unlock(config.PASSWORD)

    poster = Poster(steem=steem,
                    account=config.ACCOUNT,
                    waiting_time=0.1,
                    no_posting_key_mode=config.PASSWORD is None)

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    # NOTE(review): self-assignment, kept as in the original -- possibly a
    # different target column was intended; confirm against sibling tests.
    df['reward'] = df.reward
    df['predicted_votes'] = df.votes

    df = tppp.preprocess(df, ncores=1)

    date = datetime.utcnow().date()

    tbpd.post_top_trending_list(df, poster, date,
                                overview_permalink='iii',
                                trufflepicks_permalink='kkk',
                                steem_amount=10,
                                sbd_amount=10)
4 changes: 2 additions & 2 deletions integration_tests/bchain/postweeklyupdate_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ def test_existence(steem):
assert isinstance(result, str)


@pytest.mark.skipif(config.PASSWORD is None, reason="needs posting key")
def test_weekly_post(steem):
posts = create_n_random_posts(300)

Expand All @@ -72,7 +71,8 @@ def test_weekly_post(steem):
post_frame['sbd_bought_reward'] = 0
post_frame['bought_votes'] = 0

poster = Poster(account=config.ACCOUNT, steem=steem)
poster = Poster(account=config.ACCOUNT, steem=steem,
no_posting_key_mode=config.PASSWORD is None)
permalink = tppw.post_weakly_update(pipeline, post_frame,
poster=poster,
current_datetime=current_date)
Expand Down
8 changes: 4 additions & 4 deletions integration_tests/pigonduty_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,28 @@
from trufflepig.bchain.poster import Poster


@pytest.mark.skipif(config.PASSWORD is None, reason="needs posting key")
def test_call_a_pig(steem):
current_datetime = '2018-03-03-18:21:30'

pipeline = MockPipeline()
poster = Poster(steem=steem,
account=config.ACCOUNT,
waiting_time=0.1)
waiting_time=0.1,
no_posting_key_mode=config.PASSWORD is None)

tppd.call_a_pig(poster=poster,
pipeline=pipeline, topN_permalink='www.test.com',
current_datetime=current_datetime, hours=0.1,
overview_permalink='dsfd')


@pytest.mark.skipif(config.PASSWORD is None, reason="needs posting key")
def test_call_a_pig_empty_frame(steem):
aacs = (('smcaterpillar','question-is-there-an-api-to-upload-images-to-steemit'),)

poster = Poster(steem=steem,
account=config.ACCOUNT,
waiting_time=0.51)
waiting_time=0.51,
no_posting_key_mode=config.PASSWORD is None)

pipeline = MockPipeline()
tppd.execute_call(comment_authors_and_permalinks=aacs,
Expand Down
26 changes: 26 additions & 0 deletions integration_tests/preprocessing_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,29 @@ def test_bid_bot_correction_real_data(steem):

assert upvotes
assert (df.sbd_bought_reward.mean() > 0) or (df.steem_bought_reward.mean() > 0)


def test_filtered_body_no_images_regression(steem):
    """Regression test: no filtered body may still contain '.JPG'.

    The faulty quotes this guards against appeared in
    https://steemit.com/steemit/@trufflepig/daily-truffle-picks-2018-03-31
    """
    authors_and_permalinks = [
        ('colovhis', 'dofus-mastodon-cemetery-basic-tutorial'),
        ('joshuaetim',
         'rewarding-hardwork-and-excellence-amongst-school-children-'
         'through-steem-powered-notebooks-and-writing-materials-3rd-phase-of'),
    ]
    raw_posts = tpgd.get_post_data(authors_and_permalinks, steem=steem)

    frame = tppp.preprocess(pd.DataFrame(raw_posts), ncores=1)

    # Fails as soon as a single body still carries an image file fragment.
    assert not any('.JPG' in body for body in frame.filtered_body)


def test_filtered_body_classtextjustify_regression(steem):
    """Regression test: no filtered body may still contain 'classtextjustify'.

    The faulty quotes this guards against appeared in
    https://steemit.com/steemit/@trufflepig/daily-truffle-picks-2018-03-31
    """
    authors_and_permalinks = [
        ('colovhis', 'dofus-mastodon-cemetery-basic-tutorial'),
        ('joshuaetim',
         'rewarding-hardwork-and-excellence-amongst-school-children-'
         'through-steem-powered-notebooks-and-writing-materials-3rd-phase-of'),
    ]
    raw_posts = tpgd.get_post_data(authors_and_permalinks, steem=steem)

    frame = tppp.preprocess(pd.DataFrame(raw_posts), ncores=1)

    # Fails as soon as a single body still carries the formatting residue.
    assert not any('classtextjustify' in body for body in frame.filtered_body)
40 changes: 40 additions & 0 deletions integration_tests/trending0bot_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import pytest

import pandas as pd
import trufflepig.bchain.getdata as tpgd
import trufflepig.preprocessing as tppp
import trufflepig.bchain.getaccountdata as tpad
from trufflepig.bchain.poster import Poster
import trufflepig.trending0bidbots as tt0b
from trufflepig.testutils.pytest_fixtures import steem
from trufflepig import config


def test_create_trending_post(steem):
    """Integration test: scrape posts, correct for bid bots, create the post.

    Scrapes posts via ``tpgd.scrape_hour_data``, fetches upvote payments to
    the ``booster`` bot for the covered time span (extended by 8 days past
    the newest post), preprocesses the frame, applies the bid-bot correction
    and finally calls ``tt0b.create_trending_post``. The ``Poster`` is put in
    ``no_posting_key_mode`` when ``config.PASSWORD`` is ``None``.

    Parameters
    ----------
    steem: fixture passed on to the scraping, account-data and poster calls
    """
    # ``pd.datetime`` was merely an alias of ``datetime.datetime`` and is gone
    # from current pandas releases, so the stdlib class is used directly.
    from datetime import datetime

    current_datetime = datetime.utcnow()

    data_frame = tpgd.scrape_hour_data(steem=steem,
                                       current_datetime=current_datetime,
                                       ncores=32,
                                       offset_hours=8,
                                       hours=1, stop_after=20)

    min_datetime = data_frame.created.min()
    max_datetime = data_frame.created.max() + pd.Timedelta(days=8)
    upvote_payments = tpad.get_upvote_payments_to_bots(steem=steem,
                                                       min_datetime=min_datetime,
                                                       max_datetime=max_datetime,
                                                       bots=['booster'])

    # The original also bound the result to an unused alias ``df``; dropped.
    data_frame = tppp.preprocess(data_frame, ncores=1)

    data_frame = tppp.compute_bidbot_correction(post_frame=data_frame,
                                                upvote_payments=upvote_payments)
    account = config.ACCOUNT
    poster = Poster(account=account, steem=steem,
                    no_posting_key_mode=config.PASSWORD is None)

    tt0b.create_trending_post(data_frame, upvote_payments, poster, 'test',
                              'test', current_datetime)
22 changes: 22 additions & 0 deletions tests/bchain/posts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,26 @@ def test_weekly_update():
topics=topics)

assert body
assert title


def test_top_trending_post():
    """Check that ``tbpo.top_trending_post`` yields a non-empty title and body.

    Builds a frame from ten random posts, preprocesses it, derives the image
    URLs from the raw bodies and feeds the columns to ``top_trending_post``.
    """
    # ``pd.datetime`` was merely an alias of ``datetime.datetime`` and is gone
    # from current pandas releases, so the stdlib class is used directly.
    from datetime import datetime

    posts = random_data.create_n_random_posts(10)
    df = pd.DataFrame(posts)
    df = tppp.preprocess(df, ncores=1)

    date = datetime.utcnow().date()
    # Item assignment is required here: attribute assignment
    # (``df.image_urls = ...``) does not create a column when none exists yet.
    df['image_urls'] = df.body.apply(tptf.get_image_urls)

    title, post = tbpo.top_trending_post(topN_authors=df.author,
                                         topN_permalinks=df.permalink,
                                         topN_titles=df.title,
                                         topN_filtered_bodies=df.filtered_body,
                                         topN_image_urls=df.image_urls,
                                         topN_rewards=df.reward, sbd_amount=10,
                                         steem_amount=10,
                                         title_date=date, trufflepicks_link='de.de.de',
                                         truffle_link='www.de')

    assert post
    assert title
2 changes: 1 addition & 1 deletion tests/filters/textfilters_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def test_filter_special_characters():

def test_filter_formatting():
result = tptf.filter_formatting('Hi&nbsphey aligncenter nbsp Styletextalign kk')
assert result == 'Hi hey kk'
assert result == 'Hihey kk'


def test_replace_newlines():
Expand Down
7 changes: 6 additions & 1 deletion tests/utils_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from steembase.exceptions import RPCError

from trufflepig.utils import progressbar, error_retry, none_retry
from trufflepig.utils import progressbar, error_retry, none_retry, none_error_retry


def test_progressbar():
Expand Down Expand Up @@ -34,3 +34,8 @@ def test_no_logrpc_retry():
def test_none_retry():
    """Calling ``g`` through ``none_retry`` must yield ``None``."""
    wrapped = none_retry(g, sleep_time=0.01)
    assert wrapped() is None


def test_none_error_retry():
    """Calling ``g`` through ``none_error_retry`` must yield ``None``."""
    wrapped = none_error_retry(g, sleep_time=0.1)
    assert wrapped() is None
4 changes: 2 additions & 2 deletions trufflepig/bchain/checkops.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from steem import Steem
from steem.post import Post

from trufflepig.utils import progressbar, none_retry, error_retry
from trufflepig.utils import progressbar, error_retry, none_error_retry
import trufflepig.bchain.getdata as tpbg


Expand Down Expand Up @@ -44,7 +44,7 @@ def check_all_ops_in_block(block_num, steem, account):
List of tuples with comment authors and permalinks

"""
operations = none_retry(steem.get_ops_in_block)(block_num, False)
operations = none_error_retry(steem.get_ops_in_block)(block_num, False)
if operations:
return extract_comment_authors_and_permalinks(operations, account)
else:
Expand Down
8 changes: 6 additions & 2 deletions trufflepig/bchain/getaccountdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def find_nearest_index(target_datetime,

best_smallest_index = 1
increase = index_tolerance + 1
current_datetime = None
for _ in range(max_tries):
try:
action = next(acc.get_account_history(current_index, limit=1))
Expand All @@ -114,10 +115,13 @@ def find_nearest_index(target_datetime,
latest_index))
except Exception:
logger.exception('Problems for index {}. Reconeccting...'.format(current_index))
current_index += 1
best_largest_index += 1
current_index -= 1
best_largest_index -= 1
steem.reconnect()
acc = Account(account, steem)
if current_index <= 1:
logger.error('Could not find index returning 1')
return 1, current_datetime


def get_delegates_and_shares(account, steem):
Expand Down
17 changes: 10 additions & 7 deletions trufflepig/bchain/getdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import json
from json import JSONDecodeError

from trufflepig.utils import progressbar, none_retry, error_retry
from trufflepig.utils import progressbar, error_retry, none_error_retry
import trufflepig.persist as tppe


Expand Down Expand Up @@ -52,7 +52,7 @@ def get_block_headers_between_offset_start(start_datetime, end_datetime,
logger.info('Collecting header infos')
while True:
try:
header = none_retry(steem.get_block_header)(current_block_num)
header = none_error_retry(steem.get_block_header)(current_block_num)
current_datetime = pd.to_datetime(header['timestamp'])
if start_datetime <= current_datetime and current_datetime <= end_datetime:
header['timestamp'] = current_datetime
Expand Down Expand Up @@ -93,22 +93,23 @@ def find_nearest_block_num(target_datetime, steem,

"""
if latest_block_num is None:
latest_block_num = none_retry(Blockchain(steem).get_current_block_num)()
latest_block_num = none_error_retry(Blockchain(steem).get_current_block_num)()

current_block_num = latest_block_num
best_largest_block_num = latest_block_num

header = none_retry(steem.get_block_header)(best_largest_block_num)
header = none_error_retry(steem.get_block_header)(best_largest_block_num)
best_largest_datetime = pd.to_datetime(header['timestamp'])
if target_datetime > best_largest_datetime:
logger.warning('Target beyond largest block num')
return latest_block_num, best_largest_datetime

best_smallest_block_num = 1
increase = block_num_tolerance + 1
current_datetime = None
for _ in range(max_tries):
try:
header = none_retry(steem.get_block_header)(current_block_num)
header = none_error_retry(steem.get_block_header)(current_block_num)
current_datetime = pd.to_datetime(header['timestamp'])
if increase <= block_num_tolerance:
return current_block_num, current_datetime
Expand All @@ -130,6 +131,9 @@ def find_nearest_block_num(target_datetime, steem,
current_block_num -= 1
best_smallest_block_num -= 1
steem.reconnect()
if current_block_num <= 1:
logger.error('Could not find block num returning 1')
return 1, current_datetime


def get_block_headers_between(start_datetime, end_datetime, steem):
Expand Down Expand Up @@ -219,7 +223,6 @@ def get_post_data(authors_and_permalinks, steem):

# Add positive votes and subtract negative
votes = sum(1 if x['percent'] > 0 else -1 for x in p.active_votes)
votes = max(votes, 0)

post = {
'title': p.title,
Expand Down Expand Up @@ -254,7 +257,7 @@ def get_all_posts_from_block(block_num, steem,

"""
try:
operations = none_retry(steem.get_ops_in_block)(block_num, False)
operations = none_error_retry(steem.get_ops_in_block)(block_num, False)
if operations:
authors_and_permalinks = extract_authors_and_permalinks(operations)
if exclude_authors_and_permalinks:
Expand Down
Loading