Skip to content

Commit

Permalink
Merge f508137 into 95fe7fe
Browse files Browse the repository at this point in the history
  • Loading branch information
SmokinCaterpillar committed Mar 12, 2018
2 parents 95fe7fe + f508137 commit 238e37f
Show file tree
Hide file tree
Showing 17 changed files with 416 additions and 27 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
*0.6.0a* - 2018-03-10

* Two new readability scores
* New correction for many bid bots and vote-selling services
* Bid bot stats are listed in the weekly post

*0.5.0a* - 2018-03-07

Expand Down
27 changes: 27 additions & 0 deletions integration_tests/bchain/getaccountdata_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,30 @@ def test_payouts(steem):

assert 'smcaterpillar' in result
assert 'trufflepig' not in result


def test_bidbot_test(steem):
    """Check that upvote payments are found for a known account.

    Calls ``tpac.get_upvote_payments`` for the account ``'brittuf'`` over a
    13-day window that starts 14 days before the current UTC time and
    asserts that the result is truthy (i.e. not empty).

    :param steem: pytest fixture forwarded to ``get_upvote_payments``
        (presumably a Steem connection -- confirm against the fixture).
    """
    # ``pd.datetime`` was merely an alias of ``datetime.datetime``; it was
    # deprecated in pandas 1.0 and removed in 2.0.  Use the standard library
    # directly and build the same naive UTC timestamp the alias produced.
    from datetime import datetime, timezone

    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    min_datetime = utc_now - pd.Timedelta(days=14)
    max_datetime = min_datetime + pd.Timedelta(days=13)
    result = tpac.get_upvote_payments('brittuf', steem, min_datetime,
                                      max_datetime)
    assert result


def test_bidbot_test_max_time(steem):
    """Check that ``max_time`` limits the amount of returned payments.

    Runs the same query as the plain bid bot test (account ``'brittuf'``,
    13-day window starting 14 days before the current UTC time) but passes
    ``max_time=0.1`` and asserts that at most one entry comes back.

    :param steem: pytest fixture forwarded to ``get_upvote_payments``
        (presumably a Steem connection -- confirm against the fixture).
    """
    # ``pd.datetime`` was merely an alias of ``datetime.datetime``; it was
    # deprecated in pandas 1.0 and removed in 2.0.  Use the standard library
    # directly and build the same naive UTC timestamp the alias produced.
    from datetime import datetime, timezone

    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    min_datetime = utc_now - pd.Timedelta(days=14)
    max_datetime = min_datetime + pd.Timedelta(days=13)
    # NOTE(review): max_time looks like a time budget that cuts the search
    # short -- confirm the unit against get_upvote_payments.
    result = tpac.get_upvote_payments('brittuf', steem, min_datetime,
                                      max_datetime, max_time=0.1)
    assert len(result) <= 1


def test_get_upvote_payments_for_accounts(steem_kwargs):
    """Check that upvote payments can be fetched for several accounts.

    Calls ``tpac.get_upvote_payments_for_accounts`` for three accounts over
    a 5-day window that starts 14 days before the current UTC time and
    asserts that the result is truthy (i.e. not empty).

    :param steem_kwargs: pytest fixture forwarded to the function under
        test (presumably Steem connection arguments -- confirm against the
        fixture).
    """
    # ``pd.datetime`` was merely an alias of ``datetime.datetime``; it was
    # deprecated in pandas 1.0 and removed in 2.0.  Use the standard library
    # directly and build the same naive UTC timestamp the alias produced.
    from datetime import datetime, timezone

    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    min_datetime = utc_now - pd.Timedelta(days=14)
    max_datetime = min_datetime + pd.Timedelta(days=5)
    accounts = ['trufflepig', 'smcaterpillar', 'brittuf']
    result = tpac.get_upvote_payments_for_accounts(accounts,
                                                   steem_kwargs,
                                                   min_datetime=min_datetime,
                                                   max_datetime=max_datetime)
    assert result
4 changes: 3 additions & 1 deletion integration_tests/bchain/paydelegates_test.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest
import pandas as pd

from trufflepig.testutils.pytest_fixtures import steem_kwargs
import trufflepig.bchain.paydelegates as tppd
Expand All @@ -14,4 +15,5 @@ def test_pay_delegates(steem_kwargs):

tppd.pay_delegates(account=config.ACCOUNT,
steem_args=steem_kwargs,
current_datetime='2029-01-01')
current_datetime=pd.datetime.utcnow()#'2029-01-01'
)
9 changes: 9 additions & 0 deletions integration_tests/bchain/postweeklyupdate_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@ def test_statistics():
post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)
pipeline = tpmo.train_pipeline(post_frame, topic_kwargs=topic_kwargs,
regressor_kwargs=regressor_kwargs)

post_frame['steem_bought_reward'] = 0
post_frame['sbd_bought_reward'] = 0
post_frame['bought_votes'] = 0

stats = tppw.compute_weekly_statistics(post_frame, pipeline)
steem_per_mvests = 490

Expand Down Expand Up @@ -62,6 +67,10 @@ def test_weekly_post(steem_kwargs):
pipeline = tpmo.train_pipeline(post_frame, topic_kwargs=topic_kwargs,
regressor_kwargs=regressor_kwargs)

post_frame['steem_bought_reward'] = 0
post_frame['sbd_bought_reward'] = 0
post_frame['bought_votes'] = 0

permalink = tppw.post_weakly_update(pipeline, post_frame,
account=config.ACCOUNT,
steem_args=steem_kwargs,
Expand Down
30 changes: 28 additions & 2 deletions integration_tests/preprocessing_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,9 @@
from pandas.testing import assert_frame_equal

import trufflepig.preprocessing as tppp
import trufflepig.bchain.getdata as tpgd
from trufflepig.testutils.random_data import create_n_random_posts
from trufflepig.testutils.pytest_fixtures import temp_dir
from trufflepig.testutils.pytest_fixtures import temp_dir, steem_kwargs


def test_load_or_preproc(temp_dir):
Expand All @@ -22,4 +23,29 @@ def test_load_or_preproc(temp_dir):
ncores=5, chunksize=20)

assert len(os.listdir(temp_dir)) == 1
assert_frame_equal(frame, frame2)
assert_frame_equal(frame, frame2)


def test_load_or_preproc_with_real_data(steem_kwargs, temp_dir):
    """Run ``load_or_preprocess`` twice on freshly scraped posts.

    Posts from a two hour window starting 14 days before the current UTC
    time are fetched with ``tpgd.get_all_posts_between_parallel`` and handed
    to ``tppp.load_or_preprocess`` twice with the same target file.  After
    each call the temporary directory must contain exactly one file, and
    both calls must return equal frames.

    :param steem_kwargs: pytest fixture passed to the scraper and as
        ``steem_args_for_upvote`` (presumably Steem connection arguments --
        confirm against the fixture).
    :param temp_dir: pytest fixture giving the directory that receives the
        preprocessed file.
    """
    # ``pd.datetime`` was merely an alias of ``datetime.datetime``; it was
    # deprecated in pandas 1.0 and removed in 2.0.  Use the standard library
    # directly and build the same naive UTC timestamp the alias produced.
    from datetime import datetime, timezone

    filename = os.path.join(temp_dir, 'pptest.gz')

    utc_now = datetime.now(timezone.utc).replace(tzinfo=None)
    start_datetime = utc_now - pd.Timedelta(days=14)
    end_datetime = start_datetime + pd.Timedelta(hours=2)
    posts = tpgd.get_all_posts_between_parallel(start_datetime,
                                                end_datetime,
                                                steem_kwargs,
                                                stop_after=15)
    post_frame = pd.DataFrame(posts)
    bots = ['okankarol', 'bidseption', 'highvote', 'oguzhangazi', 'ottoman',]
    frame = tppp.load_or_preprocess(post_frame, filename,
                                    steem_args_for_upvote=steem_kwargs,
                                    ncores=5, chunksize=20, bots=bots)

    assert len(os.listdir(temp_dir)) == 1

    # Second call with the identical file name; presumably this is served
    # from the file written above instead of recomputing -- confirm in
    # load_or_preprocess.
    frame2 = tppp.load_or_preprocess(post_frame, filename,
                                     steem_args_for_upvote=steem_kwargs,
                                     ncores=5, chunksize=20, bots=bots)

    assert len(os.listdir(temp_dir)) == 1
    assert_frame_equal(frame, frame2)
7 changes: 5 additions & 2 deletions scripts/do_cross_val.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def main():

post_frame = tpgd.load_or_scrape_training_data(steem, directory,
current_datetime=current_datetime,
days=12,
days=10,
offset_days=0)

gc.collect()
Expand All @@ -39,6 +39,7 @@ def main():
ngrams=(1, 2))

post_frame = tppp.load_or_preprocess(post_frame, crossval_filename,
steem_args_for_upvote=steem,
ncores=8, chunksize=500,
min_en_prob=0.9)

Expand All @@ -57,7 +58,9 @@ def main():
# n_jobs=4, targets=['reward'])

pipe, test_frame = tpmo.train_test_pipeline(post_frame, topic_kwargs=topic_kwargs,
regressor_kwargs=regressor_kwargs, targets=['reward', 'votes'],
regressor_kwargs=regressor_kwargs,
targets=['adjusted_reward',
'adjusted_votes'],
random_state=42)

tpmo.log_pipeline_info(pipe)
Expand Down
7 changes: 7 additions & 0 deletions tests/bchain/posts_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ def test_weekly_update():
total_votes = 99897788
total_reward = 79898973

bid_bots_sbd = 4242
bid_bots_steem = 12
bid_bots_percent = 99.9998

median_reward = 0.012
mean_reward = 6.2987347329
dollar_percent = 69.80921393
Expand Down Expand Up @@ -126,6 +130,9 @@ def test_weekly_update():
total_posts=total_posts,
total_votes=total_votes,
total_reward=total_reward,
bid_bots_sbd=bid_bots_sbd,
bid_bots_steem=bid_bots_steem,
bid_bots_percent=bid_bots_percent,
median_reward=median_reward,
mean_reward=mean_reward,
dollar_percent=dollar_percent,
Expand Down
19 changes: 19 additions & 0 deletions tests/preprocessing_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,22 @@ def test_preprocessing_random_parallel():
min_en_prob=0.8, max_errors_per_word=0.5)

assert len(filtered) > 30


def test_bid_bot_correction():
    """Verify the bid bot correction on randomly generated posts.

    Thirty random posts are created and ten of them are marked as having
    bought votes.  After ``tppp.compute_bidbot_correction`` the mean of the
    adjusted reward and adjusted votes must lie below the mean of the raw
    values, and no adjusted value may be negative.
    """
    raw_frame = pd.DataFrame(create_n_random_posts(30))

    # The first key is not taken from the generated frame.
    bought = {('hello', 'kitty'): ['19 STEEM']}
    chosen = raw_frame[['author', 'permalink']].sample(10)
    bought.update({
        (author, permalink): {'aaa':'3 STEEM', 'bbb': '4 SBD'}
        for _, (author, permalink) in chosen.iterrows()
    })

    corrected = tppp.compute_bidbot_correction(raw_frame, bought)

    adjusted_reward = corrected['adjusted_reward']
    adjusted_votes = corrected['adjusted_votes']

    assert adjusted_reward.mean() < corrected['reward'].mean()
    assert (adjusted_reward >= 0).all()
    assert adjusted_votes.mean() < corrected['votes'].mean()
    assert (adjusted_votes >= 0).all()
2 changes: 1 addition & 1 deletion trufflepig/bchain/checkops.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,4 @@ def get_parent_posts(comment_authors_and_permalinks, steem):
logger.exception(('Could not work with comment {} by '
'{}').format(comment_permalink, comment_author))

return posts
return posts

0 comments on commit 238e37f

Please sign in to comment.