Merge f508137 into 95fe7fe

SmokinCaterpillar · Mar 12, 2018 · 238e37f · 238e37f
2 parents 95fe7fe + f508137
commit 238e37f
Show file tree

Hide file tree

Showing 17 changed files with 416 additions and 27 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,8 @@
 *0.6.0a* - 2018-03-10
 
 * Two new readability scores
+* New correction for many bid bots and vote selling services
+* Bid bot stats are listed in the weekly post
 
 *0.5.0a* - 2018-03-07
 

diff --git a/integration_tests/bchain/getaccountdata_test.py b/integration_tests/bchain/getaccountdata_test.py
@@ -27,3 +27,30 @@ def test_payouts(steem):
 
     assert 'smcaterpillar' in result
     assert 'trufflepig' not in result
+
+
+def test_bidbot_test(steem):
+    min_datetime = pd.datetime.utcnow() - pd.Timedelta(days=14)
+    max_datetime = min_datetime + pd.Timedelta(days=13)
+    result = tpac.get_upvote_payments('brittuf', steem, min_datetime,
+                                      max_datetime)
+    assert result
+
+
+def test_bidbot_test_max_time(steem):
+    min_datetime = pd.datetime.utcnow() - pd.Timedelta(days=14)
+    max_datetime = min_datetime + pd.Timedelta(days=13)
+    result = tpac.get_upvote_payments('brittuf', steem, min_datetime,
+                                      max_datetime, max_time=0.1)
+    assert len(result) <= 1
+
+
+def test_get_upvote_payments_for_accounts(steem_kwargs):
+    min_datetime = pd.datetime.utcnow() - pd.Timedelta(days=14)
+    max_datetime = min_datetime + pd.Timedelta(days=5)
+    accounts = ['trufflepig', 'smcaterpillar', 'brittuf']
+    result = tpac.get_upvote_payments_for_accounts(accounts,
+                                                   steem_kwargs,
+                                                   min_datetime=min_datetime,
+                                                   max_datetime=max_datetime)
+    assert result
diff --git a/integration_tests/bchain/paydelegates_test.py b/integration_tests/bchain/paydelegates_test.py
@@ -1,4 +1,5 @@
 import pytest
+import pandas as pd
 
 from trufflepig.testutils.pytest_fixtures import steem_kwargs
 import trufflepig.bchain.paydelegates as tppd
@@ -14,4 +15,5 @@ def test_pay_delegates(steem_kwargs):
 
     tppd.pay_delegates(account=config.ACCOUNT,
                        steem_args=steem_kwargs,
-                       current_datetime='2029-01-01')
+                       current_datetime=pd.datetime.utcnow()#'2029-01-01'
+                       )
diff --git a/integration_tests/bchain/postweeklyupdate_test.py b/integration_tests/bchain/postweeklyupdate_test.py
@@ -25,6 +25,11 @@ def test_statistics():
     post_frame = tppp.preprocess(post_frame, ncores=4, chunksize=50)
     pipeline = tpmo.train_pipeline(post_frame, topic_kwargs=topic_kwargs,
                                     regressor_kwargs=regressor_kwargs)
+
+    post_frame['steem_bought_reward'] = 0
+    post_frame['sbd_bought_reward'] = 0
+    post_frame['bought_votes'] = 0
+
     stats = tppw.compute_weekly_statistics(post_frame, pipeline)
     steem_per_mvests = 490
 
@@ -62,6 +67,10 @@ def test_weekly_post(steem_kwargs):
     pipeline = tpmo.train_pipeline(post_frame, topic_kwargs=topic_kwargs,
                                     regressor_kwargs=regressor_kwargs)
 
+    post_frame['steem_bought_reward'] = 0
+    post_frame['sbd_bought_reward'] = 0
+    post_frame['bought_votes'] = 0
+
     permalink = tppw.post_weakly_update(pipeline, post_frame,
                                         account=config.ACCOUNT,
                                         steem_args=steem_kwargs,

diff --git a/integration_tests/preprocessing_test.py b/integration_tests/preprocessing_test.py
@@ -4,8 +4,9 @@
 from pandas.testing import assert_frame_equal
 
 import trufflepig.preprocessing as tppp
+import trufflepig.bchain.getdata as tpgd
 from trufflepig.testutils.random_data import create_n_random_posts
-from trufflepig.testutils.pytest_fixtures import temp_dir
+from trufflepig.testutils.pytest_fixtures import temp_dir, steem_kwargs
 
 
 def test_load_or_preproc(temp_dir):
@@ -22,4 +23,29 @@ def test_load_or_preproc(temp_dir):
                                     ncores=5, chunksize=20)
 
     assert len(os.listdir(temp_dir)) == 1
-    assert_frame_equal(frame, frame2)
+    assert_frame_equal(frame, frame2)
+
+
+def test_load_or_preproc_with_real_data(steem_kwargs, temp_dir):
+    filename = os.path.join(temp_dir, 'pptest.gz')
+
+    start_datetime = pd.datetime.utcnow() - pd.Timedelta(days=14)
+    end_datetime = start_datetime + pd.Timedelta(hours=2)
+    posts = tpgd.get_all_posts_between_parallel(start_datetime,
+                                                     end_datetime,
+                                                     steem_kwargs,
+                                                     stop_after=15)
+    post_frame = pd.DataFrame(posts)
+    bots = ['okankarol', 'bidseption', 'highvote', 'oguzhangazi', 'ottoman',]
+    frame = tppp.load_or_preprocess(post_frame, filename,
+                                    steem_args_for_upvote=steem_kwargs,
+                                    ncores=5, chunksize=20, bots=bots)
+
+    assert len(os.listdir(temp_dir)) == 1
+
+    frame2 = tppp.load_or_preprocess(post_frame, filename,
+                                    steem_args_for_upvote=steem_kwargs,
+                                    ncores=5, chunksize=20, bots=bots)
+
+    assert len(os.listdir(temp_dir)) == 1
+    assert_frame_equal(frame, frame2)
diff --git a/scripts/do_cross_val.py b/scripts/do_cross_val.py
@@ -26,7 +26,7 @@ def main():
 
     post_frame = tpgd.load_or_scrape_training_data(steem, directory,
                                                    current_datetime=current_datetime,
-                                                   days=12,
+                                                   days=10,
                                                    offset_days=0)
 
     gc.collect()
@@ -39,6 +39,7 @@ def main():
                         ngrams=(1, 2))
 
     post_frame = tppp.load_or_preprocess(post_frame, crossval_filename,
+                                         steem_args_for_upvote=steem,
                                          ncores=8, chunksize=500,
                                          min_en_prob=0.9)
 
@@ -57,7 +58,9 @@ def main():
     #                     n_jobs=4, targets=['reward'])
 
     pipe, test_frame = tpmo.train_test_pipeline(post_frame,  topic_kwargs=topic_kwargs,
-                         regressor_kwargs=regressor_kwargs, targets=['reward', 'votes'],
+                         regressor_kwargs=regressor_kwargs,
+                                                targets=['adjusted_reward',
+                                                         'adjusted_votes'],
                                                 random_state=42)
 
     tpmo.log_pipeline_info(pipe)

diff --git a/tests/bchain/posts_test.py b/tests/bchain/posts_test.py
@@ -71,6 +71,10 @@ def test_weekly_update():
     total_votes = 99897788
     total_reward = 79898973
 
+    bid_bots_sbd = 4242
+    bid_bots_steem = 12
+    bid_bots_percent = 99.9998
+
     median_reward = 0.012
     mean_reward = 6.2987347329
     dollar_percent = 69.80921393
@@ -126,6 +130,9 @@ def test_weekly_update():
                   total_posts=total_posts,
                   total_votes=total_votes,
                   total_reward=total_reward,
+                  bid_bots_sbd=bid_bots_sbd,
+                  bid_bots_steem=bid_bots_steem,
+                  bid_bots_percent=bid_bots_percent,
                   median_reward=median_reward,
                   mean_reward=mean_reward,
                   dollar_percent=dollar_percent,

diff --git a/tests/preprocessing_test.py b/tests/preprocessing_test.py
@@ -29,3 +29,22 @@ def test_preprocessing_random_parallel():
                                min_en_prob=0.8, max_errors_per_word=0.5)
 
     assert len(filtered) > 30
+
+
+def test_bid_bot_correction():
+    posts = create_n_random_posts(30)
+    post_frame = pd.DataFrame(posts)
+
+    bought = {}
+    bought[('hello', 'kitty')] = ['19 STEEM']
+    sample_frame = post_frame[['author', 'permalink']].sample(10)
+    for _, (author, permalink) in sample_frame.iterrows():
+        bought[(author, permalink)] = {'aaa':'3 STEEM', 'bbb': '4 SBD'}
+
+    post_frame = tppp.compute_bidbot_correction(post_frame,
+                                                bought)
+
+    assert post_frame.adjusted_reward.mean() < post_frame.reward.mean()
+    assert all(post_frame.adjusted_reward >= 0)
+    assert post_frame.adjusted_votes.mean() < post_frame.votes.mean()
+    assert all(post_frame.adjusted_votes >= 0)
diff --git a/trufflepig/bchain/checkops.py b/trufflepig/bchain/checkops.py
@@ -205,4 +205,4 @@ def get_parent_posts(comment_authors_and_permalinks, steem):
             logger.exception(('Could not work with comment {} by '
                               '{}').format(comment_permalink, comment_author))
 
-    return posts
+    return posts