In [1]:
import pandas as pd

In [2]:
# Processed text corpora, loaded pairwise (BTC first, then ETH) per source.

# News articles.
btc_news, eth_news = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        '../2_data_processing/text_data/btc_news_processed.parquet.gzip',
        '../2_data_processing/text_data/eth_news_processed.parquet.gzip',
    )
)

# Tweets: the `tweet_id` column is promoted to the index.
btc_tweets, eth_tweets = (
    pd.read_parquet(parquet_path).set_index('tweet_id')
    for parquet_path in (
        '../2_data_processing/text_data/btc_tweets_processed.parquet.gzip',
        '../2_data_processing/text_data/eth_tweets_processed.parquet.gzip',
    )
)

# Reddit posts from r/bitcoin and r/ethereum.
reddit_r_bitcoin, reddit_r_ethereum = (
    pd.read_parquet(parquet_path)
    for parquet_path in (
        '../2_data_processing/text_data/reddit_r_bitcoin_processed.parquet.gzip',
        '../2_data_processing/text_data/reddit_r_ethereum_processed.parquet.gzip',
    )
)

In [12]:
# Corpus sizes per source, with the BTC and ETH frames added together.
news_total = len(btc_news) + len(eth_news)
reddit_total = len(reddit_r_bitcoin) + len(reddit_r_ethereum)
tweets_total = len(btc_tweets) + len(eth_tweets)

# Tabs/spaces inside the labels are hand-tuned so the numbers line up.
print(
    f' Number of news:\t\t   {news_total:,}',
    f' Number of reddit posts:\t  {reddit_total:,}',
    f' Number of tweets:\t\t{tweets_total:,}',
    sep='\n',
)

 Number of news:		   53,469
 Number of reddit posts:	  337,886
 Number of tweets:		1,837,953


In [3]:
def _load_scores(news_path, tweets_path, reddit_path, tweets_index=None):
    """Read the news, tweets and Reddit score files of one model for one coin.

    Files are read in the order news, tweets, Reddit. When `tweets_index` is
    given, that column of the tweets frame is promoted to its index; the news
    and Reddit frames are returned exactly as read.

    Returns a `(news, tweets, reddit)` tuple of DataFrames.
    """
    news = pd.read_parquet(news_path)
    tweets = pd.read_parquet(tweets_path)
    if tweets_index is not None:
        tweets = tweets.set_index(tweets_index)
    reddit = pd.read_parquet(reddit_path)
    return news, tweets, reddit


# --- Bitcoin ---------------------------------------------------------------
btc_news_roberta, btc_tweets_roberta, btc_reddit_roberta = _load_scores(
    '../3_nlp_models/1_twitter_roberta_pretrained/btc_news_roberta_pretrained.parquet.gzip',
    '../3_nlp_models/1_twitter_roberta_pretrained/btc_tweets_roberta_pretrained.parquet.gzip',
    '../3_nlp_models/1_twitter_roberta_pretrained/reddit_r_bitcoin_roberta_pretrained.parquet.gzip',
    tweets_index='tweet_id',
)
btc_news_bart, btc_tweets_bart, btc_reddit_bart = _load_scores(
    '../3_nlp_models/2_bart_zero_shot/btc_news_bart_mnli.parquet.gzip',
    '../3_nlp_models/2_bart_zero_shot/btc_tweets_bart_mnli.parquet.gzip',
    '../3_nlp_models/2_bart_zero_shot/reddit_r_bitcoin_bart_mnli.parquet.gzip',
    tweets_index='tweet_id',
)
# The fine-tuned tweet scores keep whatever index the file was stored with
# (no `tweet_id` re-indexing here).
btc_news_roberta_finetuned, btc_tweets_roberta_finetuned, btc_reddit_roberta_finetuned = _load_scores(
    '../3_nlp_models/3_roberta_finetuned/second_results/btc_news_roberta_finetuned.parquet.gzip',
    '../3_nlp_models/3_roberta_finetuned/second_results/btc_tweets_roberta_finetuned.parquet.gzip',
    '../3_nlp_models/3_roberta_finetuned/second_results/reddit_r_bitcoin_roberta_finetuned.parquet.gzip',
)

# --- Ethereum --------------------------------------------------------------
eth_news_roberta, eth_tweets_roberta, eth_reddit_roberta = _load_scores(
    '../3_nlp_models/1_twitter_roberta_pretrained/eth_news_roberta_pretrained.parquet.gzip',
    '../3_nlp_models/1_twitter_roberta_pretrained/eth_tweets_roberta_pretrained.parquet.gzip',
    '../3_nlp_models/1_twitter_roberta_pretrained/reddit_r_ethereum_roberta_pretrained.parquet.gzip',
    tweets_index='tweet_id',
)
eth_news_bart, eth_tweets_bart, eth_reddit_bart = _load_scores(
    '../3_nlp_models/2_bart_zero_shot/eth_news_bart_mnli.parquet.gzip',
    '../3_nlp_models/2_bart_zero_shot/eth_tweets_bart_mnli.parquet.gzip',
    '../3_nlp_models/2_bart_zero_shot/reddit_r_ethereum_bart_mnli.parquet.gzip',
    tweets_index='tweet_id',
)
# As for Bitcoin, the fine-tuned tweet scores are not re-indexed.
eth_news_roberta_finetuned, eth_tweets_roberta_finetuned, eth_reddit_roberta_finetuned = _load_scores(
    '../3_nlp_models/3_roberta_finetuned/second_results/eth_news_roberta_finetuned.parquet.gzip',
    '../3_nlp_models/3_roberta_finetuned/second_results/eth_tweets_roberta_finetuned.parquet.gzip',
    '../3_nlp_models/3_roberta_finetuned/second_results/reddit_r_ethereum_roberta_finetuned.parquet.gzip',
)

In [4]:
def _with_scores(posts, bart_scores, roberta_scores):
    """Return `posts` with the two pretrained-model score columns appended.

    The `bart_mnli_bullish_score` column of `bart_scores` and the
    `twitter_roberta_pretrained_score` column of `roberta_scores` are placed
    next to `posts` with `pd.concat(axis=1)`, so rows are matched on index
    labels rather than on position.
    """
    score_columns = [
        bart_scores['bart_mnli_bullish_score'],
        roberta_scores['twitter_roberta_pretrained_score'],
    ]
    return pd.concat([posts, *score_columns], axis=1)


# Bitcoin corpora with their BART MNLI and Twitter RoBERTa scores attached.
btc_news_merged = _with_scores(btc_news, btc_news_bart, btc_news_roberta)
btc_tweets_merged = _with_scores(btc_tweets, btc_tweets_bart, btc_tweets_roberta)
btc_reddit_merged = _with_scores(reddit_r_bitcoin, btc_reddit_bart, btc_reddit_roberta)

# Ethereum corpora, same layout.
eth_news_merged = _with_scores(eth_news, eth_news_bart, eth_news_roberta)
eth_tweets_merged = _with_scores(eth_tweets, eth_tweets_bart, eth_tweets_roberta)
eth_reddit_merged = _with_scores(reddit_r_ethereum, eth_reddit_bart, eth_reddit_roberta)

In [7]:
def _top_texts(scored, score_col, text_col, n=10):
    """Return the `text_col` values of the `n` rows ranked highest by `score_col`.

    Rows are sorted by `score_col` in descending order and the first `n`
    entries of `text_col` are returned as an array.
    """
    ranked = scored.sort_values(score_col, ascending=False)
    return ranked[text_col].head(n).values


# Ten highest-scored Bitcoin texts for every (dataset, model) pair.
# News/tweets/Reddit texts live in different columns (`title`,
# `content_cleaned`, `content`); the fine-tuned outputs carry their own `text`.
print(f'''Top Bitcoin posts by dataset and by LLM model:

---------------------      
Bitcoin News
---------------------      

Top Twitter RoBERTa news:

{_top_texts(btc_news_merged, 'twitter_roberta_pretrained_score', 'title')}

Top BART MNLI news:

{_top_texts(btc_news_merged, 'bart_mnli_bullish_score', 'title')}

Top RoBERTa finetuned news:

{_top_texts(btc_news_roberta_finetuned, 'roberta_finetuned_score', 'text')}

---------------------      
Bitcoin Tweets
---------------------      

Top Twitter RoBERTa tweets:

{_top_texts(btc_tweets_merged, 'twitter_roberta_pretrained_score', 'content_cleaned')}

Top BART MNLI tweets:

{_top_texts(btc_tweets_merged, 'bart_mnli_bullish_score', 'content_cleaned')}

Top RoBERTa finetuned tweets:

{_top_texts(btc_tweets_roberta_finetuned, 'roberta_finetuned_score', 'text')}

---------------------      
Bitcoin Reddit
---------------------      

Top Twitter RoBERTa reddit posts:

{_top_texts(btc_reddit_merged, 'twitter_roberta_pretrained_score', 'content')}

Top BART MNLI reddit posts:

{_top_texts(btc_reddit_merged, 'bart_mnli_bullish_score', 'content')}

Top RoBERTa finetuned reddit posts:

{_top_texts(btc_reddit_roberta_finetuned, 'roberta_finetuned_score', 'text')}
''')


Top Bitcoin posts by dataset and by LLM model:

---------------------      
Bitcoin News
---------------------      

Top Twitter RoBERTa news:

['‘This Is So Awesome!’: Al Roker and Wife Deborah Roberts Inspire Fans with Heartfelt Announcement - Yahoo News'
 'Happy 10th Birthday, Bitcoin.org! - Modern Consensus'
 "Powers On... It's been a wonderful life (week): SEC Commissioner ... - Cointelegraph"
 'FanAnywhere is pleased and immensely proud to announce itself as a portfolio company of Polygon Studio - Cointelegraph'
 '2022 ABGA Blockchain Gaming Summit was a big success on Sept. 27 - Cointelegraph'
 'An amazing race during the bear market: Seedify launches Amazy with impressive results - Cointelegraph'
 "Happy International Women's Day! Leaders share their experiences in crypto - Cointelegraph"
 'Phemex brings joy to your home this Christmas - Cointelegraph'
 'Happy 5th Birthday Ethereum: this is your life - Decrypt'
 'Harvard named world’s best university for the 9th year in a row 

In [8]:
def _top_texts(scored, score_col, text_col, n=10):
    """Return the `text_col` values of the `n` rows ranked highest by `score_col`.

    Rows are sorted by `score_col` in descending order and the first `n`
    entries of `text_col` are returned as an array.
    """
    ranked = scored.sort_values(score_col, ascending=False)
    return ranked[text_col].head(n).values


# Ten highest-scored Ethereum texts for every (dataset, model) pair.
# News/tweets/Reddit texts live in different columns (`title`,
# `content_cleaned`, `content`); the fine-tuned outputs carry their own `text`.
print(f'''Top Ethereum posts by dataset and by LLM model:

---------------------      
Ethereum News
---------------------      

Top Twitter RoBERTa news:

{_top_texts(eth_news_merged, 'twitter_roberta_pretrained_score', 'title')}

Top BART MNLI news:

{_top_texts(eth_news_merged, 'bart_mnli_bullish_score', 'title')}

Top RoBERTa finetuned news:

{_top_texts(eth_news_roberta_finetuned, 'roberta_finetuned_score', 'text')}

---------------------      
Ethereum Tweets
---------------------      

Top Twitter RoBERTa tweets:

{_top_texts(eth_tweets_merged, 'twitter_roberta_pretrained_score', 'content_cleaned')}

Top BART MNLI tweets:

{_top_texts(eth_tweets_merged, 'bart_mnli_bullish_score', 'content_cleaned')}

Top RoBERTa finetuned tweets:

{_top_texts(eth_tweets_roberta_finetuned, 'roberta_finetuned_score', 'text')}

---------------------      
Ethereum Reddit
---------------------      

Top Twitter RoBERTa reddit posts:

{_top_texts(eth_reddit_merged, 'twitter_roberta_pretrained_score', 'content')}

Top BART MNLI reddit posts:

{_top_texts(eth_reddit_merged, 'bart_mnli_bullish_score', 'content')}

Top RoBERTa finetuned reddit posts:

{_top_texts(eth_reddit_roberta_finetuned, 'roberta_finetuned_score', 'text')}
''')

Top Ethereum posts by dataset and by LLM model:

---------------------      
Ethereum News
---------------------      

Top Twitter RoBERTa news:

['Happy 10th Birthday, Bitcoin.org! - Modern Consensus'
 'Great news for strategic traders — Earn easily with CoinEx’s spot grid feature! - Cointelegraph'
 "Powers On... It's been a wonderful life (week): SEC Commissioner ... - Cointelegraph"
 'FanAnywhere is pleased and immensely proud to announce itself as a portfolio company of Polygon Studio - Cointelegraph'
 'Happy Birthday, Bitcoin! Industry players share a few words - Cointelegraph'
 'An amazing race during the bear market: Seedify launches Amazy with impressive results - Cointelegraph'
 'Blockchain Fest Singapore 2023 Wraps Up with Great Success - Cointelegraph'
 "Happy International Women's Day! Leaders share their experiences in crypto - Cointelegraph"
 'Phemex brings joy to your home this Christmas - Cointelegraph'
 'Happy 5th Birthday Ethereum: this is your life - Decrypt']

Top 