In [16]:
from twarc import Twarc2, expansions
import json
import pandas as pd
from datetime import datetime, timedelta, timezone
from sqlalchemy import create_engine
from decouple import config

In [17]:
# Database connection settings, each looked up by name through decouple's `config`.
# The lookups run in the same order as the names on the left-hand side.
dbuname, dbpwd, host, port, dbname = [
    config(setting_key)
    for setting_key in ('DBUNAME', 'DBPWD', 'HOST', 'PORT', 'DBNAME')
]

# Bearer token handed to the Twarc2 client.
bearer_token_aca = config('BEARER_TOKEN_ACA')

In [20]:
# Shared Twarc2 client, authenticated with the bearer token read from the config;
# fetch_load_tweetcount_hist uses it for its counts_all requests.
client = Twarc2(
    bearer_token=bearer_token_aca,
)

In [29]:
def fetch_load_tweetcount_hist(label, ht1, ht2, start, end, method):
    """Fetch daily tweet counts for two hashtags and load them into the database.

    Builds the query ``(<ht1> OR <ht2>) -is:retweet``, requests counts with
    ``client.counts_all(..., granularity='day')`` and writes one row per
    returned bucket to the table ``table_tw_<label>``.

    The written frame has three columns: ``date`` (the bucket's ``end`` value
    parsed with ``pd.to_datetime``), ``tweet_count`` and ``label``. Rows are
    sorted by ``date``.

    Parameters
    ----------
    label : str
        Name of the asset; used as the ``label`` column value and as the
        table-name suffix.
    ht1, ht2 : str
        The two hashtags that are OR-combined in the query.
    start, end
        Passed unchanged as ``start_time`` / ``end_time`` to ``counts_all``.
    method : str
        Passed as ``if_exists`` to ``DataFrame.to_sql``
        (e.g. ``'replace'`` or ``'append'``).

    Returns
    -------
    None
        Failures are reported with ``print`` and are not re-raised, so a loop
        over several labels keeps going after one label fails.
    """
    # Define query and table_name
    query = '(' + ht1 + ' OR ' + ht2 + ') -is:retweet'
    table_name = 'table_tw_' + label

    # fetch data
    try:
        count_results = client.counts_all(query=query, start_time=start, end_time=end, granularity='day')

        # Collect plain records and build the frame once: DataFrame.append was
        # removed in pandas 2.0 and re-copies the whole frame on every call.
        records = []
        for page in count_results:
            # expansions.flatten turns one response page into a list of rows
            for row in expansions.flatten(page):
                records.append({'date': row['end'],
                                'tweet_count': row['tweet_count']})

        if not records:
            # Nothing to load; say so explicitly instead of failing later with
            # a KeyError on the missing 'date' column.
            print("no count data returned for query: " + query)
            print("tweet count failed: " + label)
            return

        tweets_df = pd.DataFrame(records)
        tweets_df['date'] = pd.to_datetime(tweets_df['date'])
        tweets_df['label'] = label
        tweets_df = tweets_df.sort_values('date').reset_index(drop=True)

    except Exception as e:
        print(e)
        print("tweet count failed: " + label)
        return

    # load data
    engine = None
    try:
        # NOTE(review): credentials are concatenated verbatim, so a password
        # containing URL-reserved characters (e.g. '@', '/') must already be
        # percent-encoded in the configuration -- confirm.
        conn_string = 'postgresql://' + dbuname + ':' + dbpwd + '@' + host + ':' + port + '/' + dbname
        engine = create_engine(conn_string)
        # Write through the engine that was just created (it was previously
        # built and then ignored in favour of the raw connection string).
        tweets_df.to_sql(table_name, engine, if_exists=method, index=False)
    except Exception as e:
        print(e)
        print("Data load failed: " + table_name)
    finally:
        # Release the engine's pooled connections; one engine is created per call.
        if engine is not None:
            engine.dispose()
 
            


In [30]:
# Hashtag pairs per asset: key = label (also the table-name suffix),
# value = the two hashtags that are OR-combined in the count query.
# NOTE(review): the ticker-style tags ('#EXW1.DE', '#CSSMI.SW', '#EXXT.DE',
# '#GC=F', '#SI=F') contain '.' or '=' -- confirm the API matches them as intended.
hashtags = {
    'bitcoin': ['#bitcoin', '#btc'],
    'ethereum': ['#eth', '#ethereum'],
    'binance': ['#binance', '#bnb'],
    'ripple': ['#ripple', '#xrp'],
    'terra': ['#terra', '#luna'],
    'cardano': ['#cardano', '#ada'],
    'solana': ['#sol', '#solana'],
    'avalanche': ['#avalanche', '#avax'],
    'polkadot': ['#polkadot', '#dot'],
    'dogecoin': ['#dogecoin', '#doge'],
    'msci_world': ['#msciworld', '#msci'],
    'euro_stoxx': ['#EXW1.DE', '#eurostoxx'],
    'smi': ['#CSSMI.SW', '#smi'],
    'nasdaq': ['#nasdaq', '#EXXT.DE'],
    'gold': ['#gold', '#GC=F'],
    'silver': ['#silver', '#SI=F'],
}

In [31]:
# Historical load: fetch and store the daily tweet counts for every label.

# Time window (UTC) passed to the count request
start = datetime(2020, 12, 31, tzinfo=timezone.utc)
end = datetime(2022, 4, 13, tzinfo=timezone.utc)

# `if_exists` mode for DataFrame.to_sql: 'replace' overwrites each label's table
method = 'replace'

# One fetch-and-load run per label, using its two hashtags
for label, (ht1, ht2) in hashtags.items():
    fetch_load_tweetcount_hist(label, ht1, ht2, start, end, method)
        