In [None]:
# Load IPython's autoreload extension and switch it to mode 2, so imported
# modules (including the local `functions` package used below) are reloaded
# before each cell runs and edits to them take effect without a kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
from pathlib import Path

import numpy as np
import pandas as pd

from functions.load_data import load_tweets, load_labelled_data
from functions.tokenizer import apply_keras_tokenizer, load_model_and_tokenizer
from functions.data_modification import aggregate_sentiment
from functions.apply_models import apply_model
from functions.preprocessing import preprocess_tweets, concat_tweet_files
from functions.sentiwordnet import SentimentAnalysis



In [None]:
# Ticker symbols of the coins to process; each ticker selects one
# '<ticker>_semantic_filtered.csv' tweet file in the processing cell.
coinlist = [
    'ADA', 'BCH', 'BCN', 'DASH', 'EOS', 'ETC', 'ETH', 'ICX', 'IOT', 'LTC',
    'NEO', 'QTUM', 'TRX', 'VEN', 'XEM', 'XLM', 'XMR', 'XRP', 'ZEC',
]

# Restore the saved model together with the tokenizer that was used when
# the model was trained.
MODEL_PATH = 'models_and_tokenizers/randomforest.sav'
TOKENIZER_PATH = 'models_and_tokenizers/tokenizer_1500_man.sav'
my_model, my_tokenizer = load_model_and_tokenizer(MODEL_PATH, TOKENIZER_PATH)

# Build the SentiWordNet scorer from the lexicon file, using geometric weighting.
s = SentimentAnalysis(weighting='geometric', filename='data/SentiWordNet.txt')

In [None]:
# Apply both labelling methods to every coin's tweets, save the labelled
# tweets, then aggregate the sentiment at several window sizes and save each
# aggregation.
# NOTE(review): presumably "Method 1" is the loaded model and "Method 2" is
# SentiWordNet -- confirm against the project write-up.
FILTERED_DIR = Path('data/filtered_tweets')
LABELLED_DIR = Path('data/labelled_tweets')
AGGREGATED_DIR = Path('data/aggregated_tweets')

# Window sizes handed to aggregate_sentiment, in the order they are written.
# The lower-cased value is reused in the output file name
# (e.g. '1H' -> '<coin>_sentiment_aggr_1h_shifted.csv').
AGGREGATION_FREQS = ('1H', '6H', '12H')

# Create the output folders up front so a missing directory cannot abort the
# run after the first coin has already been labelled.
for out_dir in (LABELLED_DIR, AGGREGATED_DIR):
    out_dir.mkdir(parents=True, exist_ok=True)

for coin in coinlist:
    my_tweets = pd.read_csv(FILTERED_DIR / (coin + '_semantic_filtered.csv'))

    # Run the project's preprocessing helper (per the original note it
    # lower-cases the tweets). Column names are not modified in this cell.
    my_tweets = preprocess_tweets(my_tweets)

    # Tokenize the tweets with the tokenizer loaded alongside the model
    my_tokenized_tweets = apply_keras_tokenizer(my_tweets, my_tokenizer)

    # Label the tweets using the loaded model
    my_tweets = apply_model(my_tweets, my_tokenized_tweets, my_model)

    # Score every tweet text with SentiWordNet
    my_tweets['sentiwordnet'] = [s.score(tweet) for tweet in my_tweets['text']]

    # Save the labelled tweets (index included, as before)
    my_tweets.to_csv(LABELLED_DIR / (coin + '_randomforest_sentiwordnet_labelled.csv'))

    # Aggregate at each window size and save one file per window
    for freq in AGGREGATION_FREQS:
        aggregated_tweets = aggregate_sentiment(my_tweets, freq)
        aggregated_tweets.to_csv(
            AGGREGATED_DIR / (coin + '_sentiment_aggr_' + freq.lower() + '_shifted.csv')
        )