In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
import joblib

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

import datetime
import math

pd.set_option('display.max_rows', 10000)

%matplotlib inline
%reload_ext tensorboard

In [3]:
np.random.seed(42)
tf.random.set_seed(42)

def create_split(df, pct_train, pct_val, batch_size, window_size):
    length = df.shape[0]
    temp_train_size = find_batch_gcd(math.floor(pct_train * length), batch_size)
    test_size = length - temp_train_size
    train_size = find_batch_gcd(math.floor((1 - pct_val) * temp_train_size), batch_size)
    val_size = temp_train_size - train_size
    df_train = df[:- val_size - test_size]
    df_val = df[- val_size - test_size - window_size:- test_size]
    df_test = df[- test_size - window_size:]
    return df_train, df_val, df_test

def find_batch_gcd(length, batch_size):
    while length % batch_size != 0:
        length -= 1
    return length

def create_dataset(df, window_size):
    X, y = [], []
    for i in range(len(df) - window_size):
        v = df.iloc[i:(i + window_size)].values
        X.append(v)
        y.append(df["Close"].iloc[i + window_size])
    return np.array(X), np.array(y)

def create_multi_pred_dataset(df, window_size, time_steps):
    X, y = [], []
    for i in range(len(df) - window_size - time_steps - 1):
        v = df.iloc[i:(i + window_size)].values
        X.append(v)
        y.append(df["Close"].iloc[i + window_size:i + window_size + time_steps].values)
    return np.array(X), np.array(y)

def create_model(nodes, optimizer, dropout, X_train):
    model = Sequential()
    model.add(LSTM(nodes[0], input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=True))
    model.add(LSTM(nodes[1], return_sequences=True))
    model.add(LSTM(nodes[2]))
    model.add(Dropout(dropout))
    model.add(Dense(nodes[3]))
    model.compile(loss="mse", optimizer=optimizer, metrics=['mae'])
    return model

def flatten_prediction(pred, pred_count, time_steps):
    print(pred_count, pred.shape[0])
    pred = pred[::time_steps]
    pred = pred.flatten()
    if pred_count < pred.shape[0]:
        pred = pred[:pred_count - pred.shape[0]]
    return pred

def evaluate_forecast(pred, actual):
    mse = mean_squared_error(pred, actual)
    print("Test Mean Squared Error:", mse)
    mae = mean_absolute_error(pred, actual)
    print("Test Mean Absolute Error:", mae)
    return

def train_model(pair, batch_size, window_size, nodes_arr, optimizer, dropout, epochs):
    series = pd.read_csv("../data/processed/{}_processed.csv".format(pair))
    
    buy = pair[:3]
    sell = pair[3:]
    
    series = series[series.shape[0] % batch_size:]
    close = series[['Real Close']]

    series = series.drop(['Time', 'Real Close'], axis=1)
    series = series[['Close', 'EMA_10', 'EMA_50', 'RSI', 'A/D Index',
                     '{} Interest Rate'.format(buy), '{} Interest Rate'.format(sell), '{}_CPI'.format(buy), '{}_CPI'.format(sell),
                     '{} Twitter Sentiment'.format(buy), '{} Twitter Sentiment'.format(sell),
                     '{} News Sentiment'.format(buy), '{} News Sentiment'.format(sell),
                     #'EUR_GDP', 'USD_GDP', 'EUR_PPI', 'USD_PPI', 'USD Unemployment Rate', 'EUR Unemployment Rate'
                    ]]

    df_train, df_val, df_test = create_split(series, 0.75, 0.1, batch_size, window_size)
    print(f'df_train.shape {df_train.shape}, df_validation.shape {df_val.shape}, df_test.shape {df_test.shape}')

    closeScaler = MinMaxScaler(feature_range=(0, 1))
    featureScaler = MinMaxScaler(feature_range=(0, 1))
    
    df_train = df_train.copy()
    df_val = df_val.copy()
    df_test = df_test.copy()
    df_train.loc[:, ['Close']] = closeScaler.fit_transform(df_train[['Close']])
    df_train.loc[:, ~df_train.columns.isin(['Close'])] = featureScaler.fit_transform(df_train.loc[:, ~df_train.columns.isin(['Close'])])
    df_val.loc[:, ['Close']] = closeScaler.transform(df_val[['Close']])
    df_val.loc[:, ~df_val.columns.isin(['Close'])] = featureScaler.transform(df_val.loc[:, ~df_val.columns.isin(['Close'])])
    df_test.loc[:, ['Close']] = closeScaler.transform(df_test[['Close']])
    df_test.loc[:, ~df_test.columns.isin(['Close'])] = featureScaler.transform(df_test.loc[:, ~df_test.columns.isin(['Close'])])

    #X_train, y_train = create_dataset(df_train, window_size)
    #X_val, y_val = create_dataset(df_val, window_size)
    #X_test, y_test = create_dataset(df_test, window_size)
    
    X_train, y_train = create_multi_pred_dataset(df_train, window_size, nodes_arr[3])
    X_val, y_val = create_multi_pred_dataset(df_val, window_size, nodes_arr[3])
    print(X_train.shape)
    print(y_train.shape)
    print(X_val.shape)
    print(y_val.shape)
    #X_test, y_test = create_multi_pred_dataset(df_test, window_size, nodes_arr[3])

    model = create_model(nodes_arr, optimizer, dropout, X_train)
    
    current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

    log_dir = "logs/tuning/" + current_time
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, update_freq='epoch', profile_batch=0, histogram_freq=1)

    history = model.fit(X_train, y_train,
                    validation_data=(X_val, y_val),
                    epochs=epochs,
                    batch_size=batch_size,
                    shuffle=False,
                    #callbacks=[tensorboard_callback]
                   )
    
    return model, closeScaler, featureScaler

def visualize_loss(history):
    fig = plt.figure(figsize=(16, 10))
    ax1 = fig.subplots(1)
    ax1.set_title('Model Loss')
    ax1.set(xlabel='Epoch', ylabel='Loss')
    ax1.plot(history.history['loss'], label='Train Loss')
    ax1.plot(history.history['val_loss'], label='Val Loss')
    ax1.legend()

In [4]:
def test_model(pair, window_size, batch_size, time_steps, model, scaler, fScaler):
    buy = pair[:3]
    sell = pair[3:]

    series = pd.read_csv("../data/processed/{}_processed.csv".format(pair))
    series = series[series.shape[0] % batch_size:]
    close = series[['Time', 'Real Close', 'Close']]
    close = close.copy()
    close['PrevClose'] = close['Close'].shift(1)

    series = series.drop(['Time', 'Real Close'], axis=1)
    series = series[['Close', 'EMA_10', 'EMA_50', 'RSI', 'A/D Index',
                     '{} Interest Rate'.format(buy), '{} Interest Rate'.format(sell), '{}_CPI'.format(buy), '{}_CPI'.format(sell),
                     '{} Twitter Sentiment'.format(buy), '{} Twitter Sentiment'.format(sell),
                     '{} News Sentiment'.format(buy), '{} News Sentiment'.format(sell),
                     #'EUR_GDP', 'USD_GDP', 'EUR Unemployment Rate', 'USD Unemployment Rate', 'EUR_PPI', 'USD_PPI'
                    ]]

    df_train, df_val, df_test = create_split(series, 0.75, 0.1, batch_size, window_size)
    print(f'df_train.shape {df_train.shape}, df_validation.shape {df_val.shape}, df_test.shape {df_test.shape}')
    df_test = df_test.copy()
    df_test.loc[:, ['Close']] = scaler.transform(df_test[['Close']])
    df_test.loc[:, ~df_test.columns.isin(['Close'])] = fScaler.transform(df_test.loc[:, ~df_test.columns.isin(['Close'])])
    
    X_test, y_test = create_dataset(df_test, window_size)
    #X_test, y_test = create_multi_pred_dataset(df_test, window_size, 5)

    y_pred = model.predict(X_test)

    multi_pred = flatten_prediction(y_pred, y_test.shape[0], time_steps)
    evaluate_forecast(multi_pred, y_test)

    #mse = model.evaluate(X_test, y_test)
    #print("Test Mean Squared Error:", mse)

    index = [i for i in range(multi_pred.shape[0])]
    df_predicted = pd.DataFrame(scaler.inverse_transform(multi_pred.reshape(-1, 1)), columns=['Close'], index=index)
    df_actual = pd.DataFrame(scaler.inverse_transform(y_test.reshape(-1, 1)), columns=['Close'], index=index)

    df = pd.DataFrame(close[-multi_pred.shape[0] - window_size:])
    df.reset_index(inplace=True, drop=True)
    #print(df_test[['Close']][:20])
    #print(scaler.inverse_transform(df_test[['Close']])[:20])
    #print(scaler.inverse_transform(y_test.reshape(-1, 1))[:20])
    
    df = df[window_size:]
    df.reset_index(inplace=True, drop=True)
    #print(df[:20])
    df['rip'] = df_actual['Close']
    
    #df_predicted['Close'] = df['Real Close'].mul(np.exp(df_predicted['Close'].shift(-1))).shift(1)
    df_actual = df['Real Close'].mul(np.exp(df['Close']).shift(-1)).shift(1)
    print(df[:20])
    print(df_actual[:20])

    
    #evaluate_forecast(df_predicted['Close'].iloc[1:], df_actual['Close'].iloc[1:])

    #return df_predicted, df_actual
    
    #index = [i for i in range(y_pred.shape[0])]
    #df_predicted = pd.DataFrame(scaler.inverse_transform(y_pred), columns=['Close'], index=index)
    #df_actual = pd.DataFrame(scaler.inverse_transform(y_test.reshape(-1, 1)), columns=['Close'], index=index)
    
    #df = pd.DataFrame(close['Real Close'][-y_pred.shape[0] - window_size:-window_size])
    #df.reset_index(inplace=True, drop=True)
    
    #df_predicted['Close'] = df['Real Close'].mul(np.exp(df_predicted['Close'].shift(-1))).shift(1)
    #df_actual['Close'] = df['Real Close'].mul(np.exp(df_actual['Close'].shift(-1))).shift(1)
    
    #df_predicted['Close'] = df_predicted['Close']
    #df_actual['Close'] = df_actual['Close']
    
    #return df_predicted, df_actual

def visualize_prediction(df_predicted, df_actual):
    fig = plt.figure(figsize=(16, 10))
    ax1 = fig.subplots(1)
    ax1.set_title('Predicted Closing Price')
    ax1.set(xlabel='Time', ylabel='Close')
    ax1.plot(df_actual['Close'][:100], label='Actual')
    ax1.plot(df_predicted['Close'][:100], label='Prediction')
    ax1.legend()

In [7]:
batch_size = 32
window_size = 10
nodes = [80, 64, 32, 5]
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0005)
dropout = 0.2
epochs = 1

model, closeScaler, featureScaler = train_model("EURUSD", batch_size, window_size, nodes, optimizer, dropout, epochs)

df_train.shape (50400, 13), df_validation.shape (5610, 13), df_test.shape (18698, 13)
(50384, 10, 13)
(50384, 5)
(5594, 10, 13)
(5594, 5)


In [8]:
cool = test_model("EURUSD", window_size, batch_size, 5, model, closeScaler, featureScaler)

df_train.shape (50400, 13), df_validation.shape (5610, 13), df_test.shape (18698, 13)
18688 18688
Test Mean Squared Error: 0.007084544323230347
Test Mean Absolute Error: 0.07748319441465179
                         Time  Real Close     Close  PrevClose       rip
0   2020-04-02 02:00:00+00:00     1.09369 -0.000603  -0.000603 -0.000603
1   2020-04-02 02:15:00+00:00     1.09431  0.000567  -0.000603  0.000567
2   2020-04-02 02:30:00+00:00     1.09435  0.000037   0.000567  0.000037
3   2020-04-02 02:45:00+00:00     1.09335 -0.000914   0.000037 -0.000914
4   2020-04-02 03:00:00+00:00     1.09342  0.000064  -0.000914  0.000064
5   2020-04-02 03:15:00+00:00     1.09350  0.000073   0.000064  0.000073
6   2020-04-02 03:30:00+00:00     1.09375  0.000229   0.000073  0.000229
7   2020-04-02 03:45:00+00:00     1.09376  0.000009   0.000229  0.000009
8   2020-04-02 04:00:00+00:00     1.09385  0.000082   0.000009  0.000082
9   2020-04-02 04:15:00+00:00     1.09377 -0.000073   0.000082 -0.000073
10  202

In [10]:
from TwitterAPI import TwitterAPI, TwitterPager
import datetime
from dotenv import load_dotenv
import re
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import os
#from sentiment_keyword_defs import SENTIMENT_KEYWORDS
load_dotenv()

import time
import functools

def timeit(func):
    @functools.wraps(func)
    def newfunc(*args, **kwargs):
        startTime = time.time()
        func(*args, **kwargs)
        elapsedTime = time.time() - startTime
        print('function [{}] finished in {} ms'.format(
            func.__name__, int(elapsedTime * 1000)))
    return newfunc

In [11]:
sentiment_keyword = {
        "usd": {
            "positive": [
                "usd/",
                "u.s.",
                "greenback",
                "buck",
                "barnie",
                "america",
                "united states",
            ],
            "negative": ["/usd", "cable"],
        },
        "aud": {
            "positive": ["aud/", "gold", "aussie", "australia"],
            "negative": ["/aud"],
        },
        "gbp": {
            "positive": [
                "gbp/",
                "sterling",
                "pound",
                "u.k.",
                "united kingdom",
                "cable",
                "guppy",
            ],
            "negative": ["/gbp"],
        },
        "nzd": {
            "positive": ["nzd/", "gold", "kiwi", "new zealand"],
            "negative": ["/nzd"],
        },
        "cad": {"positive": ["cad/", "oil", "loonie", "canada"], "negative": ["/cad"]},
        "chf": {"positive": ["chf/", "swiss"], "negative": ["/chf"]},
        "jpy": {"positive": ["jpy/", "asian", "japan"], "negative": ["/jpy", "guppy"]},
        "eur": {"positive": ["eur/", "fiber", "euro"], "negative": ["/eur"]},
    }

api = TwitterAPI(consumer_key=os.getenv("TWITTER_CONSUMER_KEY"), consumer_secret=os.getenv("TWITTER_CONSUMER_SECRET"),
                 access_token_key=os.getenv("TWITTER_ACCESS_TOKEN_KEY"), access_token_secret=os.getenv("TWITTER_ACCESS_TOKEN_SECRET"), api_version='2')

@timeit
def get_twitter_data(start_time):
    pager = TwitterPager(api, 'tweets/search/recent', {
        'query': 'from:FXstreetNews OR from:forexcom',
        'tweet.fields': 'public_metrics,created_at',
        'start_time': str(start_time),
        'max_results': 100
        }
    )
    tweet_data = []
    for item in pager.get_iterator(new_tweets=False):
        tweet_data.append({"text": item['text'], "created_at": item['created_at']})
        print(item)
    return tweet_data

@timeit
def tweet_sentiment(tweets):
    sid = SentimentIntensityAnalyzer()
    for tweet_data in tweets:
        tweet_data["text"] = remove_pattern(tweet_data["text"], "RT @[\w]*:")
        tweet_data["text"] = remove_pattern(tweet_data["text"], "@[\w]*")
        tweet_data["text"] = remove_pattern(tweet_data["text"], "https?://[A-Za-z0-9./]*")
        tweet_data["text"] = tweet_data["text"].replace("[^a-zA-Z]", " ")
        tweet_data["text"] = tweet_data["text"].replace("\n", " ")
        tweet_data["score"] = sid.polarity_scores(tweet_data["text"])["compound"]
    return tweets

def remove_pattern(input_text, pattern):
    """
    Finds patterns in posts and substitutes them with blank space.

    Args:
        input_text: String representing a twitter post
        pattern: Regex pattern to search for in twitter post

    Returns:
        String with pattern stripped.
    """
    match = re.findall(pattern, input_text)
    for i in match:
        input_text = re.sub(i, "", input_text)
    return input_text

In [3]:
tweets = get_twitter_data((datetime.datetime.now() - datetime.timedelta(hours=48)).isoformat("T") + "Z")
tweet_sentiment(tweets)

{'public_metrics': {'retweet_count': 1, 'reply_count': 0, 'like_count': 3, 'quote_count': 0}, 'text': 'Bitcoin Weekly Forecast: SEC commissioner cozies up to BTC ETF, on-chain metrics reset making way for volatile move https://t.co/1kI5vzvhPi', 'created_at': '2021-04-11T05:00:21.000Z', 'id': '1381109846464954371'}
{'public_metrics': {'retweet_count': 1, 'reply_count': 0, 'like_count': 4, 'quote_count': 0}, 'text': 'AUD/USD Weekly Forecast: Bulls still cautious but unwilling to give up https://t.co/cYffvEzqrT', 'created_at': '2021-04-10T12:00:59.000Z', 'id': '1380853312052981761'}
{'public_metrics': {'retweet_count': 2, 'reply_count': 0, 'like_count': 4, 'quote_count': 0}, 'text': 'USD/JPY Weekly Forecast: Fed fails to dim dollar prospects https://t.co/GYkwTbnChl', 'created_at': '2021-04-10T05:00:22.000Z', 'id': '1380747462852419585'}
{'public_metrics': {'retweet_count': 1, 'reply_count': 0, 'like_count': 4, 'quote_count': 0}, 'text': 'GBP/USD Weekly Forecast: Is the correction over? US

[{'text': 'Bitcoin Weekly Forecast: SEC commissioner cozies up to BTC ETF, on-chain metrics reset making way for volatile move ',
  'created_at': '2021-04-11T05:00:21.000Z',
  'score': 0.0},
 {'text': 'AUD/USD Weekly Forecast: Bulls still cautious but unwilling to give up ',
  'created_at': '2021-04-10T12:00:59.000Z',
  'score': -0.0516},
 {'text': 'USD/JPY Weekly Forecast: Fed fails to dim dollar prospects ',
  'created_at': '2021-04-10T05:00:22.000Z',
  'score': -0.1531},
 {'text': 'GBP/USD Weekly Forecast: Is the correction over? US consumer, vaccines hold keys ',
  'created_at': '2021-04-09T22:00:52.000Z',
  'score': 0.0},
 {'text': ' 💥  Week Ahead: Lockdowns and “re-openings”, big data dump, and earnings season begins! 💥 #coronavirus #weekendvibes #lum…',
  'created_at': '2021-04-09T21:37:57.000Z',
  'score': -0.784},
 {'text': 'Wall Street Close: Another day, another record close for the S&amp;P 500 as Fed eases inflation worries By   #SP500 #Equities',
  'created_at': '2021-04-0

In [4]:
print(tweets)

[{'text': 'Bitcoin Weekly Forecast: SEC commissioner cozies up to BTC ETF, on-chain metrics reset making way for volatile move ', 'created_at': '2021-04-11T05:00:21.000Z', 'score': 0.0}, {'text': 'AUD/USD Weekly Forecast: Bulls still cautious but unwilling to give up ', 'created_at': '2021-04-10T12:00:59.000Z', 'score': -0.0516}, {'text': 'USD/JPY Weekly Forecast: Fed fails to dim dollar prospects ', 'created_at': '2021-04-10T05:00:22.000Z', 'score': -0.1531}, {'text': 'GBP/USD Weekly Forecast: Is the correction over? US consumer, vaccines hold keys ', 'created_at': '2021-04-09T22:00:52.000Z', 'score': 0.0}, {'text': ' 💥  Week Ahead: Lockdowns and “re-openings”, big data dump, and earnings season begins! 💥 #coronavirus #weekendvibes #lum…', 'created_at': '2021-04-09T21:37:57.000Z', 'score': -0.784}, {'text': 'Wall Street Close: Another day, another record close for the S&amp;P 500 as Fed eases inflation worries By   #SP500 #Equities', 'created_at': '2021-04-09T20:32:11.000Z', 'score': 

In [5]:
sentiment = tweet_sentiment(tweets)
sentiment

[{'text': 'Bitcoin Weekly Forecast: SEC commissioner cozies up to BTC ETF, on-chain metrics reset making way for volatile move ',
  'created_at': '2021-04-11T05:00:21.000Z',
  'score': 0.0},
 {'text': 'AUD/USD Weekly Forecast: Bulls still cautious but unwilling to give up ',
  'created_at': '2021-04-10T12:00:59.000Z',
  'score': -0.0516},
 {'text': 'USD/JPY Weekly Forecast: Fed fails to dim dollar prospects ',
  'created_at': '2021-04-10T05:00:22.000Z',
  'score': -0.1531},
 {'text': 'GBP/USD Weekly Forecast: Is the correction over? US consumer, vaccines hold keys ',
  'created_at': '2021-04-09T22:00:52.000Z',
  'score': 0.0},
 {'text': ' 💥  Week Ahead: Lockdowns and “re-openings”, big data dump, and earnings season begins! 💥 #coronavirus #weekendvibes #lum…',
  'created_at': '2021-04-09T21:37:57.000Z',
  'score': -0.784},
 {'text': 'Wall Street Close: Another day, another record close for the S&amp;P 500 as Fed eases inflation worries By   #SP500 #Equities',
  'created_at': '2021-04-0

In [12]:
pd.set_option('display.max_rows', 100)

@timeit
def combine_dates(tweets):
    """
    Merge sentiment scores according to date.

    Args:
        tweets: Dataframe containing countries and their sentiment scores at a certain time

    Returns:
        Dataframe with a country's sentiment score with sequential time.
    """
    currencies = ["eur", "usd", "jpy", "cad", "gbp", "aud", "nzd", "chf"]
    length = 1
    for i in range(1, len(tweets.index)):
        current = tweets.at[i, "Time"]
        if current == tweets.at[i - length, "Time"] and i == len(tweets.index) - 1:
            for currency in currencies:
                tweets.at[i - length, currency.upper()] = (
                    tweets[currency.upper()].iloc[i - length : i].mean()
                )
        elif current == tweets.at[i - length, "Time"]:
            length += 1
        elif length > 1:
            for currency in currencies:
                tweets.at[i - length, currency.upper()] = (
                    tweets[currency.upper()].iloc[i - length : i].mean()
                )
            length = 1
    tweets.drop_duplicates(subset=["Time"], inplace=True)
    return tweets

@timeit
def country_sentiment_df(tweets, start, window):
    tweet_df = pd.DataFrame()
    tweet_df['Time'] = [datetime.datetime.strptime(tweet['created_at'], "%Y-%m-%dT%H:%M:%S.%fZ") for tweet in tweets]
    tweet_df['Time'] = tweet_df['Time'].dt.strftime("%Y-%m-%d %H:%M:00")
    tweet_df['Twitter_Sentiment'] = [tweet['score'] for tweet in tweets]
    tweet_df['Post'] = [tweet['text'].lower() for tweet in tweets]
    
    country_df = pd.DataFrame()
    for currency in sentiment_keyword:
        for entity in sentiment_keyword[currency]["positive"]:
            currency_df = tweet_df[tweet_df['Post'].str.contains(entity)]
            currency_df = currency_df[{"Time", "Twitter_Sentiment"}]
            currency_df = currency_df.rename(
                columns={"Twitter_Sentiment": currency.upper()}
            )
            if country_df.empty:
                country_df = currency_df
            elif not currency.upper() in country_df.columns:
                country_df = country_df.merge(currency_df, how="outer", on="Time")
            else:
                country_df = country_df.merge(
                    currency_df, how="outer", on=["Time", currency.upper()]
                )
        for entity in sentiment_keyword[currency]['negative']:
            currency_df = tweet_df[tweet_df['Post'].str.contains(entity)]
            currency_df = currency_df[{"Time", "Twitter_Sentiment"}]
            currency_df["Twitter_Sentiment"] = currency_df[
                    "Twitter_Sentiment"
                ].transform(lambda score: -score)
            currency_df = currency_df.rename(
                columns={"Twitter_Sentiment": currency.upper()}
            )
            if country_df.empty:
                country_df = currency_df
            elif not currency.upper() in country_df.columns:
                country_df = country_df.merge(currency_df, how="outer", on="Time")
            else:
                country_df = country_df.merge(
                    currency_df, how="outer", on=["Time", currency.upper()]
                )
    
    print(country_df)
    time_frame = pd.date_range(
        start=start, freq="1T", end=str(datetime.datetime.now())
    )
    time_frame = pd.DataFrame(time_frame, columns=["Time"])
    time_frame["Time"] = time_frame["Time"].dt.strftime("%Y-%m-%d %H:%M:%S")
    country_df = country_df.reset_index(drop=True)
    country_df = combine_dates(country_df)
    
    country_df = time_frame.merge(country_df, how="left", on="Time")
    country_df = country_df.sort_values(by="Time", ascending=True)
    
    for currency in sentiment_keyword:
        country_df[currency.upper()] = (
            country_df[currency.upper()].rolling(window, min_periods=1).mean()
        )
    country_df = country_df.fillna(0)
    
    return country_df

In [7]:
nice = country_sentiment_df(sentiment, str(datetime.datetime.strftime(datetime.datetime.now() - datetime.timedelta(hours=27), "%Y-%m-%d %H:%M:00")), 60)

                   Time     USD     AUD     GBP     NZD     CAD  CHF     JPY  \
0   2021-04-10 05:00:00 -0.1531     NaN     NaN     NaN     NaN  NaN     NaN   
1   2021-04-10 05:00:00     NaN     NaN     NaN     NaN     NaN  NaN  0.1531   
2   2021-04-09 16:35:00  0.0000     NaN     NaN     NaN     NaN  NaN     NaN   
3   2021-04-09 16:33:00  0.5859     NaN     NaN     NaN     NaN  NaN     NaN   
4   2021-04-09 16:33:00     NaN     NaN     NaN     NaN -0.5859  NaN     NaN   
5   2021-04-09 14:16:00  0.0000     NaN     NaN     NaN     NaN  NaN     NaN   
6   2021-04-09 14:16:00     NaN     NaN     NaN     NaN     NaN  NaN -0.0000   
7   2021-04-09 17:08:00  0.4019     NaN     NaN     NaN     NaN  NaN     NaN   
8   2021-04-09 17:08:00 -0.4019     NaN     NaN     NaN     NaN  NaN     NaN   
9   2021-04-09 17:08:00     NaN  0.4019     NaN     NaN     NaN  NaN     NaN   
10  2021-04-09 17:08:00     NaN     NaN     NaN  0.4019     NaN  NaN     NaN   
11  2021-04-09 15:15:00  0.0000     NaN 

In [128]:
nice[-500:-400]

Unnamed: 0,Time,USD,AUD,GBP,NZD,CAD,CHF,JPY,EUR
1121,2021-04-10 12:22:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1122,2021-04-10 12:23:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1123,2021-04-10 12:24:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1124,2021-04-10 12:25:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1125,2021-04-10 12:26:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1126,2021-04-10 12:27:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1127,2021-04-10 12:28:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1128,2021-04-10 12:29:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1129,2021-04-10 12:30:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34
1130,2021-04-10 12:31:00,-0.0203,-0.0258,0.0,0.0,0.0,0.0,0.1531,-0.34


In [137]:
generate_fake_ohlc_data(nice)

Unnamed: 0,Time,USD,AUD,GBP,NZD,CAD,CHF,JPY,EUR,Close,Open,High,Low,Volume
0,2021-04-09 21:40:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000550,1.000451,1.001144,1.000301,70.0
1,2021-04-09 21:41:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000832,1.000403,1.001738,1.000297,65.0
2,2021-04-09 21:42:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.001193,1.001207,1.001389,1.001109,87.0
3,2021-04-09 21:43:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000908,1.000967,1.001519,1.000049,16.0
4,2021-04-09 21:44:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.000643,1.001041,1.001986,1.000349,31.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1616,2021-04-11 00:36:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.991415,0.991127,0.991741,0.990986,45.0
1617,2021-04-11 00:37:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.990613,0.990971,0.991258,0.989795,31.0
1618,2021-04-11 00:38:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.990199,0.990517,0.991057,0.989270,26.0
1619,2021-04-11 00:39:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.989636,0.989962,0.990749,0.988968,61.0


In [13]:
import tensorflow as tf
import pandas as pd
import numpy as np
import joblib
import random
import datetime
import talib

@timeit
def generate_twitter_sentiment(hours, window):
    tweets = get_twitter_data((datetime.datetime.now() - datetime.timedelta(hours=hours)).isoformat("T") + "Z")
    sentiment = tweet_sentiment(tweets)
    return country_sentiment_df(sentiment, hours, window)

@timeit
def generate_technical_indicators(pair_df):
    pair_df["EMA_10"] = pd.DataFrame(abstract.EMA(pair_df["Close"], timeperiod=10))#960))
    pair_df["EMA_50"] = pd.DataFrame(abstract.EMA(pair_df["Close"], timeperiod=50))#4800))
    pair_df["RSI"] = pd.DataFrame(abstract.RSI(pair_df["Close"], timeperiod=14))
    pair_df["A/D Index"] = pd.DataFrame(
        abstract.AD(
            pair_df["High"], pair_df["Low"], pair_df["Close"], pair_df["Volume"]
        )
    )
    pair_df["A/D Index"] = pair_df["A/D Index"] - pair_df["A/D Index"].shift(1)
    pair_df = stationary_log_returns(pair_df)
    return pair_df

@timeit
def stationary_log_returns(pair_df):
    """
    Calculates log returns for EMA and closing price to make data stationary.

    Args:
        pair_df: Dataframe containing OHLC data, Time, and technical indicators

    Returns:
        Dataframe with EMA and closing prices substituted with log returns
    """
    pair_df = pair_df.copy()
    pair_df["Real Close"] = pair_df["Close"]
    pair_df["Close"] = np.log(pair_df["Close"] / pair_df["Close"].shift(1))
    pair_df["EMA_10"] = np.log(pair_df["EMA_10"] / pair_df["EMA_10"].shift(1))
    pair_df["EMA_50"] = np.log(pair_df["EMA_50"] / pair_df["EMA_50"].shift(1))
    return pair_df

# Used for testing
@timeit
def generate_fake_ohlc_data(data_df):
    data_df.loc[0, 'Close'] = random.random()*0.001 + 1
    data_df.loc[0, 'Open'] = random.random()*0.001 + 1
    data_df.loc[0, 'High'] = data_df.loc[0, 'Open'] + random.random()*0.001 if data_df.loc[0, 'Open'] > data_df.loc[0, 'Close'] else data_df.loc[0, 'Close'] + random.random()*0.001
    data_df.loc[0, 'Low'] = data_df.loc[0, 'Open'] - random.random()*0.001 if data_df.loc[0, 'Open'] < data_df.loc[0, 'Close'] else data_df.loc[0, 'Close'] - random.random()*0.001
    data_df.loc[0, 'Volume'] = random.randrange(10, 100, 1)
    for i in range(1, len(data_df)):
        data_df.loc[i, 'Open'] = data_df.loc[i - 1, 'Close'] + (random.random() - 0.5)*0.001
        data_df.loc[i, 'Close'] = data_df.loc[i, 'Open'] + (random.random() - 0.5)*0.001
        data_df.loc[i, 'Volume'] = random.randrange(10, 100, 1)
        if data_df.loc[i, 'Open'] > data_df.loc[i, 'Close']:
            data_df.loc[i, 'High'] = data_df.loc[i, 'Open'] + random.random()*0.001
            data_df.loc[i, 'Low'] = data_df.loc[i, 'Close'] - random.random()*0.001
        else:
            data_df.loc[i, 'High'] = data_df.loc[i, 'Close'] + random.random()*0.001
            data_df.loc[i, 'Low'] = data_df.loc[i, 'Open'] - random.random()*0.001
    return data_df

@timeit
def configure_time(minutes, dataframe, start):
    time_frame = pd.date_range(
        start=start,
        freq="{}T".format(minutes),
        end=str(datetime.datetime.now())
    )
    time_frame = pd.DataFrame(time_frame, columns=["Time"])
    time_frame["Time"] = time_frame["Time"].dt.strftime("%Y-%m-%d %H:%M:%S")
    time_frame["Time"] = pd.to_datetime(time_frame["Time"], utc=True)

    configured_df = time_frame.merge(dataframe, how="inner", on="Time")
    return configured_df

@timeit
def generate_prediction(pair, window_size, time_steps):
    fScaler = joblib.load("../scalers/{}/features.bin".format(pair))
    scaler = joblib.load("../scalers/{}/close.bin".format(pair))
    model = tf.keras.models.load_model("../models/{}".format(pair))

    buy = pair[:3]
    sell = pair[3:]

    twitter_df = generate_twitter_sentiment(48, 60)
    #ohlc_df = generate_fake_ohlc_data(twitter_df)
    #technical_analysis_df = generate_technical_indicators(ohlc_df)
    #inference_df = configure_time(15, technical_analysis_df, technical_analysis_df.loc[0, 'Time'])

    

    return twitter_df

In [14]:
print(generate_prediction("EURUSD", 96, 4))

{'public_metrics': {'retweet_count': 1, 'reply_count': 0, 'like_count': 3, 'quote_count': 0}, 'created_at': '2021-04-11T05:00:21.000Z', 'id': '1381109846464954371', 'text': 'Bitcoin Weekly Forecast: SEC commissioner cozies up to BTC ETF, on-chain metrics reset making way for volatile move https://t.co/1kI5vzvhPi'}
{'public_metrics': {'retweet_count': 1, 'reply_count': 0, 'like_count': 4, 'quote_count': 0}, 'created_at': '2021-04-10T12:00:59.000Z', 'id': '1380853312052981761', 'text': 'AUD/USD Weekly Forecast: Bulls still cautious but unwilling to give up https://t.co/cYffvEzqrT'}
{'public_metrics': {'retweet_count': 2, 'reply_count': 0, 'like_count': 4, 'quote_count': 0}, 'created_at': '2021-04-10T05:00:22.000Z', 'id': '1380747462852419585', 'text': 'USD/JPY Weekly Forecast: Fed fails to dim dollar prospects https://t.co/GYkwTbnChl'}
{'public_metrics': {'retweet_count': 1, 'reply_count': 0, 'like_count': 4, 'quote_count': 0}, 'created_at': '2021-04-09T22:00:52.000Z', 'id': '13806418894

TypeError: 'NoneType' object is not iterable