In [1]:
import pandas as pd
import numpy as np

from datetime import datetime
from functools import reduce

# Raise pandas' row-display cap so frames of up to 10,000 rows render in full;
# longer frames are still shown in truncated (head/tail) form.
pd.set_option('display.max_rows', 10000)

In [2]:
def combine_indicators(currency):
    """Build the combined indicator frame for one currency.

    Reads the processed CPI, GDP, interest-rate, PPI and unemployment CSVs for
    ``currency``, takes that currency's column from the news and tweet
    sentiment files, and outer-joins all of them on "Time". The exchange-rate
    file of every pair in ``exchanges`` whose name contains the upper-cased
    currency code is then joined on as well.

    Args:
        currency: Currency code as used in the processed file names
            (e.g. "usd"). Its upper-cased form selects the sentiment column
            and is matched against the pair names.

    Returns:
        pandas.DataFrame containing only the rows whose "RSI" value is not
        null. The index is not reset after that filter.

    Raises:
        KeyError: if the merged frame has no "RSI" column, or a sentiment file
            has no column for the currency.
            NOTE(review): "RSI" presumably comes from the exchange-rate file,
            so a currency that matches no pair would end up here -- confirm.
    """
    exchanges = ["EURUSD"]
    code = currency.upper()

    cpi = pd.read_csv("../data/processed/cpi/{}_cpi_processed.csv".format(currency))
    gdp = pd.read_csv("../data/processed/gdp/{}_gdp_processed.csv".format(currency))
    ir = pd.read_csv("../data/processed/interest_rate/{}_ir_processed.csv".format(currency))
    ppi = pd.read_csv("../data/processed/ppi/{}_ppi_processed.csv".format(currency))
    ue = pd.read_csv("../data/processed/unemployment_rate/{}_ue_processed.csv".format(currency))

    # Select columns with a list, not a set: a set has no defined order and
    # pandas >= 2.0 rejects set indexers with a TypeError.
    news = pd.read_csv("../data/processed/news/news_sentiment.csv")
    news = news[["Time", code]]
    news = news.rename(columns={code: "News Sentiment"})
    tweets = pd.read_csv("../data/processed/tweets/tweets_sentiment.csv")
    tweets = tweets[["Time", code]]
    tweets = tweets.rename(columns={code: "Twitter Sentiment"})

    combined_df = merge_dataframe([cpi, gdp, ir, ppi, ue, news, tweets])

    # Attach the price data of every configured pair that involves this currency.
    for pair in exchanges:
        if code in pair:
            exchange_rate = pd.read_csv("../data/processed/exchange_rate/{}_exchange.csv".format(pair))
            combined_df = merge_dataframe([combined_df, exchange_rate])

    # The outer joins keep timestamps that only some sources have; keep just
    # the rows that carry an RSI value.
    combined_df = combined_df[combined_df["RSI"].notnull()]

    return combined_df
    
def merge_dataframe(data_list):
    """Outer-join a sequence of DataFrames on "Time" and sort the result.

    Args:
        data_list: Non-empty sequence of DataFrames that each have a "Time"
            column. They are merged left to right with ``how="outer"``.

    Returns:
        A new DataFrame sorted by "Time" with a fresh 0..n-1 index. The input
        frames are never modified.

    Raises:
        TypeError: if ``data_list`` is empty (raised by ``reduce``, which is
            given no initial value).
    """
    merged_data = reduce(
        lambda left, right: pd.merge(left, right, how="outer", on="Time"),
        data_list,
    )
    # For a one-element list reduce() hands back the caller's own frame, so
    # sort into a new object instead of sorting in place.
    merged_data = merged_data.sort_values(by=["Time"])
    return merged_data.reset_index(drop=True)

def configure_time(minutes, dataframe, start="2018-01-01 22:00:00", end="2020-12-31 21:59:00"):
    """Down-sample a price frame to one row every ``minutes`` minutes.

    Volume, High and Low are first replaced by rolling sum / max / min over
    the last ``minutes`` rows. Only the rows whose "Time" falls on the
    ``minutes``-spaced grid between ``start`` and ``end`` are then kept, and
    "Open" is shifted down by one kept row.

    Args:
        minutes: Rolling window length in rows and grid spacing in minutes.
            NOTE(review): the window counts rows, so it equals ``minutes``
            minutes only if the input has one row per minute -- confirm.
        dataframe: Frame with "Time", "Volume", "High", "Low" and "Open"
            columns. "Time" must hold strings formatted
            "%Y-%m-%d %H:%M:%S", because the grid is merged as such strings.
            The frame is not modified.
        start: First timestamp of the grid (inclusive).
        end: Upper bound of the grid (inclusive if it falls on the grid).

    Returns:
        A new DataFrame with a 0..n-1 index holding the grid-aligned rows;
        empty if no "Time" value falls on the grid.
    """
    # assign() returns a new frame, so the caller's columns are left intact
    # and calling this function repeatedly on the same input is safe.
    rolled = dataframe.assign(
        Volume=dataframe["Volume"].rolling(minutes, min_periods=1).sum(),
        High=dataframe["High"].rolling(minutes, min_periods=1).max(),
        Low=dataframe["Low"].rolling(minutes, min_periods=1).min(),
    )

    # "min" is the minute frequency alias; the single-letter "T" alias is
    # deprecated since pandas 2.2.
    time_frame = pd.date_range(start=start, freq="{}min".format(minutes), end=end)
    time_frame = pd.DataFrame(time_frame, columns=["Time"])
    time_frame["Time"] = time_frame["Time"].dt.strftime("%Y-%m-%d %H:%M:%S")

    configured_df = time_frame.merge(rolled, how="inner", on="Time")
    if configured_df.empty:
        return configured_df

    # Each kept row takes the Open of the previous kept row; the first row has
    # no predecessor and keeps its own Open.
    first_open = configured_df.at[0, "Open"]
    configured_df["Open"] = configured_df["Open"].shift(1)
    configured_df.at[0, "Open"] = first_open

    # The first row's Volume is replaced by the second row's (presumably
    # because its rolling window is only partially filled -- confirm). Skipped
    # when there is no second row.
    if len(configured_df) > 1:
        configured_df.at[0, "Volume"] = configured_df.at[1, "Volume"]

    return configured_df

In [5]:
# Build the combined indicator frame for USD and show it as the cell output.
usd = combine_indicators("usd")
usd

Unnamed: 0,Time,CPI,GDP,Interest Rate,PPI,Unemployment Rate,News Sentiment,Twitter Sentiment,RSI,EMA_10,Volume,Open,Close,High,Low,EMA_100,A/D Index
0,2018-01-01 22:00:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,75.049549,1.186173,13.0,1.20102,1.20100,1.20102,1.20097,1.181063,-25647.399217
1,2018-01-01 22:01:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,74.379056,1.186174,12.0,1.20099,1.20007,1.20099,1.20007,1.181063,-25487.958445
2,2018-01-01 22:02:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,74.414455,1.186176,5.0,1.20015,1.20025,1.20025,1.20015,1.181064,-25323.351338
3,2018-01-01 22:03:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,74.608222,1.186177,15.0,1.20024,1.20029,1.20050,1.20024,1.181064,-25158.488609
4,2018-01-01 22:04:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,74.677820,1.186179,8.0,1.20032,1.20032,1.20035,1.20031,1.181064,-24990.674664
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1579696,2020-12-31 21:55:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.069328,1.191297,255.0,1.22175,1.22174,1.22180,1.22170,1.210125,40804.843368
1579697,2020-12-31 21:56:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.094949,1.191272,193.0,1.22176,1.22165,1.22179,1.22160,1.210125,40661.682495
1579698,2020-12-31 21:57:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.069489,1.191222,77.0,1.22166,1.22164,1.22167,1.22153,1.210120,40483.593276
1579699,2020-12-31 21:58:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.061613,1.191204,246.0,1.22163,1.22153,1.22168,1.22143,1.210107,40344.637382


In [6]:
# Persist the combined USD frame. With to_csv's default index=True the row
# index is written too, as an unnamed first column.
usd.to_csv("../data/processed/usd_processed.csv")

In [4]:
# Down-sample the combined USD frame to 5-minute rows. A copy is passed so
# `usd` itself stays untouched and this cell can be re-run safely.
configure_time(5, usd.copy())

Unnamed: 0,Time,CPI,GDP,Interest Rate,PPI,Unemployment Rate,News Sentiment,Twitter Sentiment,RSI,EMA_10,Volume,Open,Close,High,Low,EMA_100,A/D Index
0,2018-01-01 22:00:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,75.049549,1.186173,64.0,1.20102,1.20100,1.20102,1.20097,1.181063,-25647.399217
1,2018-01-01 22:05:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,74.824859,1.186180,64.0,1.20102,1.20052,1.20099,1.20007,1.181064,-24814.953042
2,2018-01-01 22:10:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,74.849900,1.186188,124.0,1.20031,1.20080,1.20103,1.20030,1.181065,-23924.427790
3,2018-01-01 22:15:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,74.664580,1.186196,115.0,1.20100,1.20062,1.20106,1.20062,1.181066,-23005.393338
4,2018-01-01 22:20:00,2.070508,0.565669,2.58,104.212701,4.0,0.000000,0.000000,75.594150,1.186204,196.0,1.20094,1.20057,1.20067,1.20054,1.181066,-22133.127284
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225231,2020-12-31 21:35:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.101170,1.191305,182.0,1.22198,1.22194,1.22211,1.22192,1.210176,44025.389366
225232,2020-12-31 21:40:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.058608,1.191351,209.0,1.22199,1.22194,1.22204,1.22192,1.210154,43183.786838
225233,2020-12-31 21:45:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.042428,1.191366,175.0,1.22200,1.22188,1.22199,1.22183,1.210129,42312.301078
225234,2020-12-31 21:50:00,1.362005,0.988378,0.93,103.835444,6.7,0.000128,-0.000852,69.048072,1.191240,665.0,1.22187,1.22166,1.22186,1.22161,1.210124,41522.569253
