In [5]:
import pandas as pd
import numpy as np

from datetime import datetime
from functools import reduce

# Raise pandas' row-display cap so frames of up to 10000 rows print in full instead of being truncated.
pd.set_option('display.max_rows', 10000)

In [10]:
def _load_sentiment(path, currency, label):
    """Read a sentiment CSV and keep only ``Time`` plus one currency's column.

    Args:
        path: Location of the sentiment CSV.
        currency: Currency code in any case; its upper-cased form is the
            column that gets selected.
        label: New name given to the selected currency column.

    Returns:
        A two-column DataFrame: ``Time`` and ``label``.
    """
    code = currency.upper()
    sentiment = pd.read_csv(path)
    # Select with a list, not a set: a set has no defined column order and
    # pandas >= 2.0 rejects set indexers outright.
    return sentiment[["Time", code]].rename(columns={code: label})


def combine_indicators(currency, pair):
    """Merge every processed indicator file of one currency on ``Time``.

    Reads the CPI, GDP, interest-rate, PPI and unemployment CSVs for
    ``currency`` plus the news and Twitter sentiment columns, and
    outer-merges them via ``merge_dataframe``. When the upper-cased currency
    code occurs in ``pair``, the exchange-rate CSV of that pair is merged in
    as well.

    Args:
        currency: Currency code used verbatim in the file names (e.g. "eur");
            its upper-cased form selects the sentiment column.
        pair: Upper-cased pair name (e.g. "EURUSD") used for the
            exchange-rate file name.

    Returns:
        The merged DataFrame. Rows with a null ``RSI`` are dropped when an
        ``RSI`` column is present.
    """
    indicator_paths = [
        "../data/processed/cpi/{}_cpi_processed.csv",
        "../data/processed/gdp/{}_gdp_processed.csv",
        "../data/processed/interest_rate/{}_ir_processed.csv",
        "../data/processed/ppi/{}_ppi_processed.csv",
        "../data/processed/unemployment_rate/{}_ue_processed.csv",
    ]
    frames = [pd.read_csv(path.format(currency)) for path in indicator_paths]
    frames.append(_load_sentiment(
        "../data/processed/news/news_sentiment.csv", currency, "News Sentiment"))
    frames.append(_load_sentiment(
        "../data/processed/tweets/tweets_sentiment.csv", currency, "Twitter Sentiment"))

    combined_df = merge_dataframe(frames)

    if currency.upper() in pair:
        exchange_rate = pd.read_csv("../data/processed/exchange_rate/{}_exchange.csv".format(pair))
        combined_df = merge_dataframe([combined_df, exchange_rate])

    # NOTE(review): RSI presumably comes from the exchange-rate file - confirm.
    # Guarding on the column avoids a KeyError when that file was not merged.
    if "RSI" in combined_df.columns:
        combined_df = combined_df[combined_df["RSI"].notnull()]

    return combined_df
    
def merge_dataframe(data_list):
    """Outer-merge a sequence of DataFrames on ``Time`` and sort the result.

    Args:
        data_list: Non-empty sequence of DataFrames, each with a ``Time``
            column.

    Returns:
        A new DataFrame holding the outer join of all inputs, sorted by
        ``Time`` with a fresh 0..n-1 index. The inputs are never modified.

    Raises:
        TypeError: If ``data_list`` is empty (raised by ``reduce``).
    """
    merged_data = reduce(
        lambda left, right: pd.merge(left, right, how="outer", on="Time"),
        data_list,
    )
    # Sort out-of-place: for a single-frame list, reduce() hands back the
    # caller's own object, and an in-place sort would reorder it.
    return merged_data.sort_values(by=["Time"]).reset_index(drop=True)

def _prefix_indicator_columns(frame, code):
    """Return ``frame`` with its generic indicator columns renamed to carry ``code``."""
    return frame.rename(columns={
        "CPI": code + "_CPI",
        "GDP": code + "_GDP",
        "Interest Rate": code + " Interest Rate",
        "PPI": code + "_PPI",
        "Unemployment Rate": code + " Unemployment Rate",
        "News Sentiment": code + " News Sentiment",
        "Twitter Sentiment": code + " Twitter Sentiment",
    })


def currency_pair(buy, sell):
    """Build, save and return the combined dataset for a currency pair.

    The buy side keeps every column produced by ``combine_indicators``; the
    sell side contributes only its indicator and sentiment columns. Both sets
    of indicator columns are renamed with the upper-cased currency code, the
    two frames are inner-joined on ``Time``, passed through
    ``configure_time`` with a 30-minute step, and written to
    ``../data/processed/<PAIR>_processed.csv``.

    Args:
        buy: Currency code of the buy side (e.g. "eur").
        sell: Currency code of the sell side (e.g. "usd").

    Returns:
        The DataFrame that was written to disk.
    """
    pair = (buy + sell).upper()

    buy_df = _prefix_indicator_columns(combine_indicators(buy, pair), buy.upper())
    buy_df = buy_df.reset_index(drop=True)

    sell_df = combine_indicators(sell, pair)
    # Select with a list, not a set: column order becomes deterministic and
    # pandas >= 2.0 no longer accepts set indexers.
    sell_df = sell_df[[
        "Time",
        "CPI",
        "GDP",
        "Interest Rate",
        "PPI",
        "Unemployment Rate",
        "News Sentiment",
        "Twitter Sentiment",
    ]]
    sell_df = _prefix_indicator_columns(sell_df, sell.upper())
    # reset_index returns a new frame; the result has to be assigned to take effect.
    sell_df = sell_df.reset_index(drop=True)

    pair_df = buy_df.merge(sell_df, how="inner", on=["Time"])
    pair_df = configure_time(30, pair_df)
    pair_df.to_csv("../data/processed/{}_processed.csv".format(pair), index=False)
    return pair_df

def configure_time(minutes, dataframe, start="2018-01-01 22:00:00", end="2020-12-31 21:59:00"):
    """Down-sample ``dataframe`` onto a fixed ``minutes``-spaced time grid.

    ``Volume``, ``High`` and ``Low`` are first replaced by their rolling sum,
    max and min over the previous ``minutes`` rows (assumes one row per
    minute - TODO confirm). Only rows whose ``Time`` string falls on the grid
    are kept. ``Open`` is then shifted down by one kept row, and the first
    row's ``Volume`` and ``Open`` are copied from the second row.

    Args:
        minutes: Grid step in minutes; also the rolling window length in rows.
        dataframe: Frame with ``Time`` (strings formatted
            ``%Y-%m-%d %H:%M:%S``), ``Volume``, ``High``, ``Low`` and ``Open``
            columns. It is not modified.
        start: First timestamp of the grid.
        end: Upper bound of the grid.

    Returns:
        A new DataFrame restricted to the grid timestamps. With fewer than
        two rows the first-row back-fill is skipped, so a lone row keeps a
        NaN ``Open``.
    """
    # assign() builds a new frame, leaving the caller's dataframe untouched.
    rolled = dataframe.assign(
        Volume=dataframe["Volume"].rolling(minutes, min_periods=1).sum(),
        High=dataframe["High"].rolling(minutes, min_periods=1).max(),
        Low=dataframe["Low"].rolling(minutes, min_periods=1).min(),
    )

    # "min" is the supported minute alias; "T" is deprecated since pandas 2.2.
    time_frame = pd.date_range(start=start, freq="{}min".format(minutes), end=end)
    time_frame = pd.DataFrame(time_frame, columns=["Time"])
    time_frame["Time"] = time_frame["Time"].dt.strftime("%Y-%m-%d %H:%M:%S")

    configured_df = time_frame.merge(rolled, how="inner", on="Time")
    configured_df["Open"] = configured_df["Open"].shift(1)

    # The first kept row has no full window behind it and no previous Open,
    # so borrow both values from the second row when one exists.
    if len(configured_df) > 1:
        configured_df.at[0, "Volume"] = configured_df.at[1, "Volume"]
        configured_df.at[0, "Open"] = configured_df.at[1, "Open"]

    return configured_df

In [11]:
# Build the EUR/USD dataset (currency_pair also writes it to a processed CSV),
# then leave the frame as the last expression so the notebook renders it.
pair = currency_pair("eur", "usd")
pair

Unnamed: 0,Time,EUR_CPI,EUR_GDP,EUR Interest Rate,EUR_PPI,EUR Unemployment Rate,EUR News Sentiment,EUR Twitter Sentiment,RSI,EMA_10,...,Low,EMA_100,A/D Index,USD_CPI,USD Interest Rate,USD News Sentiment,USD Unemployment Rate,USD_PPI,USD_GDP,USD Twitter Sentiment
0,2018-01-01 22:00:00,1.3,0.737765,1.0339,0.005859,8.6,0.0,0.0,75.049549,1.269752e-06,...,0.000933,1.332947e-07,-25575.984028,2.070508,2.58,0.000000,4.0,0.012921,0.565669,0.000000
1,2018-01-01 22:30:00,1.3,0.737765,1.0339,0.005859,8.6,0.0,0.0,76.471968,1.318875e-06,...,-0.000750,1.360576e-07,-20339.908993,2.070508,2.58,0.000000,4.0,0.012921,0.565669,0.000000
2,2018-01-01 23:00:00,1.3,0.737765,1.0339,0.005859,8.6,0.0,0.0,74.846626,1.307114e-06,...,-0.000142,1.394085e-07,-15943.926503,2.070508,2.58,0.000000,4.0,0.012921,0.565669,0.000000
3,2018-01-01 23:30:00,1.3,0.737765,1.0339,0.005859,8.6,0.0,0.0,73.871583,1.300039e-06,...,-0.000550,1.466399e-07,-10116.935231,2.070508,2.58,0.000000,4.0,0.012921,0.565669,0.000000
4,2018-01-02 00:00:00,1.3,0.737765,1.0339,0.005859,8.6,0.0,0.0,75.436681,1.293550e-06,...,-0.000117,1.444050e-07,-4132.010774,2.070508,2.58,0.000000,4.0,0.012921,0.565669,-0.003155
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37357,2020-12-31 19:30:00,-0.3,-0.697576,-0.0913,0.001949,8.3,0.0,0.0,69.299369,7.632054e-06,...,-0.000409,1.239241e-05,64927.456120,1.362005,0.93,0.000128,6.7,0.022647,0.988378,-0.001073
37358,2020-12-31 20:00:00,-0.3,-0.697576,-0.0913,0.001949,8.3,0.0,0.0,69.088460,4.074039e-05,...,-0.000205,-2.018817e-06,61014.484780,1.362005,0.93,0.000128,6.7,0.022647,0.988378,-0.001073
37359,2020-12-31 20:30:00,-0.3,-0.697576,-0.0913,0.001949,8.3,0.0,0.0,69.094920,-1.727098e-05,...,-0.000082,7.893409e-06,55243.856680,1.362005,0.93,0.000128,6.7,0.022647,0.988378,-0.001073
37360,2020-12-31 21:00:00,-0.3,-0.697576,-0.0913,0.001949,8.3,0.0,0.0,69.151768,-1.784520e-07,...,-0.000164,-4.327320e-06,49734.711960,1.362005,0.93,0.000128,6.7,0.022647,0.988378,-0.000852
