In [1]:
!pip install pandas_datareader



Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
You should consider upgrading via the 'C:\ProgramData\Anaconda3\python.exe -m pip install --upgrade pip' command.


In [2]:
import datetime as dt
import numpy as np
import pandas as pd


from numpy import log as ln
pd.core.common.is_list_like = pd.api.types.is_list_like
import pandas_datareader as web
import chart_studio.plotly as py
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit

#import tensorflow as tf
from statsmodels.tools.eval_measures import rmse
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, LeakyReLU
from sklearn.metrics import r2_score, mean_squared_error

from keras.preprocessing.sequence import TimeseriesGenerator

import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


In [3]:
def reshape_x(dataframe, time_string):
    """Resample OHLCV bars to the frequency given by ``time_string``.

    Rows are binned on the "DateTime" column. Within each bin the result keeps
    the first "Open", the maximum "High", the minimum "Low", the last "Close"
    and the summed "Volume". Rows that contain any missing value after the
    aggregation are removed before returning.
    """
    ohlcv_rules = {
        "Open": "first",
        "High": "max",
        "Low": "min",
        "Close": "last",
        "Volume": "sum",
    }
    binned = dataframe.resample(time_string, on="DateTime")
    bars = binned.agg(ohlcv_rules)
    return bars.dropna()

In [4]:
def calculate_features(dataframe, frequency):
    """Return a copy of ``dataframe`` extended with technical-indicator columns.

    The input must provide "Close", "High" and "Low" columns. The caller's
    frame is left untouched; all new columns are written to a copy.

    Columns added (n = ``frequency``, measured in rows):
        RoC:   % change of Close relative to the Close n rows earlier
        Ln:    lowest Low over the last n rows
        Hn:    highest High over the last n rows
        S%K:   stochastic oscillator %K, 100 * (Close - Ln) / (Hn - Ln)
        S%D:   stochastic oscillator %D, the n-row rolling mean of S%K
        SYt:   log return of Close versus the previous row, in percent
        ASY5:  mean of SYt over the last 5 rows
        MA5:   mean of Close over the last 5 rows
        A:     1 where Close is above the previous row's Close, else 0
        PSY12: count of rows with A == 1 in the last n rows, multiplied by 2

    Returns:
        A new DataFrame with the columns above appended (in that order) and
        every row containing a missing value dropped, which removes the
        leading rows where the rolling windows are not yet full.
    """
    # Copy first so the caller's frame does not silently gain columns.
    features = dataframe.copy()
    close = features["Close"]

    features["RoC"] = ((close / close.shift(frequency)) - 1) * 100
    features["Ln"] = features["Low"].rolling(window=frequency).min()
    features["Hn"] = features["High"].rolling(window=frequency).max()
    features["S%K"] = (close - features["Ln"]) / (features["Hn"] - features["Ln"]) * 100
    features["S%D"] = features["S%K"].rolling(window=frequency).mean()
    features["SYt"] = (ln(close) - ln(close.shift(1))) * 100
    features["ASY5"] = features["SYt"].rolling(5).mean()
    features["MA5"] = close.rolling(5).mean()
    features["A"] = np.where(close > close.shift(1), 1, 0)
    # NOTE(review): the fixed factor 2 is kept as-is; the usual psychological
    # line is 100 * up_rows / n — confirm the intended scaling.
    features["PSY12"] = features["A"].rolling(frequency).sum() * 2
    return features.dropna()

In [5]:
# Read the EUR/USD CSV; the "Date" and "Timestamp" columns are parsed together
# into a single "DateTime" column.
df_orig = pd.read_csv(
    "EURUSD1m.csv",
    parse_dates={"DateTime": ["Date", "Timestamp"]},
)
# `df` is a second name for the same object, not a copy.
df = df_orig

In [6]:
# Aggregate the bars to daily OHLCV. "D" is the calendar-day alias; the
# lowercase "d" spelling is deprecated in recent pandas.
df_daily = reshape_x(df, "D")
# Look-back window in rows: two calendar weeks (14 days) minus the two
# Saturdays, which are missing from the daily data.
n = 14 - 2
df_daily = calculate_features(df_daily, n)

# Scale the model inputs with MinMaxScaler (default range [0, 1]).
# NOTE(review): the scaler is fit on the full history; if a train/test split
# follows, fitting on the training part only would avoid look-ahead leakage.
scaler = MinMaxScaler()
s_cols = ["Close", "RoC", "S%D", "PSY12", "ASY5"]
scaled = scaler.fit_transform(df_daily[s_cols])
scaled_daily_df = pd.DataFrame(data=scaled, index=df_daily.index, columns=s_cols)

col_t1 = ["Close", "RoC", "S%D"]
col_t2 = ["Close", "PSY12", "ASY5"]
col_comb = ["Close", "RoC", "S%D", "PSY12", "ASY5"]
df_t1 = scaled_daily_df[col_t1]  # Tier 1 features dataframe
df_t2 = scaled_daily_df[col_t2]  # Tier 2 features dataframe
df_comb = scaled_daily_df[col_comb]  # Tier 1 & 2 features dataframe

In [7]:
# Preview only the first rows instead of rendering the whole frame.
df_comb.head(10)

Unnamed: 0_level_0,Close,RoC,S%D,PSY12,ASY5
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-01-27,0.817467,0.279074,0.239967,0.272727,0.452398
2010-01-28,0.802634,0.256945,0.173515,0.272727,0.376690
2010-01-29,0.779823,0.260848,0.136936,0.272727,0.291014
2010-01-31,0.782677,0.276472,0.108876,0.363636,0.304610
2010-02-01,0.793195,0.281944,0.078999,0.363636,0.385942
2010-02-02,0.805263,0.372145,0.096992,0.454545,0.463539
2010-02-03,0.788003,0.417379,0.103333,0.454545,0.455427
2010-02-04,0.752629,0.342233,0.096394,0.454545,0.413660
2010-02-05,0.738426,0.289825,0.092134,0.363636,0.357288
2010-02-07,0.730470,0.263506,0.079928,0.272727,0.296498


In [12]:
# Write the combined scaled feature frame (index included) to a CSV file.
df_comb.to_csv("MockData.csv")