In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import talib

In [5]:
def extract_df(path, sma_period):
    """Load an exported OHLC table and append SMA and MACD indicator columns.

    The file at ``path`` is read with ``pd.read_table``. The ``<TICKVOL>``,
    ``<VOL>`` and ``<SPREAD>`` columns are discarded, the remaining
    angle-bracketed headers are renamed (``<OPEN>`` -> ``Open`` and so on),
    and ``<DATE>``/``<TIME>`` are merged into one string column
    ``Date_Time``.

    Parameters
    ----------
    path : str or path-like
        Location of the delimited export to read.
    sma_period : int
        Window length forwarded to ``talib.SMA`` as ``timeperiod``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Open``, ``High``, ``Low``, ``Close``, ``Date_Time``,
        ``SMA``, ``MACD``, ``MACD_Signal``, ``MACD_Hist``,
        ``MACD_Crossover`` (1 where MACD is above its signal line, else -1)
        and ``MACD_Crossover_Change`` (row-to-row difference of
        ``MACD_Crossover``). Every row containing a NaN is dropped; the
        surviving rows keep their original row labels.

    Raises
    ------
    KeyError
        If one of the expected ``<...>`` columns is missing from the file.
    """
    df = pd.read_table(path)
    # Drop the volume/spread columns; only prices and the timestamp are used.
    df = df.drop(columns=['<TICKVOL>', '<VOL>', '<SPREAD>'])
    df = df.rename(columns={'<DATE>': 'Date',
                            '<TIME>': 'Time',
                            '<OPEN>': 'Open',
                            '<HIGH>': 'High',
                            '<LOW>': 'Low',
                            '<CLOSE>': 'Close'})
    # Merge date and time into a single string column. It is deliberately
    # kept as text: parsing it to datetime and then discarding the result
    # (as the previous version did) only cost time.
    df['Date_Time'] = df['Date'] + ' ' + df['Time']
    df = df.drop(columns=['Date', 'Time'])

    # One float array feeds both indicators.
    close = df['Close'].to_numpy(dtype=float)
    df['SMA'] = talib.SMA(close, timeperiod=sma_period)
    df['MACD'], df['MACD_Signal'], df['MACD_Hist'] = talib.MACD(close)

    # Comparisons against NaN are False, so rows where MACD is still NaN are
    # labelled -1 here; they are removed by dropna() below.
    # NOTE(review): the first row with a valid MACD has its change measured
    # against that placeholder -1, and that row survives dropna() whenever
    # the SMA column is already valid there -- confirm this is acceptable.
    df['MACD_Crossover'] = np.where(df['MACD'] > df['MACD_Signal'], 1, -1)
    df['MACD_Crossover_Change'] = df['MACD_Crossover'].diff()
    return df.dropna()

# Single source of truth for the data directory: the raw export is read from
# it and the derived CSVs are written back into it.
DATA_DIR = '/projects/genomic-ml/da2343/ml_project_2/robust_algo_trader/data'
eur_usd_h1_path = f'{DATA_DIR}/EURUSD_H1_200702210000_202304242100.tsv'

sma_list = [30, 50, 100, 200]
for sma in sma_list:
    # extract_df reads the source file itself, so the file is read once per
    # SMA period.
    df = extract_df(eur_usd_h1_path, sma)
    # Write one CSV per SMA period; the row labels are written as the first
    # (unnamed) column.
    df.to_csv(f'{DATA_DIR}/EURUSD_H1_2007_2023_SMA_{sma}.csv', index=True)
# After the loop `df` stays bound to the frame for the last entry of sma_list.

In [6]:
# `df` is the frame left over from the last iteration of the export loop
# (the last SMA period in sma_list), not a freshly loaded file.
# Date_Time strings start with the year ('YYYY.MM.DD HH:MM:SS'), so anchor the
# match to the prefix instead of searching for '2020' anywhere in the string.
only_2020_df = df[df['Date_Time'].str.startswith('2020.')]
only_2020_df

Unnamed: 0,Open,High,Low,Close,Date_Time,SMA,MACD,MACD_Signal,MACD_Hist,MACD_Crossover,MACD_Crossover_Change
79556,1.12132,1.12143,1.12008,1.12011,2020.01.02 06:00:00,1.113153,0.000446,0.000719,-0.000273,-1,0.0
79557,1.12011,1.12043,1.12008,1.12043,2020.01.02 07:00:00,1.113188,0.000308,0.000637,-0.000329,-1,0.0
79558,1.12043,1.12075,1.12037,1.12066,2020.01.02 08:00:00,1.113222,0.000215,0.000553,-0.000337,-1,0.0
79559,1.12074,1.12102,1.12043,1.12098,2020.01.02 09:00:00,1.113261,0.000165,0.000475,-0.000310,-1,0.0
79560,1.12098,1.12136,1.12052,1.12118,2020.01.02 10:00:00,1.113303,0.000140,0.000408,-0.000268,-1,0.0
...,...,...,...,...,...,...,...,...,...,...,...
85777,1.22385,1.22404,1.22209,1.22300,2020.12.31 18:00:00,1.222752,-0.000779,-0.000067,-0.000712,-1,0.0
85778,1.22300,1.22347,1.22218,1.22253,2020.12.31 19:00:00,1.222738,-0.001069,-0.000268,-0.000801,-1,0.0
85779,1.22255,1.22280,1.22151,1.22168,2020.12.31 20:00:00,1.222723,-0.001351,-0.000484,-0.000867,-1,0.0
85780,1.22150,1.22189,1.22127,1.22154,2020.12.31 22:00:00,1.222702,-0.001568,-0.000701,-0.000867,-1,0.0
