<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#JupyterNB-Preparation" data-toc-modified-id="JupyterNB-Preparation-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>JupyterNB Preparation</a></span></li><li><span><a href="#Oanda-API-setup" data-toc-modified-id="Oanda-API-setup-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Oanda API setup</a></span></li><li><span><a href="#Input-for-download-data-from-Oanda-API" data-toc-modified-id="Input-for-download-data-from-Oanda-API-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Input for download data from Oanda API</a></span></li><li><span><a href="#Download-data-from-Oanda" data-toc-modified-id="Download-data-from-Oanda-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Download data from Oanda</a></span></li><li><span><a href="#Write-data-to-disk-in-HDF5" data-toc-modified-id="Write-data-to-disk-in-HDF5-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Write data to disk in HDF5</a></span></li><li><span><a href="#Create-a-middle-for-each-candle" data-toc-modified-id="Create-a-middle-for-each-candle-6"><span class="toc-item-num">6&nbsp;&nbsp;</span>Create a middle for each candle</a></span></li><li><span><a href="#Import-data-in-DataFrame-and-drop-irrelevant-columns" data-toc-modified-id="Import-data-in-DataFrame-and-drop-irrelevant-columns-7"><span class="toc-item-num">7&nbsp;&nbsp;</span>Import data in DataFrame and drop irrelevant columns</a></span></li><li><span><a href="#Extract-the-date_time-features-in-separate-columns" data-toc-modified-id="Extract-the-date_time-features-in-separate-columns-8"><span class="toc-item-num">8&nbsp;&nbsp;</span>Extract the date_time features in separate columns</a></span></li><li><span><a href="#Remove-columns-we-don't-need" data-toc-modified-id="Remove-columns-we-don't-need-9"><span class="toc-item-num">9&nbsp;&nbsp;</span>Remove columns we don't need</a></span></li><li><span><a href="#Add-indicators-columns" data-toc-modified-id="Add-indicators-columns-10"><span class="toc-item-num">10&nbsp;&nbsp;</span>Add indicators columns</a></span></li><li><span><a href="#Remove-the-first-records-for-which-we-don't-have-indicators" data-toc-modified-id="Remove-the-first-records-for-which-we-don't-have-indicators-11"><span class="toc-item-num">11&nbsp;&nbsp;</span>Remove the first records for which we don't have indicators</a></span></li><li><span><a href="#Plot-data-at-set-interval-and-export-as-.png-in-the-relevant-folders" data-toc-modified-id="Plot-data-at-set-interval-and-export-as-.png-in-the-relevant-folders-12"><span class="toc-item-num">12&nbsp;&nbsp;</span>Plot data at set interval and export as .png in the relevant folders</a></span></li><li><span><a href="#OPTIONAL---Move/copy-files-from-train-folder-to-valid-and-sample-folders" data-toc-modified-id="OPTIONAL---Move/copy-files-from-train-folder-to-valid-and-sample-folders-13"><span class="toc-item-num">13&nbsp;&nbsp;</span>OPTIONAL - Move/copy files from train folder to valid and sample folders</a></span></li><li><span><a href="#Testing-cells" data-toc-modified-id="Testing-cells-14"><span class="toc-item-num">14&nbsp;&nbsp;</span>Testing cells</a></span></li></ul></div>

## JupyterNB Preparation

In [18]:
import numpy as np
import pandas as pd
import oandapy as opy
import csv
import matplotlib.pyplot as plt

from fastai.imports import *
from fastai.transforms import *
from fastai.conv_learner import *
from fastai.model import *
from fastai.dataset import *
from fastai.sgdr import *
from fastai.plots import *

# JupyterNB dark theme plotting made visible
plt.style.use(['dark_background'])

%reload_ext autoreload
%autoreload 2
%matplotlib inline

## Oanda API setup

In [19]:
accountid="ARagalie"
token="96caa09d5c9c1ea2ffd8e6e2dec9e443-363dcb9c2dc3f1c99bf632ed79745003"
env="practice"
oanda = opy.API(environment=env, access_token=token)

## Input for download data from Oanda API

In [20]:
d1 = '2015-01-09'
d2 = '2018-02-28'
gr = 'M1'
instrument = 'GBP_USD'

## Download data from Oanda

In [58]:
# Download data in chunks
dates = pd.date_range(start=d1, end=d2, freq='B')
df = pd.DataFrame()
 
for i in range(0, len(dates) -1):
    d1 = str(dates[i]).replace(' ', 'T')
    d2 = str(dates[i+1]).replace(' ', 'T')
    try:
        data = oanda.get_history(instrument=instrument, start=d1, end=d2, granularity=gr)
        df = df.append(pd.DataFrame(data['candles']))
    except:
        pass
 
index = pd.DatetimeIndex(df['time'], tz='UTC')
df.index = index
df = df.drop(['time', 'complete'], axis=1)

In [62]:
d1 = '2015-01-09'
d2 = '2018-02-28'

## Write data to disk in HDF5

In [60]:
PATH = "/home/alex/DL/AlgoTrading/"
os.chdir(PATH)
h5s = pd.HDFStore(instrument + '_' + gr + '_' + d1[:10] + '_' + d2[:10] + '.h5s', 'w', format='table')
h5s['data'] = df
h5s.close()

## Create a middle for each candle

In [61]:
df['openMid'] = (df['openAsk'] + df['openBid'])/2
df['closeMid'] = (df['closeAsk'] + df['closeBid'])/2

## Import data in DataFrame and drop irrelevant columns

In [21]:
from copy import deepcopy

PATH = "/home/alex/DL/AlgoTrading/"
os.chdir(PATH)

h5s = pd.HDFStore(instrument + '_' + gr + '_' + d1[:10] + '_' + d2[:10] + '.h5s', 'r')
df = h5s['data']
h5s.close()
df['closeMid'] = (df['closeBid']+df['closeAsk'])/2
data = df[['closeBid', 'closeAsk', 'closeMid']]


In [22]:
# Size of DataFrame
data.shape

(2852360, 3)

In [23]:
# Display all the data
def display_all(df):
    with pd.option_context("display.max_rows", 1000, "display.max_columns", 1000): 
        display(df)
display_all(data)

Unnamed: 0_level_0,closeBid,closeAsk,closeMid
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2010-01-11 00:00:00+00:00,1.60800,1.60840,1.608200
2010-01-11 00:01:00+00:00,1.60789,1.60829,1.608090
2010-01-11 00:02:00+00:00,1.60798,1.60838,1.608180
2010-01-11 00:03:00+00:00,1.60810,1.60850,1.608300
2010-01-11 00:04:00+00:00,1.60821,1.60861,1.608410
2010-01-11 00:05:00+00:00,1.60849,1.60889,1.608690
2010-01-11 00:06:00+00:00,1.60936,1.60976,1.609560
2010-01-11 00:07:00+00:00,1.60955,1.60995,1.609750
2010-01-11 00:08:00+00:00,1.60995,1.61035,1.610150
2010-01-11 00:10:00+00:00,1.61007,1.61047,1.610270


## Extract the date_time features in separate columns

In [24]:
data['date'] = data.index.astype(str).str[:10]
data['hour'] = data.index.astype(str).str[-14:]
data['hour'] = data['hour'].astype(str).str[:8]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


## Remove columns we don't need

In [25]:
data.drop(['closeBid','closeAsk','date'], axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


## Add indicators columns

In [26]:
# MA50
ma = 50
data = data.reindex(columns = np.append(data.columns, ['MA50']))
data['MA50'] = data[['closeMid']].rolling(ma).mean()

In [27]:
# Bollinger Bands
maB = 20
data['BB_20 ma'] = data[['closeMid']].rolling(maB).mean()
data['BB_20 sd'] = data[['closeMid']].rolling(maB).std()
data['BB_Upper Band'] = data['BB_20 ma'] + (data['BB_20 sd']*2)
data['BB_Lower Band'] = data['BB_20 ma'] - (data['BB_20 sd']*2)
data.drop(['BB_20 sd'], axis=1, inplace=True)

## Remove the first records for which we don't have indicators

In [28]:
# Remove the first records which don't have MA calculation
data.drop(data.index[:ma], inplace=True)

In [29]:
len(data.index)

2852310

## Plot data at set interval and export as .png in the relevant folders

In [30]:
from ipywidgets import FloatProgress
from IPython.display import display

# Initialize the size of the ViewPort
size_ViewPort = 60
start_ViewPort = 0
end_ViewPort = size_ViewPort

# Progress bar
f = FloatProgress(min=0, max=(len(data.index)/end_ViewPort))
f.description = 'Progress'
display(f)
# go through the DataFrame and create ViewPorts of 30 records
pic_counter = 0
while end_ViewPort <= len(data.index):
    if (round(data.iloc[end_ViewPort]['closeMid'], 4) - round(data.iloc[end_ViewPort+1]['closeMid'], 4)) > 0:
        data.iloc[start_ViewPort:end_ViewPort].plot(
            kind='line', grid=False, legend=False, figsize=(3, 3)).axis('off')
        PATH = "/home/alex/DL/AlgoTrading/data/ViewPorts/train/buy"
        os.chdir(PATH)
        plt.savefig('buy.' + str(pic_counter) + '.png', dpi=100)
        plt.close()
        pic_counter += 1
        start_ViewPort = end_ViewPort
        end_ViewPort += size_ViewPort
    elif (round(data.iloc[end_ViewPort]['closeMid'], 4) - round(data.iloc[end_ViewPort+1]['closeMid'], 4)) == 0:
        data.iloc[start_ViewPort:end_ViewPort].plot(
            kind='line', grid=False, legend=False, figsize=(3, 3)).axis('off')
        PATH = "/home/alex/DL/AlgoTrading/data/ViewPorts/train/hold"
        os.chdir(PATH)
        plt.savefig('hold.' + str(pic_counter) + '.png', dpi=100)
        plt.close()
        pic_counter += 1
        start_ViewPort = end_ViewPort
        end_ViewPort += size_ViewPort
    else:
        data.iloc[start_ViewPort:end_ViewPort].plot(
            kind='line', grid=False, legend=False, figsize=(3, 3)).axis('off')
        PATH = "/home/alex/DL/AlgoTrading/data/ViewPorts/train/sell"
        os.chdir(PATH)
        plt.savefig('sell.' + str(pic_counter) + '.png', dpi=100)
        plt.close()
        pic_counter += 1
        start_ViewPort = end_ViewPort
        end_ViewPort += size_ViewPort
    f.value += 1
f.bar_style = 'success'

  delta = (x1t - x0t) * margin


In [31]:
# Clean up the folder of .ipynb_checkpoints to be able to run the model

PATH = "/home/alex/DL/AlgoTrading/data/ViewPorts/train/buy"
os.chdir(PATH)
!rm -rf .ipynb_checkpoints
PATH = "/home/alex/DL/AlgoTrading/data/ViewPorts/train/sell"
os.chdir(PATH)
!rm -rf .ipynb_checkpoints
PATH = "/home/alex/DL/AlgoTrading/data/ViewPorts/train/hold"
os.chdir(PATH)
!rm -rf .ipynb_checkpoints

OSError: [Errno 12] Cannot allocate memory

## OPTIONAL - Move/copy files from train folder to valid and sample folders

In [None]:
# Move "win" files from train > valid
import os
import shutil
import random
import os.path

src_dir  = '/home/alex/DL/AlgoTrading/data/ViewPorts/train/win'
target_dir = '/home/alex/DL/AlgoTrading/data/ViewPorts/valid/win'
src_files = (os.listdir(src_dir))
pct_split_to_valid = 10
nr_files_to_move = len(os.listdir(src_dir))*pct_split_to_valid/100

def valid_path(dir_path, filename):
    full_path = os.path.join(dir_path, filename)
    return os.path.isfile(full_path)  
files = [os.path.join(src_dir, f) for f in src_files if valid_path(src_dir, f)]
choices = random.sample(files, int(no_files_to_move))
for files in choices:
    shutil.move(files, target_dir)
print ('Finished!')

In [None]:
# Move "lose" files from train > valid
src_dir  = '/home/alex/DL/AlgoTrading/data/ViewPorts/train/lose'
target_dir = '/home/alex/DL/AlgoTrading/data/ViewPorts/valid/lose'
src_files = (os.listdir(src_dir))
pct_split_to_valid = 10
nr_files_to_move = len(os.listdir(src_dir))*pct_split_to_valid/100

def valid_path(dir_path, filename):
    full_path = os.path.join(dir_path, filename)
    return os.path.isfile(full_path)  
files = [os.path.join(src_dir, f) for f in src_files if valid_path(src_dir, f)]
choices = random.sample(files, int(no_files_to_move))
for files in choices:
    shutil.move(files, target_dir)
print ('Finished!')

In [None]:
# Copy "win" files from train > sample

src_dir  = '/home/alex/DL/AlgoTrading/data/ViewPorts/train/win'
target_dir = '/home/alex/DL/AlgoTrading/data/ViewPorts/sample/win'
src_files = (os.listdir(src_dir))
nr_files_to_move = 10

def valid_path(dir_path, filename):
    full_path = os.path.join(dir_path, filename)
    return os.path.isfile(full_path)  
files = [os.path.join(src_dir, f) for f in src_files if valid_path(src_dir, f)]
choices = random.sample(files, int(no_files_to_move))
for files in choices:
    shutil.copy(files, target_dir)
print ('Finished!')

In [None]:
# Copy "lose" files from train > sample

src_dir  = '/home/alex/DL/AlgoTrading/data/ViewPorts/train/lose'
target_dir = '/home/alex/DL/AlgoTrading/data/ViewPorts/sample/lose'
src_files = (os.listdir(src_dir))
nr_files_to_move = 10

def valid_path(dir_path, filename):
    full_path = os.path.join(dir_path, filename)
    return os.path.isfile(full_path)  
files = [os.path.join(src_dir, f) for f in src_files if valid_path(src_dir, f)]
choices = random.sample(files, int(no_files_to_move))
for files in choices:
    shutil.copy(files, target_dir)
print ('Finished!')

## Testing cells

In [None]:
data.head()

In [15]:
data.iloc[start_ViewPort:end_ViewPort].plot(kind='line'
                                            , grid=False, legend=False
                                            , figsize=(2.5,2.5), bbox_inches='tight').axis('off')

AttributeError: Unknown property bbox_inches

Error in callback <function install_repl_displayhook.<locals>.post_execute at 0x7efc0bcaec80> (for post_execute):


ValueError: year 43199 is out of range

ValueError: year 43199 is out of range

<Figure size 180x180 with 1 Axes>

In [17]:
plt.savefig( 'sell.png', bbox_inches='tight')

<Figure size 432x288 with 0 Axes>