-
Notifications
You must be signed in to change notification settings - Fork 0
/
helpers.py
262 lines (236 loc) · 10.9 KB
/
helpers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
__author__ = 'DrJonoG' # Jonathon Gibbs
#
# Copyright 2016-2020 https://www.jonathongibbs.com / @DrJonoG
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the
# License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#
# See the License for the specific language governing permissions and limitations under the License.
#
import os
import configparser
import numpy as np
import pandas as pd
import datetime
import time
def csvToPandas(path, asc=True, unicode=False):
    """
    Load a csv file into a DataFrame sorted by its index.

    The first column becomes the index, the "Datetime" column is parsed as
    day-first dates, malformed rows are skipped and NA detection is disabled.

    Parameters
    ----------
    path : String
        Location of the csv file to read
    asc : Bool
        Sort the index ascending (old to new) when True, descending otherwise
    unicode : Bool
        When True every column is read with dtype='unicode' (i.e. as text)

    Returns
    -------
    DataFrame
        The parsed data, sorted by index
    """
    # One shared option set instead of two near-identical read_csv calls.
    # `infer_datetime_format` is intentionally not passed: it is deprecated
    # in pandas 2.x (format inference is now the default behaviour).
    readOptions = {
        'sep': ',',
        'index_col': 0,
        'parse_dates': ["Datetime"],
        'on_bad_lines': 'skip',
        'dayfirst': True,
        'engine': 'c',
        'na_filter': False,
    }
    if unicode:
        readOptions['dtype'] = 'unicode'
    df = pd.read_csv(path, **readOptions)
    # Sort dates old to new (or new to old when asc is False)
    return df.sort_index(ascending=asc)
def PrintProgressBar(iteration, total, prefix='', suffix='', length=20):
    """
    Draw a single-line text progress bar on stdout.

    The line starts and ends with a carriage return so successive calls
    overwrite each other. When `iteration` equals `total` an extra line
    break is printed so later output starts on a fresh line.

    Parameters
    ----------
    iteration : Int
        The current iteration
    total : Int
        Total number of iterations; values below 1 are treated as 1
    prefix : String
        Text shown before the bar
    suffix : String
        Text shown after the percentage
    length : Int
        Width of the bar in characters
    """
    # Guard against division by zero for empty workloads
    steps = 1.0 if total < 1 else total
    done = int(length * iteration // steps)
    remaining = length - done
    percentText = format(100 * (iteration / float(steps)), '.1f')
    barText = ''.join(('█' * done, '-' * remaining))
    print('\r{} |{}| {}% {}'.format(prefix, barText, percentText, suffix), end="\r")
    # Finished: move to a new line
    if steps == iteration:
        print("\r")
def LoadIndicators(filePath='./config/indicators.ini'):
    """
    Read the indicator settings from the [indicators] section of an ini file.

    Parameters
    ----------
    filePath : String
        Path to the indicator configuration file

    Returns
    -------
    Dict
        expMovingAverage and simpleMovingAverage as integer numpy arrays
        (parsed from comma separated text), rsiLength, bollingerPeriod and
        precision as int, bollingerStdDev as float and vWAP as bool.
    """
    config = configparser.ConfigParser()
    config.read(filePath)
    section = config['indicators']

    def periods(option):
        # Comma separated text -> integer array
        return np.fromstring(section[option], dtype=int, sep=',')

    indicators = {}
    indicators['expMovingAverage'] = periods('expMovingAverage')
    indicators['simpleMovingAverage'] = periods('simpleMovingAverage')
    indicators['rsiLength'] = section.getint('rsiLength')
    indicators['bollingerPeriod'] = section.getint('bollingerPeriod')
    indicators['bollingerStdDev'] = section.getfloat('bollingerStdDev')
    indicators['vWAP'] = section.getboolean('vWAP')
    indicators['precision'] = section.getint('precision')
    return indicators
def ConfigParserBacktesting(items):
    """
    Formats the configuration file entries into the correct types.

    Each key carries a two character type tag that selects the conversion
    and is stripped from the returned key:
    s_ string, a_ comma separated float array, f_ float, i_ int,
    t_ time of day, b_ boolean.

    Parameters
    ----------
    items : List
        A list of (key, value) pairs of the configuration settings

    Returns
    -------
    List
        (key without tag, converted value) pairs in the original order

    Raises
    ------
    ValueError
        If a key has an unknown type tag or a b_ value is not a recognised
        boolean word
    """
    def toBool(value):
        # bool("False") is True, so text must be mapped explicitly. The
        # accepted words are the ones configparser itself recognises
        # (1/yes/true/on and 0/no/false/off, case-insensitive).
        if not isinstance(value, str):
            return bool(value)
        token = value.strip().lower()
        if not token:
            # An empty value has always meant False here
            return False
        try:
            return configparser.ConfigParser.BOOLEAN_STATES[token]
        except KeyError:
            raise ValueError('Invalid boolean value "%s" found in ini file.' % value) from None

    converters = {
        "s_": lambda value: value,
        "a_": lambda value: np.fromstring(value, dtype=float, sep=','),
        "f_": float,
        "i_": int,
        "t_": lambda value: pd.to_datetime(value).time(),
        "b_": toBool,
    }

    result = []
    for (key, value) in items:
        typeTag = key[:2]
        if typeTag not in converters:
            raise ValueError('Invalid type tag "%s" found in ini file.' % typeTag)
        result.append((key[2:], converters[typeTag](value)))
    return result
def MergeDataFolders(self, sourcePath, destinationPath):
    """
    Merges files of the same time frame and symbol together.
    Used when data is regularly downloaded as lookback of Yahoo is only 60 days on 5minute interval
    sourcePath should contain a series of directories of different dates

    Files that share a name across the sub-directories are concatenated,
    de-duplicated on their 'Datetime' column (first occurrence wins),
    sorted by date and written to destinationPath under the same name.
    Nothing is done unless sourcePath holds more than one sub-directory.

    Parameters
    ----------
    self : Object
        Unused; kept so existing callers keep working
    sourcePath : String
        A directory containing the directories of symbols to merge
    destinationPath : String
        The location in which to store the merged files
    """
    # Imported locally: glob is not among the module-level imports
    import glob

    folderList = [f.path for f in os.scandir(sourcePath) if f.is_dir()]
    if len(folderList) <= 1:
        return

    os.makedirs(destinationPath, exist_ok=True)

    # Group every file one level below sourcePath by its file name.
    # os.path handles the separator, so this works on any platform; sorting
    # makes the "first occurrence wins" de-duplication deterministic.
    filesByName = {}
    for fullPath in sorted(glob.glob(os.path.join(sourcePath, "*", "*"))):
        if os.path.isfile(fullPath):
            filesByName.setdefault(os.path.basename(fullPath), []).append(fullPath)

    countUnique = len(filesByName)
    # Iterate over unique filenames; read CSVs, concat DFs, save file
    start = time.time()
    for i, (fileName, pathList) in enumerate(filesByName.items(), start=1):
        PrintProgressBar(i, countUnique, prefix = 'Merging: ' + fileName.ljust(10), suffix = 'Complete. Runtime: ' + str(datetime.timedelta(seconds = (time.time() - start))))
        dfConcat = pd.concat([pd.read_csv(path) for path in pathList])
        # Remove duplicate entries
        dfConcat = dfConcat.drop_duplicates(subset=['Datetime'], keep='first').reset_index(drop=True)
        # Order rows chronologically; the index is only used for sorting
        dfConcat = dfConcat.set_index(pd.DatetimeIndex(dfConcat['Datetime'])).sort_index()
        # The 'Datetime' column is still present, so the index is not written
        dfConcat.to_csv(os.path.join(destinationPath, fileName), index=False)
def LoadTickerNames(filepath, column=0):
    """
    Loads in a list of tickers from a csv file.

    Parameters
    ----------
    filepath : String
        Location of the csv file containing a list of symbol names
    column : Int
        The column in which the symbol is located, 0 by default.

    Returns
    -------
    Series
        The values of the requested column

    Raises
    ------
    SystemExit
        With exit code 1 when the file cannot be read or the column does
        not exist
    """
    try:
        return pd.read_csv(filepath).iloc[:, column]
    except Exception as e:
        print("==> Error: invalid or missing ticker file %s. Exiting application." % filepath)
        # Surface the underlying cause instead of discarding it
        print("==> Reason: %s" % e)
        # The bare exit() helper is injected by the site module and is not
        # guaranteed to exist; raising SystemExit is the portable equivalent
        raise SystemExit(1) from e
def SymbolIterator(fileList, function, arguments, prefix='Downloading', apiCap=150, functionCalls=1):
    """
    Iterate through the symbols in fileList and call the function using the arguments provided
    The symbol is always the first argument passed to the function
    Handles the maximum number of calls per minute and sleeps if limit has been reached

    Parameters
    ----------
    fileList : String
        Location of the csv file containing a list of symbol names
    function : Object
        The function to be called; a falsy return value means no api call
        was made, so the call is not counted against the limit
    arguments : List
        A list of arguments to be passed to the function
    prefix : String
        (Optional) String for the progress bar reporting
    apiCap : Int
        (Optional) The number of calls that can be made per minute with your Alpha API key
    functionCalls : Int
        (Optional) The number of api calls the function makes for each invoke
    """
    start = time.time()

    def runtime():
        # Elapsed wall-clock time formatted for the progress bar
        return str(datetime.timedelta(seconds = (time.time() - start)))

    # Load in the ticker list. Series.append no longer exists in pandas 2,
    # and the loaded Series can be de-duplicated directly.
    symbolList = LoadTickerNames(fileList, 0).drop_duplicates()
    symbolCount = len(symbolList)
    # Iterate through symbolList and download data
    print("==> Please be patient, this may take some time.")
    apiCalls = 0
    apiTime = time.time()
    # enumerate gives a gap-free position; the Series labels have holes
    # after drop_duplicates and would make the bar jump or overflow
    for position, symbol in enumerate(symbolList):
        # Seconds elapsed in the current one-minute window
        duration = (time.time() - apiTime)
        # Update number of API calls before it happens
        apiCalls += functionCalls
        if duration >= 60:
            # The window has expired: this call is the first of a new one
            apiTime = time.time()
            apiCalls = functionCalls
        elif apiCalls > apiCap:
            # Minute cap reached: wait out the rest of the window (plus a
            # small margin) and start a new window with this call
            PrintProgressBar(position, symbolCount, prefix = '==> ' + ('API Limit Reached. Paused: ').ljust(20), suffix = 'Complete. Runtime: ' + runtime())
            time.sleep((60-duration)+2)
            apiTime = time.time()
            apiCalls = functionCalls
        PrintProgressBar(position, symbolCount, prefix = '==> ' + (prefix + ': ').ljust(10) + symbol.ljust(10), suffix = 'Complete. Runtime: ' + runtime())
        calledAPI = function(symbol, *arguments)
        # If the api wasn't called (i.e. data already exists), deduct call count
        if not calledAPI:
            apiCalls -= functionCalls
    PrintProgressBar(symbolCount, symbolCount, prefix = '==> ' + (prefix + ' Complete').ljust(20), suffix = 'Complete. Total runtime: ' + runtime())
def SymbolIteratorFiles(fileList, function, arguments, prefix='Downloading'):
    """
    Iterate through the symbols in fileList and call the function using the arguments provided
    The symbol is always the first argument passed to the function

    Parameters
    ----------
    fileList : List or String
        A list is used directly as the symbols to iterate. Anything else is
        treated as the location(s) of csv files containing symbol names:
        a single path, or an iterable of paths.
    function : Object
        The function to be called
    arguments : List
        A list of arguments to be passed to the function
    prefix : String
        (Optional) String for the progress bar reporting
    """
    start = time.time()

    def runtime():
        # Elapsed wall-clock time formatted for the progress bar
        return str(datetime.timedelta(seconds = (time.time() - start)))

    if isinstance(fileList, list):
        symbolList = pd.Series(fileList)
    else:
        # A single path must not be iterated character by character
        if isinstance(fileList, (str, os.PathLike)):
            tickerFiles = [fileList]
        else:
            tickerFiles = list(fileList)
        # Series.append no longer exists in pandas 2; concat the loaded columns
        loaded = [LoadTickerNames(f, 0) for f in tickerFiles]
        if loaded:
            symbolList = pd.concat(loaded, ignore_index=True)
        else:
            symbolList = pd.Series(dtype="object")
    symbolList = symbolList.drop_duplicates()
    symbolCount = len(symbolList)
    # enumerate gives a gap-free position; the Series labels have holes
    # after drop_duplicates and would make the bar jump or overflow
    for position, symbol in enumerate(symbolList):
        PrintProgressBar(position, symbolCount, prefix = '==> ' + (prefix + symbol).ljust(20), suffix = 'Complete. Runtime: ' + runtime())
        function(symbol, *arguments)
    PrintProgressBar(symbolCount, symbolCount, prefix = '==> ' + (prefix + ' Complete').ljust(20), suffix = 'Complete. Total runtime: ' + runtime())