# 4_synthetic_data.py
import pandas as pd
import time
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
class OHLCV:
    """Plain record holding one OHLCV sample.

    Stores the six constructor arguments unchanged as attributes of the
    same names: timestamp, open, high, low, close and volume.
    """

    def __init__(self, timestamp, open, high, low, close, volume):
        # `open` shadows the builtin only inside this method; the parameter
        # name is kept because callers may pass it by keyword.
        (
            self.timestamp,
            self.open,
            self.high,
            self.low,
            self.close,
            self.volume,
        ) = (timestamp, open, high, low, close, volume)
class Tick:
    """Plain record holding one tick.

    Stores the six constructor arguments unchanged as attributes of the
    same names: timestamp, symbol, side, amount, price and exchange.
    """

    def __init__(self, timestamp, symbol, side, amount, price, exchange):
        fields = {
            'timestamp': timestamp,
            'symbol': symbol,
            'side': side,
            'amount': amount,
            'price': price,
            'exchange': exchange,
        }
        # Assign in declaration order so the instance dict keeps that order.
        for attr_name, attr_value in fields.items():
            setattr(self, attr_name, attr_value)
class DataSample:
    """Base interface for the synthetic data generators in this file.

    Every method here is a no-op that returns None; subclasses override
    them. The signatures mirror the ones the subclasses use
    (``build(n=3)``, ``plot(df)``) so a generator can be called through
    this interface without a TypeError.
    """

    def __init__(self):
        pass

    def generate(self):
        """Generate one data sample. The base implementation returns None."""

    def build(self, n=3):
        """Build a dataframe of ``n`` data samples.

        Args:
            n: Number of samples requested. Ignored by this base
                implementation, which returns None.
        """

    def plot(self, df=None):
        """Visualize a dataset.

        Args:
            df: The dataset to plot. Ignored by this base implementation,
                which returns None.
        """
class DataSampleOHLCV(DataSample):
    """Generates synthetic OHLCV samples whose close follows a random walk.

    Attributes:
        ts_counter: Number of samples generated so far; also the minute
            offset applied to the next sample's timestamp.
        start_time: ``datetime.now()`` captured once at construction; all
            timestamps are offsets from it.
        last_price: Close of the most recent sample (initially a random
            integer in [19000, 20999]).
    """

    def __init__(self):
        super().__init__()
        self.ts_counter = 0
        # Anchor timestamps once. Re-reading the clock for every sample made
        # the spacing between consecutive samples drift off one minute.
        self.start_time = datetime.now()
        self.last_price = random.randrange(19000, 21000)

    def generate(self):
        """Generate one OHLCV sample and advance the random walk.

        Open, high, low and close are drawn around ``last_price`` (within
        10 price units). High and low are then widened where needed so that
        ``low <= min(open, close)`` and ``high >= max(open, close)`` always
        hold; drawn independently, high could end up below close or open,
        and low above open.

        Returns:
            OHLCV: The new sample; its close becomes ``last_price``.
        """
        timestamp = self.start_time + timedelta(minutes=self.ts_counter)
        self.ts_counter += 1

        anchor = self.last_price
        # Draw order (open, high, low, close, volume) is kept as before so a
        # seeded run produces the same open/close/volume sequence.
        open_price = random.randrange(anchor - 10, anchor + 10)
        high_price = random.randrange(anchor, anchor + 10)
        low_price = random.randrange(anchor - 10, anchor)
        close_price = random.randrange(anchor - 10, anchor + 11)
        volume = random.randrange(10, 300)

        # Enforce the OHLC invariant: the bar's range must contain its body.
        high_price = max(high_price, open_price, close_price)
        low_price = min(low_price, open_price, close_price)

        self.last_price = close_price
        return OHLCV(timestamp, open_price, high_price, low_price, close_price, volume)

    def build(self, n=3):
        """Build a DataFrame of ``n`` freshly generated OHLCV samples.

        Args:
            n: Number of samples to generate (default 3).

        Returns:
            pandas.DataFrame with columns timestamp, open, high, low,
            close, volume, one row per sample in generation order.
        """
        columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
        samples = (self.generate() for _ in range(n))
        rows = [[getattr(sample, column) for column in columns] for sample in samples]
        return pd.DataFrame(rows, columns=columns)

    def plot(self, df):
        """Plot the ``close`` column of ``df`` and show the figure.

        Args:
            df: DataFrame with a ``close`` column, e.g. from ``build``.
        """
        df.close.plot(figsize=(12, 8))
        plt.title('OHLCV: {} samples'.format(len(df)), fontsize=15)
        plt.show()
class DataSampleTicks(DataSample):
    """Generates synthetic ticks whose price follows a random walk.

    ``last_price`` holds the price of the most recent tick; it starts as a
    random integer in [19000, 20999].
    """

    def __init__(self):
        self.last_price = random.randrange(19000, 21000)

    def generate(self):
        """Generate one tick and advance the random walk.

        Returns:
            Tick: Stamped with ``time.time()``, symbol 'BTC/USD', a random
            side, an integer amount in [0, 9], a price within 10 units of
            the previous one, and a random exchange name.
        """
        now = time.time()
        direction = random.choice(['BUY', 'SELL'])
        size = random.randrange(0, 10)
        previous = self.last_price
        self.last_price = random.randrange(previous - 10, previous + 11)
        venue = random.choice(['Binance', 'FTX', 'Bybit', 'dYdX'])
        return Tick(now, 'BTC/USD', direction, size, self.last_price, venue)

    def build(self, n=3):
        """Build a DataFrame of ``n`` freshly generated ticks.

        Returns:
            pandas.DataFrame with columns timestamp, symbol, side, amount,
            price, exchange, one row per tick in generation order.
        """
        columns = ['timestamp', 'symbol', 'side', 'amount', 'price', 'exchange']
        generated = (self.generate() for _ in range(n))
        rows = [[getattr(item, column) for column in columns] for item in generated]
        return pd.DataFrame(rows, columns=columns)

    def plot(self, df):
        """Plot the ``price`` column of ``df`` and show the figure."""
        figure_size = (12, 8)
        df.price.plot(figsize=figure_size)
        heading = 'Ticks: {} samples'.format(len(df))
        plt.title(heading, fontsize=15)
        plt.show()
class DataGenerator:
    """Builds and plots synthetic tick and OHLCV datasets.

    Attributes:
        ticks: The DataSampleTicks generator used by ``run_ticks``.
        ohlcv: The DataSampleOHLCV generator used by ``run_ohlcv``.
    """

    def __init__(self):
        self.ticks = DataSampleTicks()
        self.ohlcv = DataSampleOHLCV()

    def run_ticks(self, n=10000):
        """Build ``n`` ticks, plot them, and return the dataset.

        Args:
            n: Number of ticks to generate. Defaults to 10000, the count
                that used to be hard-coded.

        Returns:
            Whatever ``self.ticks.build`` returns (the dataset that was
            plotted).
        """
        data = self.ticks.build(n=n)
        self.ticks.plot(data)
        return data

    def run_ohlcv(self, n=1000):
        """Build ``n`` OHLCV samples, plot them, and return the dataset.

        Args:
            n: Number of samples to generate. Defaults to 1000, the count
                that used to be hard-coded.

        Returns:
            Whatever ``self.ohlcv.build`` returns (the dataset that was
            plotted).
        """
        data = self.ohlcv.build(n=n)
        self.ohlcv.plot(data)
        return data
if __name__ == '__main__':
    # Script entry point: build and plot the tick dataset first, then the
    # OHLCV dataset, each with its default sample count.
    app = DataGenerator()
    for run_demo in (app.run_ticks, app.run_ohlcv):
        run_demo()