In [6]:
import numpy as np

In [18]:
# Toy table: one row per observation, columns are (ts, x, y).
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# Indexing reminders: first row, then the x column.
arr[0]
arr[:, 1]

# Keep the rows whose ts is greater than 3 and drop the ts column.
late_rows = arr[:, 0] > 3
brr = np.array(arr[late_rows, 1:].tolist())
arr
brr[:, 1]

array([6, 9])

In [29]:
class OSIEstimator():
    """
    Rolling OSI indicator computed from the sizes of buy- and sell-side trades.

    lookback: how far back to keep trades, in the same unit as the trade
        timestamps (the default equals 1 day if timestamps are milliseconds)
    update_interval: minimum time between two recalculations, same unit
        (the default equals 1 hour if timestamps are milliseconds)
    """

    def __init__(
        self,
        lookback=1000 * 60 * 60 * 24,
        update_interval=1000 * 60 * 60,
    ):
        self.lookback = lookback
        self.update_interval = update_interval
        self.buys = []
        self.sells = []
        self.previous_update = 0
        # Last computed OSI. Starts at 0 (balanced) so get_value() can be
        # called before the first recalculation without an AttributeError.
        self.osi = 0

    def update(self, trades):
        """
        Record new trades.

        trades: list of lists [[timestamp, price, amount, mid_price]]

        Trades priced exactly at the mid price carry no side information and
        are ignored.
        """
        for trade in trades:
            ts, price, amount, mid_price = trade

            # sides are determined by price relative to mid_price, based on what side take is on
            if price < mid_price:
                self.sells.append(np.array([ts, amount]))
            elif price > mid_price:
                self.buys.append(np.array([ts, amount]))

    @staticmethod
    def _prune(trades, cutoff):
        """Return the [timestamp, size] rows whose timestamp is >= cutoff."""
        if len(trades) == 0:
            return []
        rows = np.array(trades)
        return rows[rows[:, 0] >= cutoff].tolist()

    @staticmethod
    def _volume_below_p90(trades):
        """Sum the sizes of the trades that are smaller than the 90th size percentile."""
        if len(trades) == 0:
            return 0.0
        qty = np.array(trades)[:, 1]
        # A boolean mask selects the same trades a slice of the size-sorted
        # array would, without depending on the order the trades arrived in.
        # NOTE(review): this keeps the trades BELOW the 90th percentile, as the
        # previous slicing did on sorted input - confirm that the small trades
        # (and not the top decile) are the ones wanted here.
        return qty[qty < np.percentile(qty, 90)].sum()

    def calculate_values(self):
        """
        Function for calculating OSI from the stored trades.
        self.buys/self.sells are two dimensional lists formed like this [[timestamp, size]]
        On each side the sizes below that side's 90% size quantile are summed and
        OSI = 100 * (buys - sells) / (buys + sells).
        OSI is set to 0 when the selected volume is zero on both sides.
        """
        decile_buys = self._volume_below_p90(self.buys)
        decile_sells = self._volume_below_p90(self.sells)

        total = decile_buys + decile_sells
        if total == 0:
            # Nothing to compare (no trades, or a single trade per side):
            # report a balanced book instead of dividing by zero.
            self.osi = 0
            return

        self.osi = 100 * ((decile_buys - decile_sells) / total)

    def get_value(self, ts):
        """
        Return the current OSI, recalculating it first when at least
        update_interval has passed since the previous recalculation.
        """
        if ts >= self.previous_update + self.update_interval:
            # prune trades that are too old (keep only those inside the lookback window)
            cutoff = ts - self.lookback
            self.buys = self._prune(self.buys, cutoff)
            self.sells = self._prune(self.sells, cutoff)

            self.calculate_values()
            self.previous_update = ts
        return self.osi
    
osi = OSIEstimator()
# Two trades priced above the mid price (0): recorded on the buy side.
osi.update([[10, 10, 10, 0], [20, 20, 20, 0]])
# The same two trades priced below the mid price (40): recorded on the sell side.
osi.update([[10, 10, 10, 40], [20, 20, 20, 40]])

buy = np.array(osi.buys)
sell = np.array(osi.sells)
buy_qty = buy[:, 1]
sell_qty = sell[:, 1]

# argmin on the boolean mask is the index of the first quantity that is
# not below the 90th percentile; everything before it is summed.
buy_cut = (buy_qty < np.percentile(buy_qty, 90)).argmin()
sell_cut = (sell_qty < np.percentile(sell_qty, 90)).argmin()
decile_buys = buy_qty[:buy_cut].sum()
decile_sells = sell_qty[:sell_cut].sum()



0

In [31]:
import pandas as pd
import logging
import numpy as np
from scipy.optimize import curve_fit
import queue
import pandas as pd
import math


In [7]:
class VolatilityEstimator():
    """
    Rolling volatility estimate built from periodically sampled prices.

    lookback: how far back (in milliseconds) to look at trades, default: 1 day
    return_aggregation: aggregation period for price returns (what is the time period between two price observations), default: 10 minutes
    update_interval: how often to update the estimator (in milliseconds), default: 1 minute
    """

    def __init__(
        self,
        lookback=(1000 * 60 * 60 * 24),
        return_aggregation=(1000 * 60 * 10),
        update_interval=(1000 * 60),
    ):
        self.lookback = lookback
        self.update_interval = update_interval
        self.return_aggregation = return_aggregation

        self.previous_ts = 0

        # Recorded observations, one [timestamp, price] pair per entry.
        self.prices = []

        self.volatility = 0
        self.previous_update = 0

    def update(self, new_price, ts):
        """
        Record new_price at time ts, but only when at least
        return_aggregation has passed since the last recorded observation.
        """
        if ts >= self.previous_ts + self.return_aggregation:
            self.prices.append([ts, new_price])
            self.previous_ts = ts

    def _price_series(self):
        """Return the recorded prices (timestamps stripped) as a 1-D float array."""
        if len(self.prices) == 0:
            return np.empty(0)
        return np.array(self.prices, dtype=float)[:, 1]

    def calculate_volatility_real(self):
        """
        Vol = std(returns)
        Annualized vol = vol * sqrt(aggregation periods per year)
            - aggregation period is in milliseconds

        Volatility is set to 0 when fewer than two prices are stored.
        """
        prices = self._price_series()
        if prices.size < 2:
            self.volatility = 0
            return
        # NOTE(review): each return is divided by the later price of the pair,
        # as in the previous formula - confirm this is preferred over dividing
        # by the earlier price.
        return_arr = np.diff(prices) / prices[1:]
        vol = np.std(return_arr)
        self.volatility = vol * np.sqrt(
            (1000 * 60 * 60 * 24 * 365) / self.return_aggregation
        )

    def calculate_values(self):
        """
        Calculates price volatility instead of percentage volatility:
        sqrt(sum(price differences squared) / number of stored prices).
        Volatility is set to 0 when fewer than two prices are stored.
        """
        prices = self._price_series()
        if prices.size < 2:
            self.volatility = 0
            return
        # Differences are taken between consecutive prices only; the timestamp
        # column must not take part in the diff or in the observation count.
        self.volatility = np.sqrt(np.sum(np.square(np.diff(prices))) / prices.size)

    def get_value(self, ts):
        """
        Return the current volatility, recalculating it first when at least
        update_interval has passed since the previous recalculation.
        """
        if ts >= self.previous_update + self.update_interval:
            # prune old prices
            if len(self.prices) > 0:
                prices = np.array(self.prices)
                self.prices = prices[prices[:, 0] >= ts - self.lookback].tolist()
            self.calculate_values()
            self.previous_update = ts
        return self.volatility
    
# With both intervals set to 0 every observation is recorded.
vol = VolatilityEstimator(update_interval=0, return_aggregation=0)
vol.update(new_price=10, ts=100)
vol.update(new_price=11, ts=200)

# Second column of the stored [timestamp, price] pairs, i.e. the prices.
np.array(vol.prices)[:, 1]



array([10, 11])

In [45]:
# NOTE(review): both paths are absolute and machine-specific; the cell only
# runs where these files exist.
base = pd.read_csv("/home/juuso/Documents/gradu/parsed_data/aggregated/base_data.csv")
indicators = pd.read_csv("/home/juuso/Documents/gradu/parsed_data/aggregated/indicator_data.csv")

# Left-join the indicators onto the base rows by timestamp, forward-fill the
# gaps, then drop the rows that still contain missing values.
data = (
    base.merge(indicators, on="timestamp", how="left")
    .ffill()
    .dropna()
)

In [79]:
# Number of rows in `data`.
len(data)

161113

In [93]:
envs = 3

# One step counter per position, plus a lone single-element index array.
current_step = np.arange(3)
second_step = np.array([1])

# Advance every counter in place.
current_step += 1
current_step

# Indexing with an integer array picks one entry of `values` per index.
values = np.arange(10) * 10
print(values.shape, current_step.shape)
print(values[current_step])
print(values[second_step])

(10,) (3,)
[10 20 30]
[10]


In [108]:
# Three single-element row vectors: stacked along axis 0, then transposed.
a = np.array([1]).reshape(1, -1)
b = np.array([2]).reshape(1, -1)
c = np.array([3]).reshape(1, -1)

print(np.concatenate((a, b, c), axis=0).T.shape)

# Same experiment with three-element row vectors.
a = np.array([1, 2, 3]).reshape(1, -1)
b = np.array([4, 5, 6]).reshape(1, -1)
c = np.array([7, 8, 9]).reshape(1, -1)

np.concatenate((a, b, c), axis=0).T.shape

(1, 3)


(3, 3)

In [130]:
# Flatten the 2x2 matrix into a single row, then into a 1-D array.
a = np.array([[1, 2], [3, 4]]).reshape(1, -1)
a.reshape(-1)

array([1, 2, 3, 4])

In [161]:
data = 100
envs = 5
data_portion = 0.3

# Fractional start of each window, one window per env.
if data_portion * envs < 1:
    # The windows fit side by side: each starts where the previous one ends.
    start = np.arange(envs) * data_portion
else:
    # The windows have to overlap: spread the starts evenly so that the
    # last window ends exactly at 1.
    offset = (1 - data_portion) / (envs - 1)
    start = np.arange(envs) * offset

# Every window covers `data_portion` of the data; scale fractions to `data`.
end = start + data_portion
start_val = start * data
end_val = end * data

start_val, end_val


(array([ 0. , 17.5, 35. , 52.5, 70. ]),
 array([ 30. ,  47.5,  65. ,  82.5, 100. ]))

In [5]:
# check if array has only one true value
a = np.array([True, False, False])
# Exactly one True means exactly one non-zero entry in the boolean array.
has_single_true = np.count_nonzero(a) == 1
has_single_true

array([4, 5, 6])