In [199]:
import uuid
from datetime import datetime
from typing import Callable


class DataStoreException(Exception):
    pass


class DataStore:
    def __init__(self,
                 name=(uuid.uuid4()),
                 cold_start=True,
                 padding=True,
                 differential=300,
                 indicators=list()):
        self._name = name
        self._init_time = datetime.today().strftime('%Y-%m-%d %H:%m:%S')
        self._cold_start = cold_start
        self._indicators = indicators
        self._data = list()
        self._differential = differential
        self._counter = 0
    
    def add_datum(self,datum):
        # check correct object type
        if not isinstance(datum,dict):
            raise DataStoreException('datum must be a python dictionary type object')
            
        # check it has the correct keys
        if 'timestamp' not in datum:
            raise DataStoreException('timestamp key must be provided in datum')
            
        if len(self._data)==0:
            self._data.append(datum)
            self._data_df = pd.DataFrame(datum,index=[self._counter])
            self._counter+=1
        else:
            # check the time differential between data points
            differential = datum['timestamp'] - self._data[-1]['timestamp']
            if differential!=self._differential:
                raise DataStoreException(f'differential of {differential} did not match the stated differential.')
                
            self._data.append(datum)
            
            data_df = pd.DataFrame(datum,index=[self._counter])
            self._data_df = pd.concat([self._data_df,data_df],axis=0)
            self._counter+=1
            
            self.calc_indicators()
            
    def calc_indicators(self):
        for indicator in self._indicators:
            if isinstance(indicator, Callable):
                print(indicator(self._data_df))
            else:
                print('fail')

def mean_l4(x):
    return np.mean(x['close'][-4:])

def trend_l4(x):
    X = x['close'][-4:]
    Y = x['timestamp'][-4:]
    return np.polyfit(X,Y,1)[0]

import math

def logit(x):
    return 1/(1+math.exp(-0.001*x+0.1))

def model_prob(x):
    #print(x['close'].shape)
    return logit(x['close'][-1:])
                
a = DataStore(cold_start='hello',indicators=[mean_l4,trend_l4])


if __name__=='__main__':
    
    def mean_l4(x):
        return np.mean(x['close'][-4:])

    def trend_l4(x):
        X = x['close'][-4:]
        Y = x['timestamp'][-4:]
        return np.polyfit(X,Y,1)[0]

    a = DataStore(cold_start='hello',indicators=[mean_l4,trend_l4,model_prob])

    a.add_datum({'timestamp':300,'close':400})
    
    a.add_datum({'timestamp':600,'close':420})
    
    a.add_datum({'timestamp':900,'close':410})
    
    a.add_datum({'timestamp':1200,'close':415})
    
    a.add_datum({'timestamp':1500,'close':410})
    
    a.add_datum({'timestamp':1800,'close':2410})

410.0
15.000000000000163
0.5793242521487495
410.0
14.999999999999925
0.5768852611320463
411.25
23.99999999999987
0.5781052328843092
413.75
-54.54545454545413
0.5768852611320463
911.25
0.3002485376760993
0.9097018552970803


In [184]:
a._data_df['close'][-1:][0]

400

In [185]:
np.array([1.2345])[-1]

1.2345

In [162]:
np.array([])[-1]

IndexError: index -1 is out of bounds for axis 0 with size 0

In [147]:
a.add_datum({'timestamp':1200,'close':420})

412.5
27.27272727272728


In [148]:
 a.add_datum({'timestamp':1500,'close':410})

415.0
-30.000000000000192


In [149]:
a.add_datum({'timestamp':1800,'close':429})

417.25
28.115653040877365


In [150]:
a.add_datum({'timestamp':2100,'close':439})

424.5
24.728850325379558


In [151]:
a._data

[{'timestamp': 300, 'close': 400},
 {'timestamp': 600, 'close': 420},
 {'timestamp': 900, 'close': 410},
 {'timestamp': 1200, 'close': 420},
 {'timestamp': 1500, 'close': 410},
 {'timestamp': 1800, 'close': 429},
 {'timestamp': 2100, 'close': 439}]

In [1]:
import pandas as pd
import numpy as np
import pickle
import time
import math
from matplotlib import pyplot as plt
import sys

import warnings
warnings.simplefilter('ignore', np.RankWarning)

'''
dataset which accepts data points through time. Has two modes:
- irregular:
    - stores random datapoints through time.
    - doesn't perform any checks, just stores the data in it's raw form
- regular:
    - stores the data through time
    - checks its regular, checks for any missing datapoints
    - interpolates missing data points, if missing
    - calculates indicators like RSI, gradients of varying order, etc....
    - can splice in an irregular dataset into a regular one?
'''

class DataStoreException(Exception):
    pass


class DataStore:
    ''' Class which iteratively stores data points, calculating statistical indicators as it goes along.'''

    def __init__(self,
                name:str,
                cold_start: bool=True,
                is_regular: bool=True, 
                indicators: list=['rolling_mean_30','rolling_median_30','calc_trend_30','calc_acceleration_30']):

        self._name = name
        self.init_time = time.time()
        self._unique_id = f'{name}_{self.init_time}'

        self._data = pd.DataFrame()
        self.is_regular = is_regular

        # Indicators is a list contain all the indicators which will be calculated on the go as each data point is added
        self._indicators = indicators

        #self.mlp = joblib.load('/home/cemlyn/Documents/BinanceBot/app/data/models/mlp_model.bz2')
        self.mlp_covars = list()
    
    def add_datum(self,datum:pd.DataFrame):
        ''' Add new data from the websocket feed to here - Note that data points are only added when the kline is closed. '''
        # Append new data points
        self._data = pd.concat([self._data,datum],axis=0)

        if not self.check_is_regular():
            pass
        
        self.calc_indicators(*self._indicators)


    def insert_datum(self,datum:pd.DataFrame):
        self._data = self._data.combine_first(datum)


    def check_is_regular(self):
        ''' Checks the close times series to see if time intervals are regular. '''
        close_times = self._data.reset_index()[['close-time']].copy()

        if len(close_times)<2:
            return True

        close_times.loc[:,'close-time-diff'] = close_times['close-time'].diff()

        if (close_times['close-time-diff'].dropna().nunique())>1:
            return False
        else:
            return True
    

    def calc_indicators(self,*args):
        for arg in args:
            func = getattr(DataStore,arg)
            self.calc_indicator(func)


    def calc_indicator(self,func):
        ''' Calculates values for the indicator. '''
        indicator_name = func.__name__
        indicator_value = func(self._data['close-price'])

        if indicator_name not in self._data:
            self._data.loc[:,indicator_name] = indicator_value
        else:
            indicator_index_value = self._data.columns.tolist().index(indicator_name)
            self._data.iloc[-1,indicator_index_value] = indicator_value



    @staticmethod
    def rolling_mean_30(series):
        return np.mean(series.iloc[-30:])

    @staticmethod
    def rolling_median_30(series):
        return np.median(series.iloc[-30:])

    @staticmethod
    def calc_trend_30(series):
        x = series.iloc[-30:].index.astype(float).values
        y = series.iloc[-30:].astype(float).values
        
        if len(series)>3:
            grad = np.polyfit(x,y,1,full=False)[0]
        else:
            grad = np.nan
        return grad

    @staticmethod
    def calc_trend(series):
        x = series.dropna().index.astype(float).values
        y = series.dropna().astype(float).values
        if len(y)>3:
            grad = np.polyfit(x,y,1,full=False)[0]
        else:
            grad = np.nan
        return grad

    @staticmethod
    def calc_acceleration_30(series):
        first_order = series.iloc[-60:].rolling(30).apply(lambda x: DataStore.calc_trend(x))
        first_order = first_order.iloc[-30:]
        return DataStore.calc_trend(first_order)

# if __name__ == "__main__":
    
#     store = DataStore(name="Test")

#     df = pd.read_csv('bot.csv',index_col=0)

#     for n,row in enumerate(df.iterrows()):
#         store.add_datum(row[1].to_frame().T)

#     store._data.to_csv('cvb.csv')