In [1]:
from binance.client import Client
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import datetime
import talib
import scipy
from pykalman import KalmanFilter

ImportError: numpy.core.multiarray failed to import

## The IDEA
### Step 1. Get data
### Step 2. Add indicators
### Step 3. Feature engineer the indicators to make it usable in any market
### Step 4. Test? 

## Below are functions we are going to use
1. Get Binance Data to get Historical Prices
2. Apply Kalman will allow us to clean the noisey af price data

In [3]:
# getting data from 300 days of history
def get_binance_data(ticker, timeframe, period):
    yes = Client()
    day = datetime.datetime.timestamp(datetime.datetime.now() - datetime.timedelta(days=period))
    res = yes.get_historical_klines(symbol = ticker, interval = timeframe, start_str=str(day))
    df = pd.DataFrame(data=res)
    df = df.drop([6,7,8,9,10,11],axis = 1)
    df.columns = ['time', 'open', 'high', 'low', 'close', 'volume']
    z = ['open', 'high', 'low', 'close', 'volume']
    for i in z:
        df[i] = pd.to_numeric(df[i], downcast="float")
    df['time']=(pd.to_datetime(df['time'],unit='ms'))
    df = df.set_index(df['time'])
    return df

def apply_kalman(close):
    kf = KalmanFilter(transition_matrices = [1],
                    observation_matrices = [1],
                    initial_state_mean = 0,
                    initial_state_covariance = 1,
                    observation_covariance=1,
                    transition_covariance=.1)
    state_means, _ = kf.filter(close)
    return state_means.flatten()
    

## This is where most of the MEAT is done. We need to Feature Engineer all our indicators in out add_signals function. In this case, I will be applying:
#### 1. Kalman Filter GRADIENT to be the output of the model (in Essence, Binary Classification to detect UP (1) or DOWN (0))
#### 2. Normalized volume by applying an exponential moving average that is normalised in this fashion ( (volume.ewm(4) - volume.ewm(5) )/ volume.ewm(4))
#### 3. Normalized moving averages by applying a similar normalisation calculation with 5MA and 20MA
#### 4. RSI and ROCP are already normalised :D
#### 5. We would have to do inspection to see weather the feature is normally distributed (To improve ML performance for Tree Based Models and Neural Nets) This can be done with the jarque_bera test using the scipy package (scipy.stats.jarque_bera(df['volFeature'])) and having the p value to be close to 0 and a histogram
#### 6. Remove outliers that are outside of the 3 standard deviation barrier (99.8%)
#### P.S For more inspiration, Please visit investopedia, do more research on difference indicators and what they mean :D

In [35]:
# generate signals
def add_signals(df : pd.DataFrame) -> pd.DataFrame:
    #defining targets
    df['kalman'] = apply_kalman(df['close'])
    df = df[100:]
    df['gradient'] = np.gradient(df['kalman'])
    df['target'] = df['gradient'] > 0

    # volume
    df['volMa2'] = df['volume'].ewm(4).mean()
    df['volMa3'] = df['volume'].ewm(5).mean()
    df = df.dropna()
    df['volFeature'] = (df['volMa2'] - df['volMa3'])/df['volMa2']

    # defining MA
    df['ewm3'] = df['close'].ewm(3).mean()
    df['5ma'] = df['ewm3'].rolling(5).mean()
    df['20ma'] = df['ewm3'].rolling(20).mean()
    df = df.dropna()

    # use this
    df['5maFeature'] = (df['5ma'] - df['ewm3'])/df['ewm3']
    df['20maFeature'] = (df['20ma'] - df['ewm3'])/df['ewm3']

    # Bollinger Bands and Standard Deviation Percentaged
    df['std'] = df['close'].rolling(20).std()
    df['upper'] = df['20ma'] + 2*df['std']
    df['lower'] = df['20ma'] - 2*df['std']
    df['disFromLower'] = df['close'] - df['lower']
    df['gap'] = df['upper'] - df['lower']
    df = df.dropna()
    df['stdpct'] = (df['std'] - df['ewm3'])/df['ewm3']
    df['bbFeature'] = df['disFromLower']/df['gap']

    # talib indicators
    df['rsi'] = talib.RSI(df['ewm3'], timeperiod=14)/100
    df['rocp'] = talib.ROCP(df['ewm3'], timeperiod=10)
    return df.dropna()

In [36]:
df = get_binance_data(ticker="XRPUSDT", timeframe='5m', period=400)
df = add_signals(df)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [44]:
fig = go.Figure(go.Histogram(x=df['rocp']))
fig.show()

### Experiment with hyper-params and other stuff

In [46]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

In [48]:
ada = AdaBoostClassifier()
mlp = MLPClassifier(hidden_layer_sizes = (30,30,30))

In [49]:
input_features = df[['volFeature', '5maFeature', '20maFeature', 'rsi', 'rocp','bbFeature']].values
outputs = (df['target'] * 1).values

In [50]:
outputs

array([0, 0, 1, ..., 0, 0, 0])

In [51]:
ada.fit(X = input_features, y = outputs)
mlp.fit(X = input_features, y = outputs)

In [54]:
ada.score(input_features, outputs),mlp.score(input_features, outputs)

(0.8015216672179953, 0.8061615335062764)

In [53]:
mlp.score(input_features, outputs)

0.8061615335062764