In [None]:
import pandas as pd
import numpy as np

import xgboost as xgb
from xgboost import plot_importance, plot_tree
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV

# Chart drawing
import plotly as py
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

from fetch_data import *

In [None]:
# Get data from the API
prices_data = fetchPriceData(id='bitcoin', vs_currency='usd')
ohlc_data = fetchCandleData(id='bitcoin', vs_currency='usd')

In [None]:
prices_data['EMA_9'] = prices_data['prices'].ewm(9).mean().shift()
prices_data['SMA_5'] = prices_data['prices'].rolling(5).mean().shift()
prices_data['SMA_10'] = prices_data['prices'].rolling(10).mean().shift()
prices_data['SMA_15'] = prices_data['prices'].rolling(15).mean().shift()
prices_data['SMA_30'] = prices_data['prices'].rolling(30).mean().shift()

In [53]:
def calculateRSI(prices_data, n=14):
    """Calculate the Relative Strength Index of an asset.

    Args:
        prices_data (pandas dataframe object): prices data
        n (int, optional): number of . Defaults to 14.
    Return:
        rsi (pandas series object): relative strength index
    """
    price = prices_data['prices']
    delta = price.diff()
    delta = delta[1:]

    prices_up = delta.copy()
    prices_up[prices_up < 0] = 0
    prices_down = delta.copy()
    prices_down[prices_down > 0] = 0

    roll_up = prices_up.rolling(n).mean()
    roll_down = prices_down.abs().rolling(n).mean()

    relative_strength = roll_up / roll_down
    rsi = 100.0 - (100.0 / (1.0 + relative_strength))

    return rsi

In [None]:
prices_data['RSI'] = calculateRSI(prices_data).fillna(0)

In [None]:
def calculateMACD(prices_data):
    """Calculate the MACD of EMA12 and EMA26 of an asset

    Args:
        prices_data (dataframe): prices data

    Returns:
        macd (pandas series object): macd of the asset
        macd_signal (pandas series object): macd signal of the asset
    """
    ema12 = pd.Series(prices_data['prices'].ewm(span=12, min_periods=12).mean())
    ema26 = pd.Series(prices_data['prices'].ewm(span=26, min_periods=26).mean())

    macd = pd.Series(ema12 - ema26)
    macd_signal = pd.Series(macd.ewm(span=9, min_periods=9).mean())

    return macd, macd_signal

In [None]:
macd, macd_signal = calculateMACD(prices_data)
prices_data['MACD'] = macd
prices_data['MACD_signal'] = macd_signal

In [None]:
# Shift label(y) col by one value to predict the next day using today's data (technical indicators)
prices_data['prices'] = prices_data['prices'].shift(-1)

In [None]:
# Drop invalid samples - the samples where moving averages exceed 
prices_data = prices_data.iloc[33:]
prices_data = prices_data[:-1] # since we did shifting by one
prices_data.index = range(len(prices_data)) # update indexes

In [None]:
prices_data.shape

In [None]:
test_size  = 0.15
valid_size = 0.15

test_split_idx  = int(prices_data.shape[0] * (1-test_size))
valid_split_idx = int(prices_data.shape[0] * (1-(valid_size+test_size)))

train_prices_data  = prices_data.loc[:valid_split_idx].copy()
valid_prices_data  = prices_data.loc[valid_split_idx+1:test_split_idx].copy()
test_prices_data   = prices_data.loc[test_split_idx+1:].copy()

fig = go.Figure()
fig.add_trace(go.Scatter(x=train_prices_data.time, y=train_prices_data.prices, name='Training'))
fig.add_trace(go.Scatter(x=valid_prices_data.time, y=valid_prices_data.prices, name='Validation'))
fig.add_trace(go.Scatter(x=test_prices_data.time,  y=test_prices_data.prices,  name='Test'))

In [None]:
drop_cols = ['market_caps', 'total_volumes', 'time', 'date']

train_prices_data = train_prices_data.drop(drop_cols, axis=1)
valid_prices_data = valid_prices_data.drop(drop_cols, axis=1)
test_prices_data = test_prices_data.drop(drop_cols, axis=1)

In [None]:
y_train = train_prices_data['prices'].copy()
X_train = train_prices_data.drop(['prices'], axis=1)

y_valid = valid_prices_data['prices'].copy()
X_valid = valid_prices_data.drop(['prices'], axis=1)

y_test  = test_prices_data['prices'].copy()
X_test  = test_prices_data.drop(['prices'], axis=1)

X_train.info()

In [None]:
%%time

parameters = {
    'n_estimators': [400],
    'learning_rate': [0.01, 0.05],
    'max_depth': [8],
    'gamma': [0.01, 0.02],
    'random_state': [42]
}

eval_set = [(X_train, y_train), (X_valid, y_valid)]
model = xgb.XGBRegressor(eval_set=eval_set, objective='reg:squarederror', verbose=False)
clf = GridSearchCV(model, parameters)

clf.fit(X_train, y_train)

print(f'Best params: {clf.best_params_}')
print(f'Best validation score = {clf.best_score_}')

In [None]:
%%time

model = xgb.XGBRegressor(**clf.best_params_, objective='reg:squarederror')
model.fit(X_train, y_train, eval_set=eval_set, verbose=False)

In [None]:
plot_importance(model);

In [None]:
y_pred = model.predict(X_test)
print(f'y_true = {np.array(y_test)[:5]}')
print(f'y_pred = {y_pred[:5]}')

In [None]:
print(f'mean_squared_error = {mean_squared_error(y_test, y_pred)}')

In [None]:
predicted_prices = prices_data.loc[test_split_idx+1:].copy()
predicted_prices['prices'] = y_pred

fig = make_subplots(rows=2, cols=1)
fig.add_trace(go.Scatter(x=prices_data.time, y=prices_data.prices,
                         name='Truth',
                         marker_color='LightSkyBlue'), row=1, col=1)

fig.add_trace(go.Scatter(x=predicted_prices.time,
                         y=predicted_prices.prices,
                         name='Prediction',
                         marker_color='MediumPurple'), row=1, col=1)

fig.add_trace(go.Scatter(x=predicted_prices.time,
                         y=y_test,
                         name='Truth',
                         marker_color='LightSkyBlue',
                         showlegend=False), row=2, col=1)

fig.add_trace(go.Scatter(x=predicted_prices.time,
                         y=y_pred,
                         name='Prediction',
                         marker_color='MediumPurple',
                         showlegend=False), row=2, col=1)

In [2]:
import pandas as pd
from fetch_data import *
from feature_extraction import extractAll

In [3]:
prices_data = fetchPriceData()

In [4]:
clean = extractAll(prices_data)

In [6]:
import xgboost as xgb

In [63]:
clean_test = clean.iloc[-2:, :]
clean = clean.iloc[:-2, :]
clean_test

Unnamed: 0,prices,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
1762,39123.753751,38051.430041,38334.063125,38223.252109,38110.483655,37862.69229,69.37952,316.622724,211.052483
1763,39850.202184,38153.946414,38549.128039,38261.540233,38227.484296,37857.227526,67.349403,356.370119,240.11601


In [28]:
parameters = {'gamma':0.01, 'learning_rate':0.05, 'max_depth':8, 'n_estimators':400}
model = xgb.XGBRegressor(**parameters, objective='reg:squarederror')
y_train = clean['prices'].copy()
X_train = clean.drop(['prices'], axis=1)

y_test = clean_test['prices'].copy()
X_test = clean_test.drop(['prices'], axis=1)

In [33]:
model.fit(X_train, y_train, verbose=False)

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0.01, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.05, max_delta_step=0, max_depth=8,
             min_child_weight=1, missing=nan, monotone_constraints='()',
             n_estimators=400, n_jobs=8, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

In [157]:
# Now I want to do predictions
prediction_days = 10
last_14 = clean.iloc[-14:, :]
df_res = pd.DataFrame(columns=['prices', 'EMA_9', 'SMA_5','SMA_10','SMA_15','SMA_30','RSI',	'MACD',	'MACD_signal'])
# print(y_test)
# print(model.predict(X_test))
# last_14

In [172]:
def trainModel(prices_data):
    """Train the model on historical data

    Args:
        prices_data (pandas dataframe object): prices _data

    Returns:
        model: XGBRegressor model trained on live data
    """
    X_train = clean.drop(['prices'], axis=1)
    y_train = clean['prices'].copy()

    # Make the model on given parameters
    parameters = {'gamma':0.01, 'learning_rate':0.05, 'max_depth':8, 'n_estimators':400}
    model = xgb.XGBRegressor(**parameters, objective='reg:squarederror')
    # Train model
    model.fit(X_train, y_train, verbose=False)

    return model

In [192]:
def quantPredictPrices(prices_data, num_days):
    """Predict prices based on historical data

    Args:
        prices_data (pandas dataframe object): prices data
        num_ticks (int): number of days in future to predict the results
    Returns:
        (pandas dataframe object): dataframe with future predicted prices
    """
    # Store time for plotting
    latest_time = prices_data.iloc[-1]['time']
    future_times = []
    predictions = pd.DataFrame(columns=['prices', 'EMA_9', 'SMA_5','SMA_10','SMA_15','SMA_30','RSI',	'MACD',	'MACD_signal'])

    prediction_data = extractAll(prices_data)
    model = trainModel(prediction_data)
    # Now lets do predictions

    latest_ticks = prediction_data.iloc[-14:, :]

    # Get the data for that many days (6 * num_days as we have 4-hour ticks data)
    for i in range(1, num_days*6 + 1): 
        X = latest_ticks.drop(['prices'], axis=1)
        y = latest_ticks.iloc[-1]['prices']

        features = X.iloc[-1:, :]
        predict_features = np.array(features).reshape(-1, 8)

        # Get the next price
        price = model.predict(predict_features)[0]

        # Calculate other features based on todays price
        ema9  = (y * (1 - 2/(9 + 1)) + price * (2/(9 + 1)))
        sma5  = ((features['SMA_5'].values[0] * 4 + price) / 5)
        sma10 = ((features['SMA_10'].values[0] * 9 + price) / 10)
        sma15 = ((features['SMA_15'].values[0] * 14 + price) / 15)
        sma30 = ((features['SMA_30'].values[0] * 29 + price) / 30)

        rsi = calculateRSI(prices_data = latest_ticks.iloc[-13:, :], today_price=price).iloc[-1]

        macd = (sma30 - sma15)
        macd_signal = (features['MACD_signal'].values[0] * (1 - 2/(9+1)) + macd * (2/(9+1)))

        latest_ticks.loc[len(latest_ticks)] = [price, ema9, sma5, sma10, sma15, sma30, rsi, macd, macd_signal]
        predictions.loc[len(predictions)]   = [price, ema9, sma5, sma10, sma15, sma30, rsi, macd, macd_signal]
        future_times.append(latest_time + datetime.timedelta(hours = 4 * i))
    
    # Add date and time
    predictions['time'] = pd.Series(future_times)
    predictions['date'] = predictions['time'].dt.date

    return predictions
    

In [193]:
# a = fetchPriceData()

In [194]:
res = quantPredictPrices(a, 20)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [197]:
(res)

Unnamed: 0,prices,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal,time,date
0,39446.632812,39830.932366,39779.335329,39325.916347,38899.363773,38218.131231,,-681.232542,238.031396,2021-08-01 08:01:14,2021-08-01
1,37305.398438,39018.385938,39284.547951,39123.864556,38793.099417,38187.706804,,-605.392613,69.346594,2021-08-05 08:01:14,2021-08-05
2,37020.261719,37248.371094,38831.690705,38913.504272,38674.910237,38148.791968,,-526.118269,-49.746378,2021-08-09 08:01:14,2021-08-09
3,36976.355469,37011.480469,38460.623657,38719.789392,38561.673253,38109.710752,,-451.962501,-130.189603,2021-08-13 08:01:14,2021-08-13
4,37166.496094,37014.383594,38201.798145,38564.460062,38468.661442,38078.270263,,-390.391179,-182.229918,2021-08-17 08:01:14,2021-08-17
...,...,...,...,...,...,...,...,...,...,...,...
115,29870.265625,29820.218750,29808.183590,29823.227391,29907.296439,30490.028479,,582.732040,652.182083,2022-11-04 08:01:14,2022-11-04
116,29870.265625,29870.265625,29820.599997,29827.931214,29904.827718,30469.369717,,564.541999,634.654066,2022-11-08 08:01:14,2022-11-08
117,29737.660156,29843.744531,29804.012029,29818.904108,29893.683214,30444.979398,,551.296185,617.982490,2022-11-12 08:01:14,2022-11-12
118,29743.492188,29738.826563,29791.908061,29811.362916,29883.670479,30421.596491,,537.926013,601.971194,2022-11-16 08:01:14,2022-11-16


In [162]:
for i in range(20):
    import numpy as np
    X = last_14.drop(['prices'], axis=1)
    y = last_14['prices']

    features = X.iloc[-1:, :]

    # print(np.array(features))
    # print(features)
    predict_features = np.array(features).reshape(-1, 8)
    # print('---------------------------')
    # print(predict_features)
    # price = model.predict(predict_features)
    # print('---------------------------')
    # print(price)
    ema9  = (y.iloc[-1] * (1 - 2/(9 + 1)) + price * (2/(9 + 1)))
    sma5  = (features['SMA_5'].values[0] * 4 + price) / 5
    sma10 = (features['SMA_10'].values[0] * 9 + price) / 10
    sma15 = (features['SMA_15'].values[0] * 14 + price) / 15
    sma30 = (features['SMA_30'].values[0] * 29 + price) / 30

    rsi = calculateRSI(prices_data = last_14.iloc[-14:, :], today_price = price)

    macd = (sma30 - sma15)
    macd_signal = (features['MACD_signal'].values[0] * (1 - 2/(9+1)) + macd * (2/(9+1)) )
    # print('---------------------------')

    # print(price[0])
    # print('---------------------------')

    # print(ema9[0]) 
    # print('---------------------------')

    # print(sma5[0], sma10[0], sma15[0], sma30[0])
    # print('---------------------------')

    # print(rsi.iloc[-1])
    # print('---------------------------')
    # print(features['MACD_signal'].values[0])
    # print(macd[0], macd_signal[0])
    last_14.loc[len(last_14)] = [price[0], ema9[0], sma5[0], sma10[0], sma15[0], sma30[0], rsi.iloc[-1], macd[0], macd_signal[0]]
    df_res.loc[len(df_res.index)] = [price[0], ema9[0], sma5[0], sma10[0], sma15[0], sma30[0], rsi.iloc[-1], macd[0], macd_signal[0]]


    

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value, self.name)


In [161]:
df_res

Unnamed: 0,prices,EMA_9,SMA_5,SMA_10,SMA_15,SMA_30,RSI,MACD,MACD_signal
0,39013.40625,39063.957031,38240.898438,38246.148438,38028.035156,37897.886719,65.829088,-130.148438,121.69825
1,38629.96875,38936.71875,38318.710938,38284.53125,38068.167969,37922.292969,56.266648,-145.875,68.183594
2,38628.007812,38629.578125,38380.570312,38318.878906,38105.492188,37945.816406,49.791772,-159.675781,22.611719
3,38471.960938,38596.796875,38398.851562,38334.1875,38129.921875,37963.355469,46.801349,-166.566406,-15.223907
4,38289.222656,38435.414062,38376.925781,38329.691406,38140.542969,37974.214844,52.281825,-166.328125,-45.444752
5,38547.132812,38340.804688,38410.96875,38351.433594,38167.648438,37993.3125,55.305771,-174.335938,-71.222992
6,38413.996094,38520.507812,38411.574219,38357.691406,38184.070312,38007.332031,62.285041,-176.738281,-92.32605
7,38469.421875,38425.082031,38423.144531,38368.863281,38203.097656,38022.734375,61.129786,-180.363281,-109.933502
8,38504.519531,38476.441406,38439.417969,38382.429688,38223.191406,38038.792969,59.243574,-184.398438,-124.826492
9,38608.886719,38525.390625,38473.3125,38405.074219,38248.902344,38057.796875,59.960213,-191.105469,-138.082291


In [3]:
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

from fetch_data import *

In [4]:
prices_data = fetchPriceData()
ohlc_data = fetchCandleData()

In [16]:
fig = go.Figure(go.Ohlc(x = ohlc_data['time'],
              open=ohlc_data['open'],
              high=ohlc_data['high'],
              low=ohlc_data['low'],
              close=ohlc_data['close'],
              name='Price',
              title='Candle Chart for One Month Data'))


fig.update(layout_xaxis_rangeslider_visible=False)
fig.update_layout(margin=dict(l=10, r=10, t=10, b=10), width=900, height=300)
fig.show()