In [34]:
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.utils import plot_model, model_to_dot
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import (Add, Multiply, LeakyReLU, Input, Conv1D, Dropout, Activation, BatchNormalization, MaxPooling1D, ZeroPadding1D, AveragePooling1D, Flatten, Dense)
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import TensorBoard, ModelCheckpoint, Callback, EarlyStopping
import tensorflow_addons as tfa
from tensorflow.python.client import device_lib
import matplotlib.pyplot as plt
import itertools
# from settings import *

# Filesystem locations used by the loaders, the plots and the checkpoints.
data_dir = "."
output_dir = "./outputs"
model_dir = "./models"

# Columns kept from the raw BTC price CSV.
COLS = ["Date", "Open", "High", "Low", "Close", "Volume"]

# Integer class ids written into the "Label" column.
FlatLabel = 0
ValleyLabel = 1
PeakLabel = 2

# Display names used for the confusion-matrix ticks; the list length also
# sets the width of the model's softmax output.
class_names = ["hold", "buy", "sell"]

def get_available_devices():
    """Return the names of the devices TensorFlow lists as locally available."""
    device_names = []
    for device_proto in device_lib.list_local_devices():
        device_names.append(device_proto.name)
    return device_names


def create_train_test(df_train, test_size, window_size, shift, threshold):
    """Label a price frame, window it and split it into train/test arrays.

    Steps: ``labelling`` adds the ``Label`` column, ``range_buy_sell`` widens
    the buy/sell labels in place, ``slicing`` builds the sliding windows and
    ``train_test_split`` (fixed ``random_state=0``) splits them.

    Args:
        df_train: price frame passed to ``labelling``.
        test_size: forwarded to ``train_test_split``.
        window_size: number of rows per window.
        shift: label shift forwarded to ``labelling``.
        threshold: swing threshold forwarded to ``labelling``.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` - note the order - with the
        labels one-hot encoded to ``len(class_names)`` columns.
    """
    df_train = labelling(df_train, shift, threshold)
    range_buy_sell(df_train)
    print(df_train.query("Label == 1").shape, df_train.query("Label == 2").shape, df_train.query("Label == 0").shape)
    X, Y = slicing(df_train, window_size=window_size)
    # NOTE(review): consecutive windows overlap, so a random split puts
    # near-identical windows on both sides - confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=test_size, random_state=0)

    # One-hot encode with an explicit width: without num_classes the width is
    # inferred from the largest id present, so a split that happens to miss
    # the highest class would no longer match the model's softmax output.
    n_classes = len(class_names)
    y_train = to_categorical(y_train, num_classes=n_classes)
    y_test = to_categorical(y_test, num_classes=n_classes)
    return X_train, y_train, X_test, y_test


def get_wiki():
    """Load ``wiki.csv`` from ``data_dir`` and index it by ``Date``.

    The ``date`` column is renamed to ``Date`` and the ``timestamp`` column
    is dropped before the index is set.
    """
    wiki_raw = pd.read_csv(f"{data_dir}/wiki.csv")
    wiki_renamed = wiki_raw.rename(columns={"date": "Date"})
    wiki_trimmed = wiki_renamed.drop(["timestamp"], axis=1)
    return wiki_trimmed.set_index("Date")


def get_trend():
    """Load ``trend.csv`` from ``data_dir`` and sum ``trend`` per calendar day.

    The day is taken as the part of the ``datetime`` string before the first
    space.

    Returns:
        A one-column frame (``trend``) indexed by ``Date``.
    """
    df_trend = pd.read_csv(f"{data_dir}/trend.csv")
    df_trend = df_trend.assign(Date=df_trend['datetime'].str.split(' ', expand=True)[0])
    # Select the column before aggregating. The previous `.sum("trend")`
    # passed the column name as the positional `numeric_only` flag, which
    # only worked because a non-empty string is truthy.
    return df_trend.groupby("Date")[["trend"]].sum()


def get_btc():
    """Load ``btc.csv`` from ``data_dir``, keep the ``COLS`` columns, index by ``Date``."""
    btc_raw = pd.read_csv(f"{data_dir}/btc.csv")
    return btc_raw[COLS].set_index("Date")

def range_buy_sell(df, tolerance=0.03):
    """Widen buy/sell labels to hold rows whose close is near a buy/sell price.

    ``df`` is modified in place; nothing is returned. Rows are visited in
    order. A row whose label is neither 1 (buy) nor 2 (sell) is relabelled:

    * 1 if its ``Close`` is within ``tolerance`` (relative) of the current
      buy price, i.e. the first buy price until a sell row is passed, then
      the next buy price, and so on;
    * 2 if its ``Close`` is within ``tolerance`` of the current sell price,
      which only becomes available once the first buy row has been passed
      and advances with every further buy row. When both tests match, the
      sell label wins.

    Processing stops at the sell row that follows the last buy price.

    NOTE(review): a later notebook cell defines ``range_buy_sell`` again;
    keep the two definitions in sync or delete one of them.

    Args:
        df: frame with a numeric ``Close`` column and a ``Label`` column.
        tolerance: maximum relative price distance for relabelling
            (default 0.03, the value that used to be hard-coded).
    """
    # Snapshot both columns so the labels read by the loop are the original
    # ones, regardless of the in-place writes below.
    closes = df['Close'].tolist()
    labels = df['Label'].tolist()

    buy_point = [price for price, label in zip(closes, labels) if label == 1]
    sell_point = [price for price, label in zip(closes, labels) if label == 2]

    # Without any buy price nothing can be relabelled: the sell test is only
    # enabled after a buy row. Previously the first hold row raised IndexError.
    if not buy_point:
        return

    buy_pos = 0
    sell_pos = -1
    for i, price, label in zip(df.index, closes, labels):
        if label == 1:
            sell_pos += 1
        elif label == 2:
            buy_pos += 1
            if buy_pos == len(buy_point):
                break
        else:
            cur_buy = buy_point[buy_pos]
            if abs(price - cur_buy) / cur_buy <= tolerance:
                df.at[i, 'Label'] = 1
            # Skip the sell test before the first buy, and also when more buy
            # rows than sell prices have been seen (the latter used to raise
            # IndexError).
            if sell_pos == -1 or sell_pos >= len(sell_point):
                continue
            cur_sell = sell_point[sell_pos]
            if abs(cur_sell - price) / cur_sell <= tolerance:
                df.at[i, 'Label'] = 2

def collect_peaks(data, threshold, price_col="Close"):
    """Return the sorted index labels of local peaks in ``data[price_col]``.

    The scan keeps a candidate peak (``date_start`` / ``price_max``) and the
    lowest price seen since (``price_min``). Each time a new low is reached
    and ``(price_max - price_min) / price_min >= threshold``, the candidate
    is recorded as a peak. The candidate is replaced whenever the price
    exceeds ``price_max`` or has risen by at least ``threshold`` relative to
    ``price_min``.

    Args:
        data: frame holding the price column; its index supplies the labels.
        threshold: minimum relative move, e.g. ``0.1`` for 10%.
        price_col: name of the price column.

    Returns:
        Sorted list of index labels; empty for an empty frame.
    """
    if len(data) == 0:
        return []

    no_low_yet = float('inf')
    peaks = set()
    price_max = 0
    price_min = no_low_yet
    date_start = data.index[0]
    for date, price in data[price_col].items():
        if price < price_min:
            price_min = price
            profit = (price_max - price_min) / price_min
            if profit >= threshold:
                peaks.add(date_start)
        elif price > price_max or (
            # The rise is only meaningful once a real low exists; evaluating
            # it against the inf sentinel produced NaN and a RuntimeWarning.
            price_min != no_low_yet
            and (price - price_min) / price_min >= threshold
        ):
            price_min = price
            price_max = price
            date_start = date
    return sorted(peaks)
    
    
def collect_valleys(data, threshold, price_col="Close"):
    """Return the sorted index labels of local valleys in ``data[price_col]``.

    The scan keeps a candidate valley (``date_start`` / ``price_min``) and
    the highest price seen since (``price_max``). Each time a new high is
    reached and ``(price_max - price_min) / price_min >= threshold``, the
    candidate is recorded as a valley. The candidate is replaced whenever
    the price falls below ``price_min`` or has dropped by at least
    ``threshold`` relative to the current price.

    Args:
        data: frame holding the price column; its index supplies the labels.
        threshold: minimum relative move, e.g. ``0.1`` for 10%.
        price_col: name of the price column.

    Returns:
        Sorted list of index labels; empty for an empty frame.
    """
    if len(data) == 0:
        return []

    no_low_yet = float('inf')
    valleys = set()
    price_max = 0
    price_min = no_low_yet
    date_start = data.index[0]
    for date, price in data[price_col].items():
        if price > price_max:
            price_max = price
            # Until a first candidate low exists there is nothing to measure
            # against; dividing by the inf sentinel only produced NaN and a
            # RuntimeWarning.
            if price_min != no_low_yet:
                profit = (price_max - price_min) / price_min
                if profit >= threshold:
                    valleys.add(date_start)
        elif price < price_min or (price_max - price) / price >= threshold:
            price_min = price
            price_max = price
            date_start = date
    return sorted(valleys)


def collect_labels(data, threshold):
    """Build a ``Label`` frame, indexed by ``Date``, covering every row of ``data``.

    Index labels returned by ``collect_peaks`` get ``PeakLabel``, those from
    ``collect_valleys`` get ``ValleyLabel`` and all remaining index labels
    get ``FlatLabel``.
    """
    peak_dates = collect_peaks(data, threshold)
    valley_dates = collect_valleys(data, threshold)
    flat_dates = sorted(set(data.index).difference(peak_dates, valley_dates))

    label_frames = [
        pd.DataFrame({"Date": dates, "Label": label_id})
        for dates, label_id in (
            (peak_dates, PeakLabel),
            (valley_dates, ValleyLabel),
            (flat_dates, FlatLabel),
        )
    ]
    combined = pd.concat(label_frames, axis=0)
    return combined.sort_values("Date").set_index("Date")


def labelling(data, shift, threshold):
    """Join the labels from ``collect_labels`` onto ``data``, sorted by index.

    When ``shift`` is truthy the ``Label`` column is shifted by ``shift``
    rows (a positive value gives each row the label of the row ``shift``
    positions earlier), rows containing any NaN are dropped and the label
    is cast back to ``int``.
    """
    labelled = data.join(collect_labels(data, threshold)).sort_index()
    if not shift:
        return labelled

    shifted = labelled.assign(y=labelled["Label"].shift(shift))
    shifted = shifted.dropna().drop(["Label"], axis=1)
    shifted = shifted.rename(columns={"y": "Label"})
    shifted["Label"] = shifted["Label"].astype(int)
    return shifted


def slicing(data, label_col="Label", window_size=30):
    """Cut ``data`` into overlapping windows of ``window_size`` consecutive rows.

    Returns:
        ``(X, Y)`` where ``X[k]`` holds the non-label columns of window ``k``
        and ``Y[k]`` is a length-1 array with the label of that window's
        last row.
    """
    # Convert once, then take positional slices of the arrays.
    feature_values = data.drop([label_col], axis=1).to_numpy()
    label_values = data[label_col].to_numpy()

    window_starts = range(len(data) - window_size + 1)
    X = [feature_values[start:start + window_size] for start in window_starts]
    # `[-1:]` keeps the one-element shape that `tail(1)` produced.
    Y = [label_values[start:start + window_size][-1:] for start in window_starts]
    return np.array(X), np.array(Y)


def build_model(sequence_length, nb_features, learning_rate=3e-6):
    """Build and compile a gated, dilated 1-D convolutional classifier.

    Seven stacked blocks (dilation rates 1..7) each apply a 1x1 convolution,
    a filter branch and a gate branch whose ``tanh`` / ``sigmoid``
    activations are multiplied, a second 1x1 convolution, and a residual
    add. The block outputs are summed as skip connections, flattened and fed
    to a softmax layer with ``len(class_names)`` units.

    Args:
        sequence_length: number of time steps per input window.
        nb_features: number of features per time step.
        learning_rate: Adam learning rate (default ``3e-6``, the value that
            used to be hard-coded).

    Returns:
        The compiled Keras ``Model`` (categorical cross-entropy, accuracy).
    """
    n_filters = 64
    filter_width = 2
    dilation_rates = list(range(1, 8))

    history_seq = Input(shape=(sequence_length, nb_features))
    x = history_seq

    skips = []
    for dilation_rate in dilation_rates:

        # preprocessing - equivalent to time-distributed dense
        x = Conv1D(16, 1, padding='same', activation=LeakyReLU())(x)

        # filter
        # NOTE(review): padding='same' lets each step see later steps of the
        # window; use 'causal' if that is not intended.
        x_f = Conv1D(filters=n_filters,
                     kernel_size=filter_width,
                     padding='same',
                     dilation_rate=dilation_rate)(x)
        x_f = LeakyReLU()(x_f)
        x_f = BatchNormalization()(x_f)

        # gate
        x_g = Conv1D(filters=n_filters,
                     kernel_size=filter_width,
                     padding='same',
                     dilation_rate=dilation_rate)(x)

        # combine filter and gating branches
        z = Multiply()([Activation('tanh')(x_f),
                        Activation('sigmoid')(x_g)])

        # postprocessing - equivalent to time-distributed dense
        z = Conv1D(16, 1, padding='same', activation=LeakyReLU())(z)

        # residual connection
        x = Add()([x, z])

        # collect skip connections
        skips.append(z)

    # add all skip connection outputs
    out = Activation(LeakyReLU())(Add()(skips))

    out = Flatten()(out)

    # Final dense layer
    out = Dense(len(class_names), activation="softmax")(out)

    model = Model(history_seq, out)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # https://www.tensorflow.org/addons/api_docs/python/tfa/metrics/F1Score#
    #f1_score = tfa.metrics.F1Score(num_classes=3, threshold=None)

    # `('accuracy')` was a plain string, not a tuple; pass a real list.
    model.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model


def train_model(X_train, y_train, model, dataset, window_size, shift,
                batch_size, epochs, patience, valid_size, device="/cpu:0"):
    """Fit ``model`` with early stopping, checkpointing and class re-weighting.

    Args:
        X_train: training windows.
        y_train: one-hot labels with shape ``(n_samples, 3)``; ``[1, 0, 0]``
            is the hold class.
        model: compiled Keras model.
        dataset, window_size, shift: used to name the checkpoint file under
            ``model_dir``.
        batch_size: mini-batch size passed to ``fit``.
        epochs: maximum number of epochs.
        patience: early-stopping patience on ``val_loss``.
        valid_size: fraction of the training data held out for validation.
        device: TensorFlow device string.

    Returns:
        ``(train_history, model)``.
    """
    # The checkpoint callback does not create its target directory.
    os.makedirs(model_dir, exist_ok=True)
    # val_loss is minimised; mode='max' kept the *worst* epoch as "best".
    checkpoint = ModelCheckpoint(f"{model_dir}/{dataset}_window_{window_size}_shift_{shift}.h5",
                                 monitor='val_loss', verbose=1, save_best_only=True, mode='min')

    early_stopping = EarlyStopping(monitor='val_loss', patience=patience)

    # Weight every non-hold sample by (#hold rows / #non-hold rows).
    # The previous np.where-based counts tallied matching *elements* of the
    # one-hot matrix rather than rows, which skewed the ratio, and divided
    # by zero when there were no buy/sell rows.
    is_hold = (y_train == np.array([1, 0, 0])).all(axis=1)
    n_hold = int(is_hold.sum())
    n_other = len(y_train) - n_hold
    weight_val = n_hold / n_other if n_other else 1.0
    sample_weight = np.ones(shape=(len(y_train),))
    sample_weight[~is_hold] = weight_val

    with tf.device(device):
        # Pass batch_size itself. It used to feed only steps_per_epoch while
        # fit() kept its default batch size, so an epoch did not correspond
        # to one pass over the training data.
        train_history = model.fit(X_train, y_train,
                                  batch_size=batch_size,
                                  epochs=epochs,
                                  sample_weight=sample_weight,
                                  shuffle=True,
                                  verbose=1, validation_split=valid_size,
                                  callbacks=[early_stopping, checkpoint])
    return train_history, model


def show_final_history(history, dataset, window_size, shift):
    """Plot train/validation loss and accuracy curves and save them as a PNG.

    The figure is written to
    ``./outputs/<dataset>/window_<window_size>_shift_<shift>.png``; the
    directory is created when missing. Switches matplotlib to the ``ggplot``
    style as a side effect.

    Args:
        history: object returned by ``model.fit``; its ``history`` dict must
            hold ``loss``, ``val_loss``, ``accuracy`` and ``val_accuracy``.
        dataset, window_size, shift: used to build the output path.
    """
    plt.style.use("ggplot")
    fig, ax = plt.subplots(1, 2, figsize=(15, 5))
    ax[0].set_title('Loss')
    ax[1].set_title('Accuracy')
    ax[0].plot(history.history['loss'], label='Train Loss')
    ax[0].plot(history.history['val_loss'], label='Validation Loss')
    # Legend label previously contained a stray '|' ("Train Accura|cy").
    ax[1].plot(history.history['accuracy'], label='Train Accuracy')
    ax[1].plot(history.history['val_accuracy'], label='Validation Accuracy')

    ax[0].legend(loc='upper right')
    ax[1].legend(loc='lower right')

    # Local name chosen so it does not shadow the module-level output_dir.
    figure_dir = f"./outputs/{dataset}"
    os.makedirs(figure_dir, exist_ok=True)

    fig.savefig(f"{figure_dir}/window_{window_size}_shift_{shift}.png")


def plot_confusion_matrix(cm, classes, title='Confusion Matrix', cmap=plt.cm.Blues):
    """Draw a row-normalised confusion matrix on a new 10x10 figure.

    Args:
        cm: square array of counts, true classes on rows.
        classes: tick labels, one per row/column.
        title: figure title.
        cmap: matplotlib colormap.
    """
    counts = cm.astype('float')
    row_totals = counts.sum(axis=1)[:, np.newaxis]
    # A class with no true samples has an all-zero row; dividing by 1 keeps
    # that row at 0.0 instead of producing NaN and a divide warning.
    row_totals[row_totals == 0] = 1
    cm = counts / row_totals

    plt.figure(figsize=(10, 10))
    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')


def dump_confusion_matrix(model, x, y, dataset, data_type, window_size, shift):
    """Predict on ``x``, plot the confusion matrix against ``y`` and save it.

    The image is written to ``./outputs/<dataset>/`` as
    ``confusiont_window_<window_size>_shift_<shift>_<data_type>.png``
    (file name kept as-is, including its spelling, so existing outputs and
    consumers keep matching).

    Args:
        model: trained model exposing ``predict``.
        x: input windows.
        y: one-hot true labels.
        dataset, data_type, window_size, shift: used to build the output path.
    """
    pred = model.predict(x)
    pred = np.argmax(pred, axis=1)
    actual = np.argmax(y, axis=1)
    # Pin the label set so the matrix always has one row/column per entry of
    # class_names, even when a class is absent from both `actual` and `pred`.
    cnf_mat = confusion_matrix(actual, pred, labels=list(range(len(class_names))))
    np.set_printoptions(precision=2)

    # plot_confusion_matrix opens its own figure, so none is created here
    # (the extra plt.figure() call only left an empty figure behind).
    plot_confusion_matrix(cnf_mat, classes=class_names)
    plt.grid(None)

    figure_dir = f"./outputs/{dataset}"
    os.makedirs(figure_dir, exist_ok=True)

    plt.savefig(f"{figure_dir}/confusiont_window_{window_size}_shift_{shift}_{data_type}.png")


In [30]:
# Load the BTC prices and label them with no shift and a 10% swing threshold.
df = labelling(get_btc(), 0, 0.1)
df.head(50)

  rise = (price - price_min) / price_min
  profit = (price_max - price_min) / price_min


Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,0
2014-09-19,424.102997,427.834991,384.532013,394.79599,37919700,1
2014-09-20,394.673004,423.29599,389.882996,408.903992,36863600,0
2014-09-21,408.084991,412.425995,393.181,398.821014,26580100,0
2014-09-22,399.100006,406.915985,397.130005,402.152008,24127600,0
2014-09-23,402.09201,441.557007,396.196991,435.790985,45099500,2
2014-09-24,435.751007,436.112,421.131989,423.204987,30627700,0
2014-09-25,423.156006,423.519989,409.467987,411.574005,26814400,0
2014-09-26,411.428986,414.937988,400.009003,404.424988,21460800,0


In [33]:
# Widen the buy/sell labels of `df` in place, then preview the result.
# NOTE(review): this mutates the frame shown by the previous cell, so
# re-running this cell relabels already-relabelled data; re-create `df` first.
range_buy_sell(df)
df.head(50)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,0
2014-09-19,424.102997,427.834991,384.532013,394.79599,37919700,1
2014-09-20,394.673004,423.29599,389.882996,408.903992,36863600,0
2014-09-21,408.084991,412.425995,393.181,398.821014,26580100,1
2014-09-22,399.100006,406.915985,397.130005,402.152008,24127600,1
2014-09-23,402.09201,441.557007,396.196991,435.790985,45099500,2
2014-09-24,435.751007,436.112,421.131989,423.204987,30627700,2
2014-09-25,423.156006,423.519989,409.467987,411.574005,26814400,0
2014-09-26,411.428986,414.937988,400.009003,404.424988,21460800,0


In [32]:
def range_buy_sell(df, tolerance=0.03):
    """Widen buy/sell labels to hold rows whose close is near a buy/sell price.

    ``df`` is modified in place; nothing is returned. Rows are visited in
    order. A row whose label is neither 1 (buy) nor 2 (sell) is relabelled:

    * 1 if its ``Close`` is within ``tolerance`` (relative) of the current
      buy price, i.e. the first buy price until a sell row is passed, then
      the next buy price, and so on;
    * 2 if its ``Close`` is within ``tolerance`` of the current sell price,
      which only becomes available once the first buy row has been passed
      and advances with every further buy row. When both tests match, the
      sell label wins.

    Processing stops at the sell row that follows the last buy price.

    NOTE(review): the first cell of the notebook also defines
    ``range_buy_sell``; keep the two definitions in sync or delete one.

    Args:
        df: frame with a numeric ``Close`` column and a ``Label`` column.
        tolerance: maximum relative price distance for relabelling
            (default 0.03, the value that used to be hard-coded).
    """
    # Snapshot both columns so the labels read by the loop are the original
    # ones, regardless of the in-place writes below.
    closes = df['Close'].tolist()
    labels = df['Label'].tolist()

    buy_point = [price for price, label in zip(closes, labels) if label == 1]
    sell_point = [price for price, label in zip(closes, labels) if label == 2]

    # Without any buy price nothing can be relabelled: the sell test is only
    # enabled after a buy row. Previously the first hold row raised IndexError.
    if not buy_point:
        return

    buy_pos = 0
    sell_pos = -1
    for i, price, label in zip(df.index, closes, labels):
        if label == 1:
            sell_pos += 1
        elif label == 2:
            buy_pos += 1
            if buy_pos == len(buy_point):
                break
        else:
            cur_buy = buy_point[buy_pos]
            if abs(price - cur_buy) / cur_buy <= tolerance:
                df.at[i, 'Label'] = 1
            # Skip the sell test before the first buy, and also when more buy
            # rows than sell prices have been seen (the latter used to raise
            # IndexError).
            if sell_pos == -1 or sell_pos >= len(sell_point):
                continue
            cur_sell = sell_point[sell_pos]
            if abs(cur_sell - price) / cur_sell <= tolerance:
                df.at[i, 'Label'] = 2


In [29]:
# Preview the first 50 rows of `df`.
# NOTE(review): this cell's execution count (29) is lower than that of the
# cells above it, so the stored output may not match a top-to-bottom run.
df[:50]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,0
2014-09-19,424.102997,427.834991,384.532013,394.79599,37919700,1
2014-09-20,394.673004,423.29599,389.882996,408.903992,36863600,0
2014-09-21,408.084991,412.425995,393.181,398.821014,26580100,1
2014-09-22,399.100006,406.915985,397.130005,402.152008,24127600,1
2014-09-23,402.09201,441.557007,396.196991,435.790985,45099500,2
2014-09-24,435.751007,436.112,421.131989,423.204987,30627700,2
2014-09-25,423.156006,423.519989,409.467987,411.574005,26814400,0
2014-09-26,411.428986,414.937988,400.009003,404.424988,21460800,0


In [16]:
# Preview the first 50 rows of `df` (same expression as the previous cell).
# NOTE(review): execution count 16 predates the cells above; consider
# deleting this stale duplicate.
df[:50]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,0
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,0
2014-09-19,424.102997,427.834991,384.532013,394.79599,37919700,1
2014-09-20,394.673004,423.29599,389.882996,408.903992,36863600,0
2014-09-21,408.084991,412.425995,393.181,398.821014,26580100,0
2014-09-22,399.100006,406.915985,397.130005,402.152008,24127600,0
2014-09-23,402.09201,441.557007,396.196991,435.790985,45099500,2
2014-09-24,435.751007,436.112,421.131989,423.204987,30627700,0
2014-09-25,423.156006,423.519989,409.467987,411.574005,26814400,0
2014-09-26,411.428986,414.937988,400.009003,404.424988,21460800,0


In [17]:
# Preview rows 50-99 of `df`.
df[50:100]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2014-11-06,339.458008,352.966003,338.424011,349.290009,18797000,0
2014-11-07,349.817993,352.731995,341.776001,342.415009,16834200,0
2014-11-08,342.153992,347.032013,342.153992,345.488007,8535470,0
2014-11-09,345.376007,363.626007,344.255005,363.264008,24205600,0
2014-11-10,362.265015,374.81601,357.561005,366.924011,30450100,0
2014-11-11,365.856995,371.309998,363.734985,367.695007,15838900,0
2014-11-12,367.984985,429.717987,367.984985,423.561005,45783200,2
2014-11-13,427.27301,457.092987,401.122986,420.734985,58945000,0
2014-11-14,418.416992,419.252014,384.789001,397.817993,29589200,0
2014-11-15,399.649994,405.528015,371.007996,376.132996,15727500,0


In [47]:
# Preview the last 10 rows of `df`.
df.tail(10)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Label
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2021-12-01,56907.964844,59041.683594,56553.082031,57229.828125,36858195307,0
2021-12-02,57217.371094,57349.234375,55895.132812,56477.816406,32379968686,0
2021-12-03,56509.164062,57482.167969,52496.585938,53598.246094,39789134215,0
2021-12-04,53727.878906,53904.679688,42874.617188,49200.703125,61385677469,0
2021-12-05,49201.519531,49768.148438,47857.496094,49368.847656,37198201161,0
2021-12-06,49413.480469,50929.519531,47281.035156,50582.625,37707308001,0
2021-12-07,50581.828125,51934.78125,50175.808594,50700.085938,33676814852,0
2021-12-08,50667.648438,51171.375,48765.988281,50504.796875,28479699446,0
2021-12-09,50450.082031,50797.164062,47358.351562,47672.121094,29603577251,0
2021-12-10,47597.632812,49899.292969,47415.722656,48018.304688,31143962624,0


In [39]:
# Number of rows currently labelled 0 (hold).
int((df['Label'] == 0).sum())

2168

In [54]:
# Simple back-test of the labels: buy on label 1, sell on label 2.
# NOTE(review): `res` is not defined in any visible cell - presumably the
# labelled price frame (needs `Label` and `Close` columns); confirm and
# define it explicitly so the cell survives Restart & Run All.
hold = 1000000        # cash balance
sharehold = 0         # units of the asset currently held
percentage = 0.2      # fraction of cash / holdings traded per signal



for i, data in res.iterrows():
    label = data['Label']
    price = data['Close']
    # NOTE(review): prints one line per row; floods the cell output.
    print(label, price)
    # Buy signal, acted on only while the cash covers one full unit.
    if label == 1 and hold >= price:
        if hold*percentage < price:
            # `percentage` of the cash buys less than one unit: buy exactly one.
            hold -= price
            sharehold += 1
        else:
            # Otherwise spend `percentage` of the cash at the current price.
            to_buy = hold*percentage
            hold *= (1-percentage)
            sharehold += to_buy/price
            
    # Sell signal, acted on only while at least 1/(percentage*2) units are
    # held (2.5 units with percentage = 0.2); sells `percentage` of them.
    elif label == 2 and sharehold >= 1/(percentage*2):
        to_sell = sharehold*percentage
        sharehold -= to_sell
        hold += to_sell * price


# Value the remaining holdings at the last close and report the totals.
latest_price = res.iloc[-1]['Close']
print(f'final hold of cash: ${hold}', )
print(f'final hold of bitcoin: {sharehold}, which equals to {sharehold*latest_price} usd dollars')
print(f'total: ${hold+sharehold*latest_price}')

0.0 457.3340148925781
0.0 424.4400024414063
1.0 394.7959899902344
0.0 408.9039916992188
1.0 398.8210144042969
1.0 402.1520080566406
2.0 435.7909851074219
2.0 423.2049865722656
0.0 411.5740051269531
0.0 404.4249877929688
0.0 399.5199890136719
0.0 377.1809997558594
0.0 375.4670104980469
0.0 386.9440002441406
0.0 383.614990234375
0.0 375.0719909667969
0.0 359.5119934082031
1.0 328.8659973144531
1.0 320.510009765625
1.0 330.0790100097656
0.0 336.18701171875
0.0 352.94000244140625
0.0 365.0260009765625
0.0 361.56201171875
0.0 362.2990112304688
0.0 378.5490112304688
2.0 390.41400146484375
2.0 400.8699951171875
2.0 394.7730102539063
0.0 382.5559997558594
0.0 383.7579956054688
2.0 391.4419860839844
2.0 389.5459899902344
0.0 382.8450012207031
0.0 386.4750061035156
0.0 383.1579895019531
0.0 358.4169921875
0.0 358.3450012207031
0.0 347.27099609375
0.0 354.7040100097656
0.0 352.989013671875
0.0 357.6180114746094
0.0 335.59100341796875
0.0 345.30499267578125
0.0 338.3210144042969
1.0 325.7489929199