In [1]:
# import packages
import pandas as pd
import numpy as np

# to plot within notebook
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = 16,9

# read data, set index as dates, then plot price
df = pd.read_csv('../res/input0130.csv')
df.index = df['date']

In [None]:
plt.plot(df.index, df['close'], label='Price History')
plt.legend()

In [None]:
# plot returns
plt.plot(df['date'], df['log_r'], label='Price History')
plt.legend()

In [None]:
# test some important properties
import statsmodels.tsa.stattools as ts

# acf and pacf
# acf figure:
plt.stem(ts.acf(df.log_r))

In [None]:
# pacf figure:
plt.stem(ts.pacf(df.log_r))

In [None]:
# adf test
print('adf test result:')
print(ts.adfuller(df.log_r))
print('kpss test result:')
print(ts.kpss(df.log_r))
# results are pretty good, so primarily we say it's wide-stationary

In [2]:
#importing required libraries
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, LSTM
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical
# follow the literature
# we don't use min-max scaling here, use partial mean-std scaling instead
from sklearn.preprocessing import StandardScaler
from itertools import chain

# and we define our model here
def lstm_model():
    model = Sequential()
    model.add(LSTM(units=25, dropout=0.1, return_sequences=True, input_shape=(240,1)))
    model.add(Activation("tanh"))
    model.add(LSTM(units=25))
    model.add(Activation("tanh"))
    model.add(Dense(2, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    return model

Using TensorFlow backend.


In [3]:
# we use rolling window, 750 bars for train and 250 for test
scaler = StandardScaler()
starter = range(240,len(df)-1000,250)
all_results = []
all_evas = []
y_pred_clf = []
for train_begin in starter:
    train_end = train_begin + 750
    test_end = train_end + 250
    train_set = df[{'log_r','label'}][train_begin:train_end].reset_index()
    x_train, y_train = [], []
    x_train_set = list(chain.from_iterable(scaler.fit_transform(train_set['log_r'].values.reshape(-1,1))))
    for i in range(240,len(x_train_set)):
        x_train.append(x_train_set[i-240:i])
        y_train.append(train_set['label'][i])
    x_train, y_train = np.array(x_train), np.array(y_train)
    y_train = to_categorical(y_train,2)
    x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1)) 
    
    model = lstm_model()
    model.fit(x_train,y_train,epochs=100,callbacks=[EarlyStopping(monitor='loss',patience=10)])

    x_test, y_test = [], []
    test_set = df[{'log_r','label'}][train_end-240:test_end].reset_index()
    x_test_set = list(chain.from_iterable(scaler.transform(test_set['log_r'].values.reshape(-1,1))))
    for i in range(240,len(x_test_set)):
        x_test.append(x_test_set[i-240:i])
        y_test.append(test_set['label'][i])
    x_test, y_test = np.array(x_test), np.array(y_test)
    x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1],1)) 
    y_test = to_categorical(y_test,2)
    
    all_results.append(model.predict(x_test))
    all_evas.append(model.evaluate(x_test, y_test))
    y_pred_clf.append(model.predict_classes(x_test))
    break


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 7

In [13]:
from sklearn import metrics


for train_begin in starter:
    train_end = train_begin + 750
    test_end = train_end + 250
    train_set = df[{'log_r','label'}][train_begin:train_end].reset_index()
    x_train, y_train = [], []
    x_train_set = list(chain.from_iterable(scaler.fit_transform(train_set['log_r'].values.reshape(-1,1))))
    for i in range(240,len(x_train_set)):
        x_train.append(x_train_set[i-240:i])
        y_train.append(train_set['label'][i])
    x_train, y_train = np.array(x_train), np.array(y_train)
    # y_train = to_categorical(y_train,2)
    x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1)) 
    x_test, y_test = [], []
    test_set = df[{'log_r','label'}][train_end-240:test_end].reset_index()
    x_test_set = list(chain.from_iterable(scaler.transform(test_set['log_r'].values.reshape(-1,1))))
    for i in range(240,len(x_test_set)):
        x_test.append(x_test_set[i-240:i])
        y_test.append(test_set['label'][i])
    x_test, y_test = np.array(x_test), np.array(y_test)
    x_test = np.reshape(x_test, (x_test.shape[0],x_test.shape[1],1)) 
    y_pred = model.predict_classes(x_train)
    # all_results.append(model.predict(x_test))
    print(metrics.accuracy_score(y_train, y_pred))
    # y_pred_clf.append(model.predict_classes(x_test))

0.7137254901960784
0.6254901960784314
0.5450980392156862
0.4980392156862745
0.49607843137254903
0.5313725490196078


KeyboardInterrupt: 

In [None]:
from sklearn import metrics

metrics.accuracy_score()

In [8]:
model.evaluate(x_test, y_test)



0.92156671667099

In [4]:
all_results

[array([[0.58558035, 0.41441968],
        [0.632435  , 0.36756495],
        [0.7901696 , 0.20983036],
        [0.8393114 , 0.16068858],
        [0.7285606 , 0.27143937],
        [0.76550853, 0.23449145],
        [0.60321194, 0.396788  ],
        [0.39562562, 0.60437435],
        [0.8017488 , 0.19825116],
        [0.71366394, 0.28633603],
        [0.89344186, 0.10655807],
        [0.55107474, 0.44892526],
        [0.88251483, 0.1174852 ],
        [0.9040997 , 0.09590032],
        [0.72038823, 0.27961177],
        [0.8730326 , 0.12696736],
        [0.9119338 , 0.08806619],
        [0.8963829 , 0.10361706],
        [0.9506015 , 0.04939847],
        [0.8860689 , 0.11393112],
        [0.8829777 , 0.11702226],
        [0.57153374, 0.42846626],
        [0.90522677, 0.09477323],
        [0.6834715 , 0.3165285 ],
        [0.58991265, 0.41008735],
        [0.5537586 , 0.44624138],
        [0.15473409, 0.84526587],
        [0.06548354, 0.93451643],
        [0.0983642 , 0.9016358 ],
        [0.076

In [5]:
all_evas

[0.92156671667099]

In [14]:
model.save('lstm_r.h5')

In [15]:
# see what's in model
model.load('lstm_r.h5')
model.summary()

AttributeError: 'Sequential' object has no attribute 'load'

In [None]:
from tqdm import tqdm
from keras.models import load_model, Model
from sa import fetch_dsa, fetch_lsa, get_sc, 
from utils import *

model = load_model('lstm_r.h5')
default_upper_bound = 2000
default_n_bucket = 1000
default_n_classes = 2

In [None]:
class Args(): #创建一个类
    def __init__(self): #定义初始化信息。
        self.is_classification = True
        self.save_path = './tmp/'
        self.d = 'lstm_r'
        self.num_classes = 2
        self.lsa = True
        self.dsa = True
        self.target = 'none'
        self.batch_size = 128
        self.var_threshold = 1e-5
        self.upper_bound = 2000
        self.n_bucket = 1000
        self.num_classes = 10
        self.is_classification = True
args = Args()

In [None]:
def fetch_lsa(model, x_train, x_target, target_name, layer_names, args):
    """Likelihood-based SA

    Args:
        model (keras model): Subject model.
        x_train (list): Set of training inputs.
        x_target (list): Set of target (test or[] adversarial) inputs.
        target_name (str): Name of target set.
        layer_names (list): List of selected layer names.
        args: Keyboard args.

    Returns:
        lsa (list): List of lsa for each target input.
    """

    prefix = info("[" + target_name + "] ")
    train_ats, train_pred, target_ats, target_pred = _get_train_target_ats(
        model, x_train, x_target, target_name, layer_names, args
    )

    class_matrix = {}
    if args.is_classification:
        for i, label in enumerate(train_pred):
            label = label[0]
            if label not in class_matrix:
                class_matrix[label] = []
            class_matrix[label].append(i)

    kdes, removed_cols = _get_kdes(train_ats, train_pred, class_matrix, args)

    lsa = []
    print(prefix + "Fetching LSA")
    if args.is_classification:
        for i, at in enumerate(tqdm(target_ats)):
            label = target_pred[i]
            kde = kdes[label]
            lsa.append(_get_lsa(kde, at, removed_cols))
    else:
        kde = kdes[0]
        for at in tqdm(target_ats):
            lsa.append(_get_lsa(kde, at, removed_cols))

    return lsa

In [None]:
layer_names = ['lstm_34']

In [None]:
# pred = model.predict_classes(x_test)
# matrix = {}
# for i,label in enumerate(pred):
#     if label[0] not in matrix:
#         print(label)

In [None]:
test_lsa = fetch_lsa(model, x_train, x_test, "test", layer_names, args)
target_lsa = fetch_lsa(model, x_train, x_test, "target", layer_names, args)
# target_lsa = fetch_lsa(model, x_train, x_target, args.target, layer_names, args)
target_cov = get_sc(
    np.amin(target_lsa), default_upper_bound, default_n_bucket, target_lsa
)

auc = compute_roc_auc(test_lsa, target_lsa)
print(infog("ROC-AUC: " + str(auc * 100)))

print(infog("LSA coverage: " + str(target_cov)))

In [None]:
test_dsa = fetch_dsa(model, x_train, x_test, "test", layer_names, args)

target_dsa = fetch_dsa(model, x_train, x_test, args.target, layer_names, args)
target_cov = get_sc(
    np.amin(target_dsa), default_upper_bound, default_n_bucket, target_dsa
)

auc = compute_roc_auc(test_dsa, target_dsa)
print(infog("ROC-AUC: " + str(auc * 100)))

print(infog("DSA coverage: " + str(target_cov)))