### Import modules

In [1]:
import keras_bitcoin
import dataset
import collections
import numpy as np

Using TensorFlow backend.


### Create Train/Test Dataset

In [2]:
df = dataset.get_labeled_dataset(number_of_file=12, from_date="2017-12-16")
print(collections.Counter(df["label"]))

Number of files loaded :  12
Counter({0: 9621, 1: 4981})


### Format Dataset

In [5]:
texts = df["text"]
labels = df["label"]
texts_train, texts_test , labels_train, labels_test, vocab_length, max_sentence_size = keras_bitcoin.get_train_test_data(texts, labels)

### Set Model Params

In [8]:
keras_bitcoin.available_activation_functions

['tanh',
 'elu',
 'softmax',
 'selu',
 'softplus',
 'softsign',
 'relu',
 'sigmoid',
 'hard_sigmoid',
 'exponential',
 'linear']

In [45]:
#
#bad result alone : relu, elu
#good result alone : sigmoid
#ep = 15, batch = 100 +  sig + relu + tanh = 0.32
#ep = 15, batch = 100 +  sig + tanh        = 0.67
#ep = 15, batch = 100 + tanh + sig         = 0.67
epochs=5
batch_size=None
activations_functions=["PReLU"]
dropouts = {}

### Train Mode

In [46]:
model = keras_bitcoin.get_model(texts_train, labels_train, vocab_length, max_sentence_size, epochs=epochs, batch_size=batch_size, activations_functions=activations_functions)

ValueError: Unknown activation function:PReLU

###  Model Evaluation

In [86]:
loss, accuracy = model.evaluate(texts_test, labels_test, verbose=1)
print('Accuracy: %f' % (accuracy*100))

Accuracy: 62.161034


###  Get Predictions

In [7]:
df_test = dataset.get_labeled_dataset(number_of_file = 30, from_date = "2017-02-10", date_included = False, all_files=True)
print(df_test.shape,df_test.head(2))

Number of files loaded :  668
(114664, 3)                                                 text  label        date
0  Recent comment by him with regards to Gavin An...      1  2015-12-15
1          I'm glad your trolling has come to an end      1  2015-12-15


In [15]:
preds = keras_bitcoin.get_predictions(list(df_test["text"]), model, vocab_length, max_sentence_size)
df_test["preds"] = list(map(lambda x : int(x), preds))
df_test["correct"] = np.equal(preds, df_test["label"])

### Predictions Stats

In [16]:
dataset.get_prediction_stats(df_test)

Number Correct/Wrong Guess : 51984/62680
              Accuracy : 45.33593804507082
Invalid sentences count 10


### Find best settings

In [92]:
import json
epochs=5
batch_size=None
perfs = {}
perfs["epochs"] = epochs
perfs["batch_size"] = batch_size
perfs["number_files_used"] = 12
for function in keras_bitcoin.available_activation_functions:
    model = keras_bitcoin.get_model(texts_train, labels_train, vocab_length, max_sentence_size, epochs=epochs, batch_size=batch_size, activations_functions=[function])
    loss, accuracy = model.evaluate(texts_test, labels_test, verbose=1)
    perfs[function] = accuracy
    
    
with open(f"./keras_stats/ep={epochs}_bats={batch_size}.json", "w") as fp:
    json.dump(perfs, fp)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_96"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_96 (Embedding)     (None, 1116, 20)          463600    
_________________________________________________________________
flatten_96 (Flatten)         (None, 22320)             0         
_________________________________________________________________
dense_101 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_97"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_97 (Embedding)     (None, 1116, 20)          463600    
_________________________________________________________________
flatten_97 (Flatten)         (None, 22320)             0         
_________________________________________________________________
dense_102 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_98"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_98 (Embedding)     (None, 1116, 20)          463600    
_________________________________________________________________
flatten_98 (Flatten)         (None, 22320)             0         
_________________________________________________________________
dense_103 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_99"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_99 (Embedding)     (None, 1116, 20)          463600    
_________________________________________________________________
flatten_99 (Flatten)         (None, 22320)             0         
_________________________________________________________________
dense_104 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_100"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_100 (Embedding)    (None, 1116, 20)          463600    
_________________________________________________________________
flatten_100 (Flatten)        (None, 22320)             0         
_________________________________________________________________
dense_105 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_101"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_101 (Embedding)    (None, 1116, 20)          463600    
_________________________________________________________________
flatten_101 (Flatten)        (None, 22320)             0         
_________________________________________________________________
dense_106 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_102"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_102 (Embedding)    (None, 1116, 20)          463600    
_________________________________________________________________
flatten_102 (Flatten)        (None, 22320)             0         
_________________________________________________________________
dense_107 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_103"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_103 (Embedding)    (None, 1116, 20)          463600    
_________________________________________________________________
flatten_103 (Flatten)        (None, 22320)             0         
_________________________________________________________________
dense_108 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_104"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_104 (Embedding)    (None, 1116, 20)          463600    
_________________________________________________________________
flatten_104 (Flatten)        (None, 22320)             0         
_________________________________________________________________
dense_109 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_105"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_105 (Embedding)    (None, 1116, 20)          463600    
_________________________________________________________________
flatten_105 (Flatten)        (None, 22320)             0         
_________________________________________________________________
dense_110 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Model: "sequential_106"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_106 (Embedding)    (None, 1116, 20)          463600    
_________________________________________________________________
flatten_106 (Flatten)        (None, 22320)             0         
_________________________________________________________________
dense_111 (Dense)            (None, 1)                 22321     
Total params: 485,921
Trainable params: 485,921
Non-trainable params: 0
_________________________________________________________________
None


In [107]:
import os
import pandas as pd

directory = "./keras_stats/"
dicts = []
for file in os.listdir(directory):
    f = open(f'{directory}{file}')
    dicts.append(json.load(f))

functions = keras_bitcoin.available_activation_functions

perfs_stats = {"function" : [], "min" : [], "max" : [], "avg" : []}
for function in functions:
    sum_perfs = 0
    min_perfs = 1
    max_perfs = -1
    for dct in dicts:
        value = dct[function]
        sum_perfs += value
        if value > max_perfs:
            max_perfs = value
        if value < min_perfs:
            min_perfs = value
    perfs_stats["function"].append(function)
    perfs_stats["avg"].append(sum_perfs / len(functions))
    perfs_stats["max"].append(max_perfs)
    perfs_stats["min"].append(min_perfs)

print(pd.DataFrame(perfs_stats))
#softmax softplus softsign sigmoid hard_sigmoid  

        function       min       max       avg
0           tanh  0.000000  0.619942  0.382144
1            elu  0.000000  0.589487  0.054955
2        softmax  0.617438  0.617438  0.449046
3           selu  0.000000  0.622862  0.267266
4       softplus  0.415102  0.624948  0.409944
5       softsign  0.574885  0.617856  0.441006
6           relu  0.000000  0.622028  0.161262
7        sigmoid  0.600334  0.626617  0.445102
8   hard_sigmoid  0.592407  0.619942  0.445102
9    exponential  0.000000  0.624531  0.320514
10        linear  0.000000  0.618273  0.208367


In [6]:
functions = ["softmax", "softplus", "softsign", "sigmoid", "hard_sigmoid"]
def get_combinations(arr):
    combinations = []
    for i in range(len(arr)):
        item = arr[i]
        for j in range(i + 1, len(arr)):
            combinations.append([item, arr[j]])
    return combinations

In [14]:
import json

epochs = 5
batch_size = None
perfs = {}
perfs["number_files_used"] = 12
functions_list = [["sigmoid","hard_sigmoid"]]
dropout_value = 0.2
for functions in functions_list:
    functions = list(functions)
    dropouts = {}
    #dropouts[functions[0]] = 0.1
    #dropouts[functions[1]] = 0.08
    model = keras_bitcoin.get_model(texts_train = texts_train, labels_train = labels_train, vocab_length = vocab_length, max_sentence_size = max_sentence_size, epochs = epochs, 
    batch_size = batch_size, activations_functions = functions, verbose = 0, dropouts = dropouts)
    loss, accuracy = model.evaluate(texts_test, labels_test, verbose=0)
    #perfs["_".join(functions)] = accuracy
    print("acc : ", accuracy)


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_4 (Embedding)      (None, 1495, 20)          501200    
_________________________________________________________________
flatten_4 (Flatten)          (None, 29900)             0         
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 29901     
_________________________________________________________________
dense_11 (Dense)             (None, 1)                 2         
Total params: 531,103
Trainable params: 531,103
Non-trainable params: 0
_________________________________________________________________
None
acc :  0.6556636691093445


In [96]:
last_date = "2010-10-11"
for i in range(200):
    df_test = dataset.get_labeled_dataset(number_of_file = 5, from_date = last_date, date_included = False)
    preds = keras_bitcoin.get_predictions(list(df_test["text"]), model, vocab_length, max_sentence_size)
    df_test["preds"] = list(map(lambda x : int(x), preds))
    df_test["correct"] = np.equal(preds, df_test["label"])
    #dataset.get_prediction_stats(df_test)
    last_date = list(df_test["date"])[-1]

Number Correct/Wrong Guess : 2938/448
              Accuracy : 86.7690490253987
Invalid sentences count 0
Number Correct/Wrong Guess : 0/2560
              Accuracy : 0.0
Invalid sentences count 0
Number Correct/Wrong Guess : 3020/1403
              Accuracy : 68.27944833823197
Invalid sentences count 0
Number Correct/Wrong Guess : 1319/880
              Accuracy : 59.9818099135971
Invalid sentences count 0
Number Correct/Wrong Guess : 853/1376
              Accuracy : 38.26828174069089
Invalid sentences count 0
Number Correct/Wrong Guess : 1038/611
              Accuracy : 62.94724075197089
Invalid sentences count 0
Number Correct/Wrong Guess : 693/574
              Accuracy : 54.69613259668509
Invalid sentences count 0
Number Correct/Wrong Guess : 1418/332
              Accuracy : 81.02857142857142
Invalid sentences count 0
Number Correct/Wrong Guess : 702/1022
              Accuracy : 40.71925754060325
Invalid sentences count 0
Number Correct/Wrong Guess : 722/515
              Accu

ZeroDivisionError: division by zero

In [None]:
 functions = ["softmax", "softplus", "softsign", "sigmoid", "hard_sigmoid"]
    ''' "softmax_softplus": 0.6580958962440491,
  "softmax_softsign": 0.6580958962440491,
  "softmax_sigmoid": 0.6580958962440491,
  "softmax_hard_sigmoid": 0.6580958962440491,
  "softplus_softsign": 0.6580958962440491,
  "softplus_sigmoid": 0.7466990947723389,
  "softplus_hard_sigmoid": 0.6580958962440491,
  "softsign_sigmoid": 0.7289784550666809,
  "softsign_hard_sigmoid": 0.7338429689407349,
  "sigmoid_hard_sigmoid": 0.7515636086463928'''
    
    {
  "softplus_sigmoid": 0.7466990947723389,
  "softsign_sigmoid": 0.7289784550666809,
  "softsign_hard_sigmoid": 0.7338429689407349,
  "sigmoid_hard_sigmoid": 0.7515636086463928
}
[["softplus", "sigmoid"], ["softsign", "sigmoid"], ["softsign", "hard_sigmoid"], ["sigmoid", "hard_sigmoid"]]