In [None]:
import os
from statistics import mean
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# https://stackoverflow.com/questions/56820327/the-name-tf-session-is-deprecated-please-use-tf-compat-v1-session-instead
import tensorflow.compat.v1 as tf
from keras.preprocessing import sequence
from keras.models import Sequential, Model
from keras.layers import Dense, Embedding, Input, Concatenate, LSTM

In [None]:
def get_min_max(dataset):
  """Return the smallest and largest value found in the 'Close' column.

  Args:
    dataset: Object indexable by the key 'Close' (a pandas DataFrame in
      this notebook).

  Returns:
    Tuple ``(lowest, highest)`` as computed by ``np.amin`` and ``np.amax``.
  """
  close_values = dataset['Close']
  return np.amin(close_values), np.amax(close_values)

In [None]:
def get_number_for_date(dataset, date):
  """Translate a date string into a row number of ``dataset``.

  When some cell of ``dataset`` equals ``date``, the index label of the first
  matching row is returned. Otherwise the number is extrapolated from the
  whole-day distance between ``date`` and the first or last entry of the
  'Date' column.

  Args:
    dataset: pandas DataFrame with a 'Date' column of strings formatted as
      '%Y-%m-%d %H:%M:%S'.
    date: Date string in the same format.

  Returns:
    The index label of the matching row, a negative day offset when ``date``
    lies before the first row, or ``len(dataset)`` plus the day offset from
    the last row otherwise.

  Raises:
    ValueError: If ``date`` (or a stored date needed for the extrapolation)
      does not match the expected format.
  """
  matches = dataset.index[dataset.isin([date]).any(axis=1)].values
  # Check the number of hits rather than the truthiness of the array:
  # array([0]) is falsy, so a hit on row 0 used to be treated as "not found",
  # and truth-testing an empty array is deprecated in NumPy.
  if matches.size > 0:
    # A one-element slice keeps .item() usable for any index dtype.
    return matches[:1].item()

  date_format = '%Y-%m-%d %H:%M:%S'
  date_obj = datetime.strptime(date, date_format)
  dates = dataset['Date']

  # Positional access, so the lookup does not depend on the index labels.
  first_diff = (date_obj - datetime.strptime(dates.iloc[0], date_format)).days
  if first_diff < 0:
    return first_diff

  last_diff = (date_obj - datetime.strptime(dates.iloc[-1], date_format)).days
  # NOTE(review): a date d days after the last row maps to len(dataset) + d,
  # which is one more than (last row number + d). Kept as in the original;
  # confirm whether that offset is intended.
  return len(dataset) + last_diff

In [None]:
def get_window(dataset, window_length, date, flag = False):
  """Collect ``window_length`` consecutive 'Close' values preceding ``date``.

  The row number for ``date`` comes from ``get_number_for_date``. The window
  covers the row labels ``[end - window_length, end)``, where ``end`` is that
  row number when ``flag`` is true and one less otherwise. A label that
  raises ``KeyError`` on lookup is replaced by the average of the whole
  'Close' column.

  Args:
    dataset: pandas DataFrame with 'Date' and 'Close' columns.
    window_length: Number of values in the window.
    date: Date string identifying the reference row.
    flag: Selects which of the two end positions described above is used.

  Returns:
    NumPy array of the collected values reshaped to ``(window_length, -1)``.
  """
  last = get_number_for_date(dataset, date)
  if not flag:
    last = last - 1
  first = last - window_length

  def close_at(label):
    # Labels outside the index fall back to the column average.
    try:
      return dataset.loc[label]['Close']
    except KeyError:
      return np.average(dataset['Close'])

  values = np.array([close_at(label) for label in range(first, last)])
  return np.reshape(values, (window_length, -1))

In [None]:
def vector_of_words(valuta, index, broj_valuta):
  """Build a one-hot list of length ``broj_valuta`` with a 1 at ``index``.

  Args:
    valuta: Currency name; accepted but not used by the encoding.
    index: Position to set to 1 (regular list indexing rules apply).
    broj_valuta: Length of the resulting list.

  Returns:
    List of integers, all 0 except for a single 1 at ``index``.
  """
  one_hot = [0 for _ in range(broj_valuta)]
  one_hot[index] = 1
  return one_hot

In [None]:
def get_random(input_windows, input_valute, output):
  """Shuffle three arrays with one shared random permutation.

  The permutation length is the first dimension of ``input_windows``; the
  same order is applied to all three arrays so corresponding rows stay
  aligned.

  Args:
    input_windows: NumPy array whose first axis enumerates the samples.
    input_valute: NumPy array indexed with the same permutation.
    output: NumPy array indexed with the same permutation.

  Returns:
    Tuple ``(input_windows, input_valute, output)`` in the shuffled order.
  """
  order = np.random.permutation(input_windows.shape[0])
  return tuple(arr[order] for arr in (input_windows, input_valute, output))

In [None]:
def create_model(window_length, embedding_length, num_currencies = 50):
  """Build and compile the two-input Keras model.

  The 'valuta' input is passed through an Embedding layer and concatenated
  along axis 1 with the 'window' input. The result goes through a Dense(256)
  layer, an LSTM(256), another Dense(256) and a final single-unit Dense
  layer. The model is compiled with the Adam optimizer and MSE loss.

  Args:
    window_length: Number of time steps of the 'window' input, whose shape is
      ``(window_length, 1)``.
    embedding_length: Output dimension of the Embedding layer. It has to be 1
      because the embedded tensor is concatenated along axis 1 with the
      window tensor, whose last dimension is 1.
    num_currencies: Length of the 'valuta' input vector. Defaults to 50, the
      value that used to be hard-coded.

  Returns:
    The compiled Keras ``Model`` taking ``[valuta, window]`` as inputs.

  Raises:
    ValueError: If ``embedding_length`` is not 1.
  """
  if embedding_length != 1:
    raise ValueError(
      'embedding_length must be 1 so that the embedded currency vector can be '
      'concatenated with the (window_length, 1) window input'
    )

  nlp_input = Input(shape=(num_currencies, ), name='valuta')
  vl_input = Input(shape=(window_length, 1), name='window')

  # The entries of the 'valuta' vector are used as token ids by Embedding, so
  # the table needs at least two rows to cover the ids 0 and 1.
  vocabulary_size = num_currencies if num_currencies > 2 else 2
  emb = Embedding(input_dim=vocabulary_size, output_dim=embedding_length)(nlp_input)

  full_input = Concatenate(axis = 1)([emb, vl_input])
  # No input_shape here: the layer is applied to a tensor, which fixes its
  # input shape already.
  x = Dense(256, activation = 'relu')(full_input)
  x = LSTM(256, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = False)(x)
  x = Dense(256, activation = 'relu')(x)
  x = Dense(1)(x)

  model = Model(inputs=[nlp_input, vl_input], outputs=[x])
  # NOTE(review): 'accuracy' is kept because callers unpack two values from
  # evaluate(); for this MSE regression objective the loss is the figure to
  # judge the fit by.
  model.compile(optimizer='adam',loss='mse',metrics=['accuracy'])
  return model

In [None]:
def plot_valuta(ime_valute, vrednosti, predvidjene_vrednosti, show = False):
  """Plot real against predicted values for one currency.

  Both series are drawn over the sample positions
  ``0 .. len(predvidjene_vrednosti) - 1`` in the left half of a 15x5 figure
  titled with ``ime_valute``.

  Args:
    ime_valute: Currency name; used as the figure title and as the file name
      when the figure is saved.
    vrednosti: Real values (drawn in green).
    predvidjene_vrednosti: Predicted values (drawn in red).
    show: When true the figure is displayed; otherwise it is written to
      ``plots/<ime_valute>.png`` and closed.
  """
  fig = plt.figure(figsize=(15, 5))
  fig.suptitle(ime_valute)
  graph1 = fig.add_subplot(1, 2, 1)

  graph1.set_xlabel('datum')
  graph1.set_ylabel('vrednost')

  x_osa = np.arange(0, len(predvidjene_vrednosti))

  graph1.plot(x_osa, predvidjene_vrednosti, color='r', label='predvidjene vrednosti')
  graph1.plot(x_osa, vrednosti, color='g', label='realne vrednosti')
  # Without a legend the labels passed to plot() are never displayed.
  graph1.legend()

  if show:
    plt.show()
  else:
    target = f'plots/{ime_valute}.png'
    # savefig does not create missing directories.
    os.makedirs(os.path.dirname(target), exist_ok=True)
    fig.savefig(target)
    plt.close(fig)

In [None]:
def predict_for_inputs(index_valute):
  """Run the model on the test samples belonging to one currency.

  Reads the module-level ``test_data_valute_input``,
  ``test_data_windows_input``, ``test_data_output`` and ``model``. A sample
  is selected when its currency vector holds 1 at ``index_valute``.

  Args:
    index_valute: Position inside the currency vector that identifies the
      currency of interest.

  Returns:
    Tuple ``(vrednosti, predvidjene_vrednosti)``: the real outputs of the
    selected samples as a NumPy array, and the result of
    ``model.predict_on_batch`` for them.
  """
  selected = [
    (valuta, window, real_value)
    for valuta, window, real_value in zip(test_data_valute_input, test_data_windows_input, test_data_output)
    if valuta[index_valute] == 1
  ]

  vrednosti = np.array([real_value for _, _, real_value in selected])
  currency_vectors = np.asarray([valuta for valuta, _, _ in selected])
  windows = np.asarray([window for _, window, _ in selected])

  return vrednosti, model.predict_on_batch([currency_vectors, windows])

In [None]:
def predict_for_name(ime_valute):
  """Predict and display the test-set plot for the currency named ``ime_valute``.

  Every dataset in the module-level ``data_list`` whose ``name`` equals
  ``ime_valute`` is processed: predictions and real values obtained from
  ``predict_for_inputs`` are mapped back to the original price scale using
  the dataset's 'Close' minimum and maximum, then shown with
  ``plot_valuta``. Nothing happens when no dataset carries that name.

  Args:
    ime_valute: Name of the currency dataset to plot.
  """
  for position, dataset in enumerate(data_list):
    if dataset.name != ime_valute:
      continue
    lowest, highest = get_min_max(dataset)
    actual, predicted = predict_for_inputs(position)
    # Invert the min-max scaling that was applied to the 'Close' column.
    predicted = (predicted*(highest-lowest)+lowest)
    actual = (actual*(highest-lowest)+lowest)
    plot_valuta(ime_valute, actual, predicted, show = True)

In [None]:
# Load every CSV file of the data directory into a DataFrame tagged with the
# file's base name.
directory_name = 'data/'
data_list = []

# os.listdir returns entries in arbitrary order; sorting keeps the position of
# each currency in data_list (and with it its one-hot index) stable between
# runs and machines.
for filename in sorted(os.listdir(directory_name)):
  if not filename.endswith('.csv'):
    continue
  f = os.path.join(directory_name, filename)
  # Strip only the trailing extension; str.replace would also remove '.csv'
  # from the middle of a name.
  filename = filename[:-len('.csv')]

  dataset = pd.read_csv(f)
  dataset.name = filename

  data_list.append(dataset)

broj_valuta = len(data_list)

In [None]:
# Build the training and test samples: for every row of every dataset, a
# window of earlier normalized 'Close' values, the one-hot currency vector and
# the row's normalized 'Close' value as the target.
window_length = 7

# Share of each dataset that goes to training, and the smallest number of
# trailing rows that is always held out for testing.
TRAIN_FRACTION = 0.8
MIN_TEST_SAMPLES = 15

training_data_windows_input = []
training_data_valute_input = []
training_data_output = []
test_data_windows_input = []
test_data_valute_input = []
test_data_output = []
test_dates = []

for i, dataset in enumerate(data_list):
  # Dedicated names: binding `min` / `max` at module level would shadow the
  # builtins for every later cell.
  close_min, close_max = get_min_max(dataset)
  # Min-max scale the 'Close' column only; all other columns stay as loaded.
  normalized_dataset = dataset.assign(Close=(dataset['Close'] - close_min) / (close_max - close_min))
  normalized_dataset.name = dataset.name

  num_samples = len(normalized_dataset)
  divider = int(num_samples * TRAIN_FRACTION)
  if num_samples - divider < MIN_TEST_SAMPLES:
    divider = num_samples - MIN_TEST_SAMPLES

  for index, row in normalized_dataset.iterrows():
    window = get_window(normalized_dataset, window_length, row['Date'])
    valuta_vector = vector_of_words(normalized_dataset.name, i, broj_valuta)
    close_price = row['Close']
    # Rows are split by index label: labels below the divider train the
    # model, the rest are kept for testing.
    if index < divider:
      training_data_windows_input.append(window)
      training_data_valute_input.append(valuta_vector)
      training_data_output.append(close_price)
    else:
      test_data_windows_input.append(window)
      test_data_valute_input.append(valuta_vector)
      test_data_output.append(close_price)
      test_dates.append(row['Date'])

In [None]:
# Convert the accumulated sample lists to arrays, train the model and report
# its performance on the held-out samples.
training_data_windows_input = np.array(training_data_windows_input)
training_data_valute_input = np.array(training_data_valute_input)
training_data_output = np.array(training_data_output)
test_data_windows_input = np.array(test_data_windows_input)
test_data_valute_input = np.array(test_data_valute_input)
test_data_output = np.array(test_data_output)

# Training samples are shuffled in unison; the test samples keep their order.
X_train_windows, X_train_valute, Y_train = get_random(training_data_windows_input, training_data_valute_input, training_data_output)

# The test arrays were converted above, so they are used directly.
X_test_windows = test_data_windows_input
X_test_valute = test_data_valute_input
Y_test = test_data_output

tf.keras.backend.clear_session()

num_epochs = 25
embedding_length = 1

model = create_model(window_length, embedding_length)
model.fit([X_train_valute, X_train_windows], Y_train, epochs = num_epochs)

# evaluate() returns the loss first, followed by the compiled metrics. The
# model is trained on an MSE regression objective, so the loss is reported as
# well instead of being thrown away.
test_loss, acc = model.evaluate([X_test_valute, X_test_windows], Y_test)
print('MSE na test skupu: ', test_loss)
print('Accuracy na test skupu: ', acc)

predict_for_name('BTC-USD')

In [None]:
# Save a real-vs-predicted plot for every loaded currency.
# plot_valuta writes into 'plots/', which has to exist before saving.
os.makedirs('plots', exist_ok=True)

for i, dataset in enumerate(data_list):
  valuta = dataset.name
  # Dedicated names: binding `min` / `max` at module level would shadow the
  # builtins for every later cell.
  close_min, close_max = get_min_max(dataset)
  vrednosti, predvidjene_vrednosti = predict_for_inputs(i)
  # Undo the min-max scaling so the plot shows actual prices.
  predvidjene_vrednosti = (predvidjene_vrednosti*(close_max-close_min)+close_min)
  vrednosti = (vrednosti*(close_max-close_min)+close_min)
  plot_valuta(valuta, vrednosti, predvidjene_vrednosti)