In [None]:
import tensorflow as tf
from keras.preprocessing import sequence
from keras.models import Sequential,Model
from keras.layers import Flatten, Dense, Embedding
from keras.layers import SimpleRNN, LSTM, GRU,Input
from keras.layers import Concatenate,concatenate
from datetime import datetime
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from statistics import mean

In [None]:
directory_name = 'data/'
data_dict = {}
# Per-currency (min, max) of the raw 'Close' column, kept so normalized
# values can be mapped back to prices for each currency separately.
close_ranges = {}

br = 0

# Sorted so the load order is reproducible; os.listdir gives no ordering
# guarantee.
for filename in sorted(os.listdir(directory_name)):
  f = os.path.join(directory_name, filename)
  # Skip anything that is not a CSV file (for example the .ipynb_checkpoints
  # folder Jupyter creates), which would otherwise break pd.read_csv.
  if not (os.path.isfile(f) and filename.endswith('.csv')):
    continue
  filename = os.path.splitext(filename)[0]

  dataset = pd.read_csv(f)
  # Named close_min/close_max rather than min/max so the built-in functions
  # stay usable in the rest of the notebook.
  close_min = np.amin(dataset['Close'])
  close_max = np.amax(dataset['Close'])
  close_span = close_max - close_min
  if close_span == 0:
    # Constant series: min-max scaling would divide by zero, so map it to 0.
    normalized_close = dataset['Close'] * 0.0
  else:
    normalized_close = (dataset['Close'] - close_min) / close_span
  # Min-max scale only the 'Close' column; every other column is kept as read.
  dataset = dataset.assign(Close=normalized_close)

  data_dict[filename] = dataset
  close_ranges[filename] = (close_min, close_max)

In [None]:
def get_number_for_date(dataset, date):
  """Return the row number that `date` corresponds to in `dataset`.

  Args:
    dataset: DataFrame with a 'Date' column of strings formatted as
      '%Y-%m-%d %H:%M:%S'.
    date: date string in the same format.

  Returns:
    The index label of the first row whose 'Date' equals `date`. If there is
    no such row, the position is extrapolated from the day difference to the
    first row (a negative number for dates before the data starts) or to the
    last row (a number >= len(dataset) for dates after it ends).
  """
  date_format = '%Y-%m-%d %H:%M:%S'

  matches = dataset.index.values[(dataset['Date'] == date).values]
  # Check how many rows matched instead of the truthiness of the array: a
  # match at row 0 is falsy, and empty or multi-element arrays have no
  # well-defined truth value.
  if matches.size > 0:
    return matches[:1].item()

  date_obj = datetime.strptime(date, date_format)
  first_date_obj = datetime.strptime(dataset['Date'].iloc[0], date_format)
  first_diff = (date_obj - first_date_obj).days
  if first_diff < 0:
    return first_diff

  last_date_obj = datetime.strptime(dataset['Date'].iloc[-1], date_format)
  last_diff = (date_obj - last_date_obj).days
  # NOTE(review): the last row has number len(dataset) - 1, so one day past it
  # maps to len(dataset) + 1 here, unlike the "before first row" branch which
  # is contiguous with row 0. Kept as is - confirm whether this is intended.
  return len(dataset) + last_diff

In [None]:
def get_window(dataset, window_length, date, flag = False):
  """Collect `window_length` consecutive 'Close' values located before `date`.

  The row number for `date` comes from get_number_for_date. With `flag` set
  the window stops right before that row; otherwise it stops one row earlier.
  Row numbers that are not present in the dataset index are filled with the
  average of the whole 'Close' column.

  Returns:
    numpy array reshaped to (window_length, -1).
  """
  anchor_row = get_number_for_date(dataset, date)
  stop = anchor_row if flag else anchor_row - 1
  first = stop - window_length

  def close_at(label):
    # Rows outside the index fall back to the column average.
    try:
      return dataset.loc[label]['Close']
    except KeyError:
      return np.average(dataset['Close'])

  closes = np.array([close_at(label) for label in range(first, stop)])
  return np.reshape(closes, (window_length, -1))

In [None]:

# Number of past 'Close' values in each sample window. Defined here because
# this cell runs before the training cell that also sets it; keep both values
# in sync.
window_length = 7

num_samples = mean([len(dv) for dv in data_dict.values()])
# Rows whose index is below this threshold go to training, the rest to test.
# NOTE(review): the threshold comes from the mean dataset length, so datasets
# of different lengths are not split 90/10 individually - confirm intended.
divider = int(num_samples*0.9)

training_data_input = []
training_data_output = []
test_data_input = []
test_data_output = []
test_dates = []

full_dataset = []
for key,value in data_dict.items():
  for index, row in value.iterrows():
    window = get_window(value, window_length, row['Date'])
    close_price = row['Close']
    # Pair the currency name with its window in an explicit object array:
    # np.array([str, ndarray]) is a ragged array, which recent NumPy versions
    # refuse to build without dtype=object.
    sample = np.empty(2, dtype=object)
    sample[0] = key
    sample[1] = np.asarray(window).astype(np.float32)
    if index < divider:
      training_data_input.append(sample)
      training_data_output.append(close_price)
    else:
      test_data_input.append(sample)
      test_data_output.append(close_price)
      test_dates.append(row['Date'])


In [None]:
def get_random(input,output):
  """Shuffle `input` and `output` with one shared random permutation.

  The permutation length is taken from the first axis of `input`, and both
  arrays are reordered with the same indices so that pairs stay aligned.

  Returns:
    Tuple (shuffled input, shuffled output).
  """
  order = np.random.permutation(input.shape[0])
  shuffled_input = input[order]
  shuffled_output = output[order]
  return shuffled_input, shuffled_output

In [None]:
def create_model(window_length,embeding_length):
  """Build and compile the two-input model with a Sequential core.

  The 'valuta' input (integer ids, embedded from a vocabulary of 50 into
  `embeding_length` dimensions) is concatenated on the last axis with the
  'window' input (`window_length` values per step). The result is passed
  through a Dense -> LSTM -> Dense -> Dense(1) stack.

  Args:
    window_length: size of the last axis of the 'window' input.
    embeding_length: output dimension of the embedding.

  Returns:
    Compiled Keras Model taking [valuta, window] and producing one value.
  """
  nlp_input = Input(shape=(None,), name='valuta')
  vl_input = Input(shape=(None,window_length), name='window')
  emb = Embedding(input_dim=50, output_dim=embeding_length)(nlp_input)
  full_input = concatenate([emb, vl_input])

  core = Sequential()
  # The input shape must match the concatenated tensor: any number of steps,
  # each with embedding plus window features.
  core.add(Dense(256, activation = 'relu', input_shape = (None, window_length+embeding_length)))
  core.add(LSTM(256, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = False))
  core.add(Dense(256, activation = 'relu'))
  core.add(Dense(1))
  curr_output = core(full_input)

  # A Model has to start from Input layers; the concatenated tensor is only
  # an intermediate result and cannot be used as the model input.
  model = Model(inputs=[nlp_input, vl_input], outputs=curr_output)
  # NOTE(review): 'accuracy' is not informative for a regression trained with
  # MSE; kept so that evaluate() still returns exactly (loss, one metric).
  model.compile(optimizer='adam',loss='mse',metrics=['accuracy'])
  return model

In [None]:
def create_model2(window_length,embeding_length):
  """Build and compile the two-input model with the functional API.

  The 'valuta' input (integer ids, embedded from a vocabulary of 50 into
  `embeding_length` dimensions) is concatenated on the last axis with the
  'window' input (`window_length` values per step). The result is passed
  through a Dense -> LSTM -> Dense -> Dense(1) stack.

  Args:
    window_length: size of the last axis of the 'window' input.
    embeding_length: output dimension of the embedding.

  Returns:
    Compiled Keras Model taking [valuta, window] and producing one value.
  """
  nlp_input = Input(shape=(None,), name='valuta')
  vl_input = Input(shape=(None,window_length), name='window')
  emb = Embedding(input_dim=50, output_dim=embeding_length)(nlp_input)
  full_input = concatenate([emb, vl_input])

  # Functional layers infer their input shape from the tensor they are called
  # on, so no input_shape argument is passed.
  x = Dense(256, activation = 'relu')(full_input)
  x = LSTM(256, dropout = 0.2, recurrent_dropout = 0.2, return_sequences = False)(x)
  x = Dense(256, activation = 'relu')(x)
  x = Dense(1)(x)

  model = Model(inputs=[nlp_input , vl_input], outputs=[x])
  # NOTE(review): 'accuracy' is not informative for a regression trained with
  # MSE; kept so that evaluate() still returns exactly (loss, one metric).
  model.compile(optimizer='adam',loss='mse',metrics=['accuracy'])
  return model

In [None]:

window_length = 7
num_epochs = 20
embeding_length = 7


training_data_input = np.asarray(training_data_input)
training_data_output = np.asarray(training_data_output)
test_data_input = np.asarray(test_data_input)
test_data_output = np.asarray(test_data_output)

# Shuffle the inputs together with their targets (not with themselves), so
# that Y_train holds the close prices the model should predict.
X_train,Y_train = get_random(training_data_input,training_data_output)

X_test = np.array(test_data_input)
Y_test = np.array(test_data_output)

print(X_train.shape)

# The Embedding layer needs integer ids, so give every currency name a stable
# number. The ids must stay below the Embedding input_dim used by the model.
currency_ids = {name: idx for idx, name in enumerate(sorted(data_dict))}


def to_model_inputs(samples):
  """Split [currency name, window] samples into the model's two input arrays.

  Returns a list [ids, windows] in the order of the model inputs: ids has
  shape (n, 1) and windows has shape (n, 1, window_length), i.e. one step
  per sample.
  """
  ids = np.array([currency_ids[name] for name, _ in samples], dtype=np.int32)
  windows = np.array([window for _, window in samples], dtype=np.float32)
  return [ids.reshape(-1, 1), windows.reshape(-1, 1, window_length)]


model = create_model2(window_length,embeding_length)
model.fit(to_model_inputs(X_train), Y_train, epochs = num_epochs)
_, acc = model.evaluate(to_model_inputs(X_test), Y_test)
print('Accuracy na test skupu: ', acc)


In [None]:
def plot_valuta(ime_valute, vrednosti, datumi, predvidjene_vrednosti):
  fig = plt.figure(figsize=(15, 5))
  fig.suptitle(ime_valute)
  graph1 = fig.add_subplot(1, 2, 1)

  '''
  Y_predict = model.predict(X_test)
  X_test = np.reshape(X_test,(1,-1))[0]
  Y_predict = np.reshape(Y_predict,(1,-1))[0]
  print(X_test.shape)
  print(Y_predict.shape)

  Y_predict = (Y_predict*(max-min)+min)
  Y_test = (Y_test*(max-min)+min)
  '''

  graph1.set_xlabel('datum')
  graph1.set_ylabel('vrednost')
  graph1.plot(datumi, predvidjene_vrednosti, color='r', label='predvidjene vrednosti')
  graph1.plot(datumi, vrednosti, color='g', label='realne vrednosti')