In [1]:
import sys
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf

In [2]:
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  print('GPU device not found')
else:
  print('Found GPU at: {}'.format(device_name))

GPU device not found


In [3]:
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', lambda x: '%.5f' % x)
akarsu_df = pd.read_csv('../RelevantData/akarsu_relevant.csv', parse_dates = ['Tarih'], index_col = 'Tarih')
akarsu_cols = ['Fekal_Koliform', 'Toplam_Koliform', 'Toplam_Fosfor', 'Toplam_Kjeldahl_Azotu', 'Kimyasal_Oksijen_İhtiyacı', 'Nitrat_Azotu', 'Çözünmüş_Oksijen']

In [4]:
# Removing Toplam_Pestisit column because 90% of it is empty.
akarsu_df = akarsu_df.drop(columns = 'Toplam_Pestisit')
# Dropping missing features
akarsu_df = akarsu_df.dropna()
# Manually correcting dates of two samples
akarsu_df.index = akarsu_df.index.str.replace("28:.12.2016", "28.12.2016", regex = True)
akarsu_df.index = akarsu_df.index.str.replace("22.009.2016", "22.09.2016", regex = True)

In [5]:
# Scale features
akarsu_scaled = akarsu_df.copy()
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
for i in akarsu_cols:
    akarsu_scaled[[i]] = scaler.fit_transform(akarsu_df[[i]])

akarsu_scaled.index = pd.to_datetime(akarsu_scaled.index, yearfirst = True)
akarsu_df.index = pd.to_datetime(akarsu_df.index, yearfirst = True)
df = akarsu_scaled

In [6]:
df['chunkID'] = df.groupby('Numune_Adı').ngroup()
df = df.drop(labels = ['Numune_Adı'], axis = 1)
df = df.sort_values(by=['chunkID', 'Tarih'])

In [7]:
# Split Data
test_df = df.loc[df['chunkID'] == 43]
df = df[df['chunkID'] != 43]
data = df.drop(columns = 'chunkID')
test_df = test_df.drop(columns = 'chunkID')

n_timesteps = 4 # Length of the output sequences
batch_size = 8 # Number of timeseries samples in each batch

rows_per_month = 1
test_months = 12 # last # of rows to split
test_indices = test_months * rows_per_month    

train = data.iloc[:-test_indices]
#test = data.iloc[-test_indices:]
test = test_df
test_idx = test_df.index

In [8]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
# Generate timeseries data
train_generator = TimeseriesGenerator(train.values, targets = train.values, length = n_timesteps, batch_size = batch_size)
test_generator = TimeseriesGenerator(test.values, targets = test.values, length = 1, batch_size = 1)

In [17]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import LSTM, Dense, Activation, Input, LeakyReLU

neurons = 100
n_features = 7
# Encoder
encoder_inputs = Input(shape = (n_timesteps, train.shape[1]), name = "encoder_inputs")
encoder_lstm, state_h, state_c = LSTM(neurons, return_sequences = True, return_state=True, name = "encoder_lstm")(encoder_inputs)
encoder_states = [state_h, state_c]
# Decoder
decoder_inputs = Input(shape=(n_timesteps, train.shape[1]), name='decoder_inputs')
decoder_lstm = LSTM(neurons, return_sequences=True, return_state=True, name='decoder_lstm')
decoder_outputs, _, _ = decoder_lstm(decoder_inputs,
                                     initial_state = encoder_states)
decoder_dense = Dense(n_features, activation='relu', name='outputs')(decoder_outputs)
# Create & compile model
model = Model(inputs = [encoder_inputs, decoder_inputs], outputs = decoder_dense)
model.compile(optimizer='adam', loss = 'mse', metrics= ['accuracy'])
model.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
encoder_inputs (InputLayer)     [(None, 4, 7)]       0                                            
__________________________________________________________________________________________________
decoder_inputs (InputLayer)     [(None, 4, 7)]       0                                            
__________________________________________________________________________________________________
encoder_lstm (LSTM)             [(None, 4, 100), (No 43200       encoder_inputs[0][0]             
__________________________________________________________________________________________________
decoder_lstm (LSTM)             [(None, 4, 100), (No 43200       decoder_inputs[0][0]             
                                                                 encoder_lstm[0][1]         

In [18]:
#history = model.fit(train_generator, epochs = 10)

In [30]:
from random import randint
def generate_sequence(length, n_unique):
    return [randint(1, n_unique-1) for _ in range(length)]
def get_encoder_triple(time_steps,vocabulary_size,verbose= False):
    
# generate random sequence
    sequence_in = generate_sequence(time_steps, vocabulary_size)
    encoder_in = sequence_in.copy()
    decoder_out = sequence_in[::-1]
    decoder_in = decoder_out.copy()
    decoder_in.insert(0,0)
    decoder_in.pop()
# one hot encode

# reshape as 3D
    X_encoder_in = encoder_in.reshape((1, encoder_in.shape[0], encoder_in.shape[1]))
    X_decoder_in = decoder_in.reshape((1, decoder_in.shape[0], decoder_in.shape[1]))
    y_decoder_out = decoder_out.reshape((1, decoder_out.shape[0], decoder_out.shape[1]))

    if(verbose):
        print('\nSample X_encoder_in X_decoder_in and y_decoder_out')
        print('\nIn raw format:')
        print('X_encoder_in=%s, X_decoder_in=%s, y_decoder_out=%s' % 
              (one_hot_decode(X_encoder_in[0]), one_hot_decode(X_decoder_in[0]), 
               one_hot_decode(y_decoder_out[0])))
        print('\nIn one_hot_encoded format:')
        print('X_encoder_in=%s' % (X_encoder_in[0]))
        print('X_decoder_in=%s' % (X_decoder_in[0]))
        print('y_decoder_out=%s' % (y_decoder_out[0]))
    return [array(X_encoder_in), array(X_decoder_in), array(y_decoder_out)]

In [31]:
def create_encoder_dataset(train_size, test_size, time_steps,vocabulary_size, verbose= False):
    X_encoder_in = list()
    X_decoder_in = list()
    y_decoder_out = list()
    
    for _ in range(train_size):
        triple=get_encoder_triple(time_steps,vocabulary_size) 
        X_encoder_in.append(triple[0])
        X_decoder_in.append(triple[1])
        y_decoder_out.append(triple[2])

    X_encoder_in= array(X_encoder_in).squeeze()
    X_decoder_in= array(X_decoder_in).squeeze()
    y_decoder_out= array(y_decoder_out).squeeze()
    if(verbose):
        print('\nGenerated sequence datasets as follows')
        print('X_encoder_in.shape: ', X_encoder_in.shape)
        print('X_decoder_in.shape: ', X_decoder_in.shape)
        print('y_decoder_out.shape: ', y_decoder_out.shape)
        print('Sample sequences in raw format:')

        print('X_encoder_in: \n', one_hot_decode(X_encoder_in[0]))
        print('X_decoder_in: \n', one_hot_decode(X_decoder_in[0]))
        print('y_decoder_out: \n',one_hot_decode(y_decoder_out[0]))

        print('Sample sequences in one-hot encoded format:')
        print('X_encoder_in: \n', X_encoder_in[0])
        print('X_decoder_in: \n', X_decoder_in[0])
        print('y_decoder_out: \n', y_decoder_out[0])
    return X_encoder_in,X_decoder_in, y_decoder_out

In [32]:
e, d, dp = create_encoder_dataset(100, 100, n_timesteps ,n_features , verbose=True)

AttributeError: 'list' object has no attribute 'reshape'