In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from TimeindexProcessing import TimeindexProcessing

In [7]:
def series_to_input_layer(series, sequence_size = 1):
    '''Split a time series into overlapping input windows for a neural network.

    Window ``i`` contains ``series[i : i + sequence_size]``. Only windows that
    are followed by at least one further sample are produced, so the result
    holds ``len(series) - sequence_size`` windows (the sample after each
    window is what ``series_to_output_layer`` returns as its target).

    Parameters
    ----------
    series : array-like
        Time series to split. Lists, NumPy arrays and pandas Series are all
        accepted; the data is converted with ``np.asarray`` first.

    sequence_size : int
        Number of consecutive samples that form one input sequence.

    Returns
    -------
    input_layer : list
        A list of windows, each a list of ``sequence_size`` values. Empty
        when the series has no more than ``sequence_size`` samples.

    Raises
    ------
    ValueError
        If ``sequence_size`` is smaller than 1.
    '''
    if sequence_size < 1:
        raise ValueError('sequence_size must be a positive integer')

    # Work on a positional NumPy view so lists and label-indexed pandas
    # Series behave exactly like plain arrays.
    values = np.asarray(series)
    window_count = len(values) - sequence_size  # <= 0 yields no windows
    return [values[start:start + sequence_size].tolist()
            for start in range(window_count)]

In [8]:
def series_to_output_layer(series, sequence_size = 1):
    '''Build the target (output layer) values for windows of a time series.

    The target of window ``i`` is the sample that immediately follows it,
    i.e. ``series[i + sequence_size]``. The result therefore lines up
    one-to-one with the windows returned by ``series_to_input_layer`` for
    the same ``sequence_size``.

    Parameters
    ----------
    series : array-like
        Time series to take the targets from. Lists, NumPy arrays and pandas
        Series are all accepted; the data is converted with ``np.asarray``
        so indexing is always positional.

    sequence_size : int
        Number of consecutive samples that form one input sequence.

    Returns
    -------
    output_layer : list
        ``len(series) - sequence_size`` target values. Empty when the series
        has no more than ``sequence_size`` samples.

    Raises
    ------
    ValueError
        If ``sequence_size`` is smaller than 1.
    '''
    if sequence_size < 1:
        raise ValueError('sequence_size must be a positive integer')

    # Every sample from position `sequence_size` onwards is the target of
    # exactly one window, so a single slice replaces the per-element loop.
    values = np.asarray(series)
    return values[sequence_size:].tolist()


**Univariate series**

In [9]:
# Read the PJME hourly CSV from the local Data folder.
# (The path used to be written as two adjacent string literals that Python
# silently joined; it is now a single literal with the same value.)
df = pd.read_csv('./Data/Kaggle_PJME/PJME_hourly.csv')

In [10]:
# Convert the 'Datetime' column into the dataframe's time index
index_processing = TimeindexProcessing()
indexed_df = index_processing.convert_column_to_timeindex(df, column_name= 'Datetime')
# Print the first and last index entries to see the period covered
print(indexed_df.index[0], indexed_df.index[-1])
# Preview the first rows (last expression of the cell, so it is rendered)
indexed_df.head()

2002-01-01 01:00:00 2018-08-03 00:00:00


Unnamed: 0_level_0,PJME_MW
Datetime,Unnamed: 1_level_1
2002-01-01 01:00:00,30393.0
2002-01-01 02:00:00,29265.0
2002-01-01 03:00:00,28357.0
2002-01-01 04:00:00,27899.0
2002-01-01 05:00:00,28057.0


In [11]:
# Extract the 'PJME_MW' column as a plain NumPy array
series = indexed_df['PJME_MW'].to_numpy()
# Number of consecutive samples in each CNN input sequence
sequence_size = 3
# Sliding input windows and, for each window, the value that follows it
X = series_to_input_layer(series, sequence_size=sequence_size)
y = series_to_output_layer(series, sequence_size=sequence_size)

In [13]:
# Show the first four (input window, target) pairs
for i in range(4):
    print(X[i], y[i])

[30393.0, 29265.0, 28357.0] 27899.0
[29265.0, 28357.0, 27899.0] 28057.0
[28357.0, 27899.0, 28057.0] 28654.0
[27899.0, 28057.0, 28654.0] 29308.0


**Multivariate series**

In a multivariate problem, the output time series depends on two or more input time series. The example below illustrates the data preparation for this case.

Ref.: https://machinelearningmastery.com/how-to-develop-convolutional-neural-network-models-for-time-series-forecasting/

In [None]:
# Two toy input series (the second offset by 5) and a target series that is
# their element-wise sum.
X1 = np.arange(start=0, stop=100, step=10)
X2 = np.arange(start=5, stop=105, step=10)
Y = X1 + X2

In [25]:
# Window both input series with the same sequence length
# (sequence_size is the value set in the univariate example).
X1_seq, X2_seq = [
    series_to_input_layer(input_series, sequence_size)
    for input_series in (X1, X2)
]

In [30]:
# Show the two input series side by side, one column per series
pd.DataFrame([X1, X2]).transpose()

Unnamed: 0,0,1
0,0,5
1,10,15
2,20,25
3,30,35
4,40,45
5,50,55
6,60,65
7,70,75
8,80,85
9,90,95


In [27]:
# Display the windowed sequences of both input series as a tuple
X1_seq, X2_seq

([[0, 10, 20],
  [10, 20, 30],
  [20, 30, 40],
  [30, 40, 50],
  [40, 50, 60],
  [50, 60, 70],
  [60, 70, 80]],
 [[5, 15, 25],
  [15, 25, 35],
  [25, 35, 45],
  [35, 45, 55],
  [45, 55, 65],
  [55, 65, 75],
  [65, 75, 85]])

In [None]:

# Determine the list of duplicated index entries and build a new dataframe that keeps only the first row of each duplicate
duplicates_index_list, duplicates_corrected_df = index_processing.duplicate_timeindex(indexed_df)

# Duplicated index entries in the original dataframe can be checked with the expression below.
# NOTE(review): it is not the last expression of the cell, so its result is never displayed.
indexed_df.loc[duplicates_index_list]

# The same lookup on the duplicates-corrected dataframe (result likewise discarded mid-cell):
duplicates_corrected_df.loc[duplicates_index_list]
# print(duplicates_corrected_df.index[0], duplicates_corrected_df.index[-1])

# Identify index entries missing at the declared data frequency and add rows for them to the duplicates-corrected dataframe
data_freq = '1H' # Define the expected data frequency
missing_index_list, rows_added_df = index_processing.missing_timeindex(duplicates_corrected_df, data_freq)
# print(rows_added_df.index[0], rows_added_df.index[-1])
# To confirm whether missing rows are added in dataframe:
# rows_added_df.loc[missing_index_list]
# Keep an independent copy of rows_added_df under the name used from here on
cleaned_df = rows_added_df.copy()
cleaned_df.shape  # NOTE(review): not displayed either; only a cell's final expression is rendered
# cnn_df = cleaned_df.iloc[0:10, :]
# Second independent copy, presumably the frame fed to the CNN; verify against later cells
cnn_df = cleaned_df.copy()

There are 4 duplicate index in the time series. 
There are 30 missing index in the time series


  expected_index = pd.date_range(start= start_time,
  rows_added_df = pd.concat([rows_added_df, missing_df], axis = 'index')


In [1]:
# Import every layer from the public `keras.layers` namespace. The
# `keras.layers.convolutional` submodule is not a documented import path and
# is unavailable in recent Keras releases.
from keras.models import Sequential
from keras.layers import Conv1D, Dense, Flatten, MaxPooling1D

NotFoundError: dlopen(/opt/homebrew/Cellar/python@3.9/3.9.13_1/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/tensorflow-plugins/libmetal_plugin.dylib, 0x0006): symbol not found in flat namespace '__ZN10tensorflow8internal10LogMessage16VmoduleActivatedEPKci'

In [55]:
# Define the 1D-CNN regressor: convolution -> max pooling -> flatten -> dense head.
# NOTE(review): n_steps and n_features are not assigned in any cell shown
# before this one; confirm they are defined before this cell runs.
model = Sequential([
    Conv1D(filters=64, kernel_size=2, activation='relu',
           input_shape=(n_steps, n_features)),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1),
])
# Adam optimizer with a mean-squared-error loss
model.compile(loss='mse', optimizer='adam')

NameError: name 'Sequential' is not defined

In [49]:
# Toy series 10, 20, ..., 120 (twelve evenly spaced values)
series = np.arange(10, 130, 10)