In [None]:
#===========================================================================================
#===========================================================================================
#=================================== LSTM FOR FORECASTING ==================================
#===========================================================================================
#===========================================================================================


from pandas import DataFrame
import pandas as pd

from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import MinMaxScaler

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM

from math import sqrt

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf

import statsmodels.api as sm

from matplotlib import pyplot
from matplotlib.pylab import rcParams

pyplot.style.use('fivethirtyeight')
rcParams['figure.figsize'] = 15, 6

from numpy import array
import numpy as np

import winsound

Using TensorFlow backend.


ModuleNotFoundError: No module named 'numpy.core._multiarray_umath'

SystemError: <class '_frozen_importlib._ModuleLockManager'> returned a result with an error set

ImportError: numpy.core._multiarray_umath failed to import

ImportError: numpy.core.umath failed to import

In [20]:
# date-time parsing function for loading the dataset
def parser(x):
    """Parse a shortened 'Y-MM' date string into a datetime.

    The literal prefix '190' is put in front of `x` before parsing, so an
    input such as '1-01' is read as '1901-01' using the '%Y-%m' format.
    """
    stamp = '190' + x
    return datetime.strptime(stamp, '%Y-%m')

In [None]:
# convert time series into supervised learning problem
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table.

    Parameters
    ----------
    data : list or 2-D array-like
        Observations; a list is treated as a single variable, anything else
        must expose ``shape[1]`` giving the number of variables.
    n_in : int
        Number of lagged steps (t-n_in ... t-1) used as input columns.
    n_out : int
        Number of forecast steps (t ... t+n_out-1) used as output columns.
    dropnan : bool
        When true, rows made incomplete by the shifting are removed.

    Returns
    -------
    pandas.DataFrame
        Columns named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.
    """
    if type(data) is list:
        n_vars = 1
    else:
        n_vars = data.shape[1]
    var_ids = range(1, n_vars + 1)

    frame = DataFrame(data)
    shifted, labels = [], []

    # lagged inputs, oldest first: t-n_in, ..., t-1
    for lag in range(n_in, 0, -1):
        shifted.append(frame.shift(lag))
        labels.extend('var%d(t-%d)' % (v, lag) for v in var_ids)

    # forecast targets: t, t+1, ..., t+n_out-1
    for step in range(n_out):
        shifted.append(frame.shift(-step))
        if step == 0:
            labels.extend('var%d(t)' % v for v in var_ids)
        else:
            labels.extend('var%d(t+%d)' % (v, step) for v in var_ids)

    # assemble the shifted copies side by side
    agg = concat(shifted, axis=1)
    agg.columns = labels

    # shifting introduces NaN at the edges; optionally discard those rows
    if dropnan:
        agg = agg.dropna()

    return agg

In [None]:
# create a differenced series
def difference(dataset, interval=1):
    """Return the lag-`interval` differences of `dataset` as a pandas Series.

    Element k of the result is ``dataset[k + interval] - dataset[k]`` taken by
    *position*. The values are materialised with ``list(dataset)`` first so
    that a pandas Series with a non-default index (datetime, offset integers,
    strings) is differenced positionally instead of relying on label lookup
    or on the deprecated integer-position fallback of ``Series[int]``.

    Parameters
    ----------
    dataset : sequence (list, 1-D array, pandas Series)
        Ordered observations.
    interval : int
        Distance between the two observations being subtracted.

    Returns
    -------
    pandas.Series
        ``len(dataset) - interval`` differences with a fresh 0..n-1 index.
    """
    values = list(dataset)
    diff = [values[i] - values[i - interval] for i in range(interval, len(values))]
    return Series(diff)

In [None]:
# Overwrite NaNs with column value interpolations.
def interpolate_nans(X):
    """Replace NaNs in each column of `X` by linear interpolation, in place.

    For every column the positions of the missing entries are interpolated
    with ``np.interp`` from the positions and values of the present entries.
    `X` is modified in place and also returned.
    """
    n_cols = X.shape[1]
    for col in range(n_cols):
        column = X[:, col]
        missing = np.isnan(column)
        present = ~missing
        filled = np.interp(np.flatnonzero(missing),
                           np.flatnonzero(present),
                           column[present])
        X[missing, col] = filled
    return X

In [2]:
# transform series into train and test sets for supervised learning
def prepare_data(series, n_test, n_lag, n_seq):
    """Turn a raw series into scaled, differenced train/test matrices.

    Steps: zero-fill NaNs, first-difference, scale to [-1, 1], frame as a
    supervised problem with `n_lag` input and `n_seq` output steps, then keep
    the last `n_test` rows as the test set.

    Note: the NaN zero-fill is applied to `series` itself (no copy is made),
    so the caller's object is modified.

    Returns
    -------
    (scaler, train, test)
        The fitted MinMaxScaler and the two 2-D value arrays.
    """
    # zero-fill missing observations directly on the incoming object
    raw_values = series
    nan_mask = np.isnan(raw_values)
    raw_values[nan_mask] = 0

    # first differences, shaped as a single-feature column
    diff_values = difference(raw_values, 1).values
    diff_values = diff_values.reshape(len(diff_values), 1)

    # scale the differences into [-1, 1]
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaled_values = scaler.fit_transform(diff_values)
    scaled_values = scaled_values.reshape(len(scaled_values), 1)

    # lagged inputs followed by forecast targets, one sample per row
    supervised_values = series_to_supervised(scaled_values, n_lag, n_seq).values

    # hold out the final n_test rows
    train = supervised_values[0:-n_test]
    test = supervised_values[-n_test:]

    return scaler, train, test

In [3]:
# fit an LSTM network to training data
def fit_lstm(train, n_lag, n_seq, n_batch, nb_epoch, n_neurons):
    """Build and train a stateful LSTM followed by a Dense output layer.

    Parameters
    ----------
    train : 2-D array
        Rows of supervised samples; the first `n_lag` columns are inputs and
        the remaining columns are the targets.
    n_lag : int
        Number of input columns.
    n_seq : int
        Not used inside this function.
    n_batch : int
        Batch size used for the network input shape and for fitting.
    nb_epoch : int
        Number of single-epoch fit passes; state is reset after each pass.
    n_neurons : int
        Number of LSTM units.

    Returns
    -------
    The compiled and fitted Sequential model.
    """
    # split columns into inputs and targets
    inputs = train[:, 0:n_lag]
    targets = train[:, n_lag:]

    # reshape inputs to [samples, timesteps, features] with one timestep
    n_samples, n_features = inputs.shape[0], inputs.shape[1]
    inputs = inputs.reshape(n_samples, 1, n_features)

    # network: stateful LSTM -> Dense with one unit per target column
    recurrent = LSTM(n_neurons,
                     batch_input_shape=(n_batch, inputs.shape[1], inputs.shape[2]),
                     stateful=True)
    model = Sequential()
    model.add(recurrent)
    model.add(Dense(targets.shape[1]))
    model.compile(loss='mean_squared_error', optimizer='adam')

    # one epoch at a time so the LSTM state can be cleared between passes
    for _ in range(nb_epoch):
        model.fit(inputs, targets, epochs=1, batch_size=n_batch, verbose=0, shuffle=False)
        model.reset_states()

    return model

In [4]:
# make one forecast with an LSTM
def forecast_lstm(model, X, n_batch):
    """Run one input pattern through `model` and return its forecast as a list.

    `X` is reshaped to [1 sample, 1 timestep, len(X) features], passed to
    ``model.predict`` with ``batch_size=n_batch``, and the first row of the
    prediction is returned as a plain Python list.
    """
    n_features = len(X)
    pattern = X.reshape(1, 1, n_features)

    prediction = model.predict(pattern, batch_size=n_batch)

    return list(prediction[0, :])

In [5]:
# invert differenced forecast
def inverse_difference(last_ob, forecast):
    """Undo first-order differencing on a multi-step forecast.

    The first forecast difference is added to `last_ob`; every following
    difference is added to the previously reconstructed value. Returns the
    reconstructed values as a list of the same length as `forecast`.
    """
    # seed the running level with the first step
    running = forecast[0] + last_ob
    inverted = [running]

    # accumulate the remaining differences onto the running level
    for step in forecast[1:]:
        running = step + running
        inverted.append(running)

    return inverted

In [6]:
# make a forecast with the fitted LSTM for every row of the test set
def make_forecasts(model, n_batch, train, test, n_lag, n_seq):
    """Forecast every test sample with the fitted model.

    For each row of `test`, the first `n_lag` columns are passed to
    `forecast_lstm`; the resulting forecasts are collected in a list, one
    entry per test row. `train` and `n_seq` are accepted but not used here.
    """
    return [
        forecast_lstm(model, test[row, 0:n_lag], n_batch)
        for row in range(len(test))
    ]

In [7]:
# inverse data transform on forecasts
def inverse_transform(series, forecasts, scaler, n_test):
    """Map scaled, differenced forecasts back to the original value scale.

    For forecast number ``i`` the scaling is undone with
    ``scaler.inverse_transform`` and the differencing is undone starting from
    the observation at position ``len(series) - n_test + i - 1`` of
    ``series.values``. Returns a list with one reconstructed list per forecast.
    """
    inverted = []
    for i, raw in enumerate(forecasts):
        # a single-row 2-D array is what the scaler expects
        forecast = array(raw)
        forecast = forecast.reshape(1, len(forecast))

        # undo the scaling and flatten back to one row
        inv_scale = scaler.inverse_transform(forecast)[0, :]

        # undo the differencing from the observation preceding this forecast
        index = len(series) - n_test + i - 1
        last_ob = series.values[index]

        inverted.append(inverse_difference(last_ob, inv_scale))

    return inverted

In [8]:
# evaluate the RMSE for each forecast time step

def evaluate_forecasts(test, forecasts, n_lag, n_seq, filename):
    """Compute MAE, MSE and RMSE for each forecast horizon.

    Parameters
    ----------
    test : sequence of sequences
        Actual values; ``test[k][i]`` is the actual at horizon ``i`` for
        sample ``k``.
    forecasts : sequence of sequences
        Predicted values laid out like `test`.
    n_lag : int
        Not used inside this function.
    n_seq : int
        Number of forecast horizons to evaluate.
    filename : str
        Label stored in the 'FILENAME' column of every row.

    Returns
    -------
    pandas.DataFrame
        One row per horizon (``PREDICTION #`` 1..n_seq) with the columns
        FILENAME, PREDICTION #, MEAN ABSOLUTE ERROR, MEAN SQUARED ERROR and
        ROOT MEAN SQUARED ERROR. With ``n_seq == 0`` the frame is empty but
        still carries those columns.

    Notes
    -----
    Rows are collected in a list and the frame is built once. This avoids
    ``DataFrame.append`` (removed in pandas 2.0) and no longer emits the
    all-NaN placeholder row that the previous ``index=[1]`` initialisation
    put at the top of the result.
    """
    columns = ['FILENAME', 'PREDICTION #', 'MEAN ABSOLUTE ERROR',
               'MEAN SQUARED ERROR', 'ROOT MEAN SQUARED ERROR']

    records = []
    for i in range(n_seq):
        actual = [row[i] for row in test]
        predicted = [forecast[i] for forecast in forecasts]

        MAE = mean_absolute_error(actual, predicted)
        MSE = mean_squared_error(actual, predicted)
        RMSE = sqrt(MSE)  # reuse the MSE instead of computing it a second time

        records.append({'FILENAME': filename, 'PREDICTION #': (i+1), 'MEAN ABSOLUTE ERROR': MAE,
                        'MEAN SQUARED ERROR': MSE, 'ROOT MEAN SQUARED ERROR': RMSE})

    return pd.DataFrame(records, columns=columns)

In [9]:
import matplotlib as mpl

# plot the forecasts in the context of the original dataset
def plot_forecasts(series, Dates, filename, forecasts, n_test, showplot):
    """Plot the observed series with each multi-step forecast overlaid.

    Parameters
    ----------
    series : pandas.Series
        Observed values; ``series.values`` is plotted in blue.
    Dates : any
        Not used inside this function (kept for interface compatibility).
    filename : str
        Used as the plot title and, with ".csv" removed, as the prefix of the
        saved image name.
    forecasts : list of list
        One list of forecast values (original scale) per forecast origin.
    n_test : int
        Offset from the end of `series`; forecast ``i`` is anchored at
        position ``len(series) - n_test + i - 1``.
    showplot : str
        'y' saves the figure as ``<name>_PREDICTIONS.jpg`` and shows it; any
        other value closes the figure without saving.

    Returns
    -------
    pandas.DataFrame
        One ``Prediction<k>`` column per forecast, aligned on the x positions
        at which that forecast was drawn. An empty DataFrame is returned when
        `forecasts` is empty.

    Notes
    -----
    The previous implementation ended with unconditional ``del d``,
    ``del d2`` and ``del df2`` statements, which raised UnboundLocalError
    whenever fewer than two forecasts were supplied. Locals are released
    automatically on return, so the deletions were removed.
    """
    rcParams['figure.figsize'] = 15, 6

    # plot the entire dataset in blue
    pyplot.plot(series.values, color='blue')
    pyplot.title(filename, loc='center')

    # colour theme for the prediction lines (one shade per forecast)
    cmap = mpl.cm.cool
    n_forecasts = len(forecasts)

    df = None
    for i, forecast in enumerate(forecasts):
        # each line starts on the last observation before the forecast
        off_s = len(series) - n_test + i - 1
        off_e = off_s + len(forecast) + 1
        xaxis = list(range(off_s, off_e))
        yaxis = [series.values[off_s]] + list(forecast)

        pyplot.plot(xaxis, yaxis, ':', color=cmap(i / float(n_forecasts)), lw=3)

        # collect this forecast as its own column, indexed by x position
        column = pd.DataFrame({('Prediction'+str(i+1)): pd.Series(yaxis, index=[xaxis])})
        if df is None:
            df = column
        else:
            # add() with fill_value=0 aligns both frames on the union of rows and columns
            df = df.add(column, fill_value=0)

    # show the plot
    if showplot == 'y':
        pyplot.savefig(filename.replace(".csv", "") + "_PREDICTIONS.jpg")
        pyplot.show()
    else:
        pyplot.close()

    return df if df is not None else pd.DataFrame()

In [10]:
def yes_or_no(question):
    """Ask a yes/no question on stdin until a valid answer is given.

    The reply is upper-cased and stripped; anything other than 'Y' or 'N'
    prints a reminder and asks again. Returns True for 'Y', False for 'N'.
    """
    prompt = question + "(Y/N): "
    while True:
        answer = input(prompt).upper().strip()
        if answer in ("Y", "N"):
            return answer == "Y"
        print("Input Y or N")

In [11]:
def additive_seasonal_decomp(series, frq):
    """Plot the additive seasonal decomposition of `series`.

    Parameters
    ----------
    series : pandas.Series
        Series to decompose; ``series.values`` is passed to
        ``seasonal_decompose``.
    frq : int
        Seasonal period, in observations. It is now forwarded to
        ``seasonal_decompose``; previously it was ignored and a hard-coded
        ``freq=1`` was used regardless of the argument.
    """
    rcParams['figure.figsize'] = 15, 7

    # NOTE(review): newer statsmodels releases name this keyword `period`;
    # `freq` is kept here to match the version this notebook was written for.
    result = seasonal_decompose(series.values, model='additive', freq=frq)
    result.plot()
    print('Additive Decomposition Plot')
    pyplot.show()

In [12]:
def multiplicative_seasonal_decomp(series, frq):
    """Plot the multiplicative seasonal decomposition of `series`.

    Parameters
    ----------
    series : pandas.Series
        Series to decompose; ``series.values`` is passed to
        ``seasonal_decompose``.
    frq : int
        Seasonal period, in observations. It is now forwarded to
        ``seasonal_decompose``; previously it was ignored and a hard-coded
        ``freq=1`` was used regardless of the argument.
    """
    rcParams['figure.figsize'] = 15, 7

    # NOTE(review): newer statsmodels releases name this keyword `period`;
    # `freq` is kept here to match the version this notebook was written for.
    result = seasonal_decompose(series.values, model='multiplicative', freq=frq)
    result.plot()
    print('Multiplicative Decomposition Plot')
    pyplot.show()

In [13]:
# Datasets processed by the LSTM driver cell below (one CSV per entry).
# Alternatives used previously:
#   ['ge.us.csv']
#   ['WKHS.csv']
filenames = ['FIBER_NODE_DI_PREDICTABILITY.CSV']

In [15]:
#===========================================================================================
#========================================== PREP ===========================================
#===========================================================================================
# loop through all datasets listed in `filenames` above

# Driver cell: for every CSV in `filenames`, load the series, fit the LSTM,
# forecast, evaluate, plot, and write the results next to the input file.

# set ending beep duration in seconds
beepduration = 1

tstarta = datetime.now()
print('Start Time: ', tstarta.strftime("%H:%M:%S"))

# per-file error tables returned by evaluate_forecasts(), printed on request at the end
RMSE_ARR = []
df = []
df3 = []

for index, item in enumerate(filenames):

    # set start time for execution time measurement
    tstart = datetime.now()

    # load dataset and index it by the parsed 'DT' column
    # (assigning the index directly avoids creating a stray `Timestamp`
    # attribute on the frame, which pandas warns about)
    Actuals = read_csv(item)
    Actuals.index = pd.to_datetime(Actuals.DT, format='%m/%d/%Y')
    Actuals.drop('DT', axis=1, inplace=True)

    # back-fill gaps in the data
    Actuals = Actuals.bfill()

    Dates = read_csv(item, usecols=['DT'])

    # configure
    startpoint = 1   # how far back to start predictions
    n_lag = 1        # amount of lag included in prediction points
    n_seq = 13       # number of prediction points per prediction iteration
    n_test = 6       # number of visualized predictions iterations
    n_epochs = 100   # number of iterations to improve predictive outcome
    n_batch = 1      # must be 1
    n_neurons = 400  # number of LSTM units

    # prepare data
    scaler, train, test = prepare_data(Actuals.TOTAL_ROWS, n_test, n_lag, n_seq)

    #=======================================================================================
    #======================================== RUN ==========================================
    #=======================================================================================

    # optional seasonal decomposition plots
    #   Level: The average value in the series.
    #   Trend: The increasing or decreasing value in the series.
    #   Seasonality: The repeating short-term cycle in the series.
    #   Noise: The random variation in the series.
    #additive_seasonal_decomp(Actuals.LIVES, 7)
    #multiplicative_seasonal_decomp(Actuals.LIVES, 7)

    # plot autocorrelation
    rcParams['figure.figsize'] = 15, 5
    plot_acf(Actuals.TOTAL_ROWS)
    pyplot.savefig(item.replace(".csv", "") + "_AUTOCORRELATION.jpg", bbox_inches='tight')
    pyplot.show()

    # fit model
    model = fit_lstm(train, n_lag, n_seq, n_batch, n_epochs, n_neurons)

    # make forecasts
    forecasts = make_forecasts(model, n_batch, train, test, n_lag, n_seq)

    # inverse transform forecasts and test
    forecasts = inverse_transform(Actuals.TOTAL_ROWS, forecasts, scaler, n_test + startpoint)
    actual = [row[n_lag:] for row in test]
    actual = inverse_transform(Actuals.TOTAL_ROWS, actual, scaler, n_test + startpoint)

    # evaluate forecasts and keep the table for the summary prompt below
    RMSE_ARR_df = evaluate_forecasts(actual, forecasts, n_lag, n_seq, item + '_Zero_nan')
    RMSE_ARR.append(RMSE_ARR_df)

    # plot forecasts
    df = Actuals

    df3 = (plot_forecasts(Actuals.TOTAL_ROWS, Dates, item, forecasts, n_test + startpoint, 'y'))
    # NOTE(review): the labels below are hard-coded, so this assignment only
    # works when plot_forecasts returns exactly 19 rows — confirm for new datasets.
    df3['DT'] = ['2018-12-01','2019-01-01','2019-02-01','2019-03-01','2019-04-01','2019-05-01','2019-06-01','2019-07-01','2019-08-01','2019-09-01','2019-10-01','2019-11-01','2019-12-01','2019-12-02','2019-12-03','2019-12-04','2019-12-05','2019-12-06','2019-12-07']
    df3.set_index('DT', inplace=True)
    results = pd.concat([df, df3])

    del Actuals
    del Dates

    # per-file execution time
    tend = datetime.now()
    totaltime = tend - tstart

    results.to_csv(item.replace(".csv", "") + "_VALUES.CSV")
    RMSE_ARR_df.to_csv(item.replace(".csv", "") + "_RMSE.CSV")

#===========================================================================================
#========================================== DONE ===========================================
#===========================================================================================

frequency = 750  # beep frequency in Hertz
duration = beepduration*1000  # beep duration in milliseconds
winsound.Beep(frequency, duration)

# total run time across all files (previously the last file's `totaltime`
# was printed here, which was also undefined when no file was processed)
tendd = datetime.now()
totaltimex = tendd - tstarta
d = datetime(1,1,1) + totaltimex
print('Total Time Elapsed (hh:mm:ss.ms) {}'.format(totaltimex))

if yes_or_no("Print RMSE for models with nulls replaced with 0?: "):
    if RMSE_ARR:
        print(pd.concat(RMSE_ARR, ignore_index=True))
    else:
        print('No datasets were processed.')

del RMSE_ARR

Start Time:  16:44:39


NameError: name 'np' is not defined

In [None]:

# In[41]:

#===========================================================================================
#===========================================================================================
#==================================== LSTM TIMER TEST ======================================
#===========================================================================================
#===========================================================================================



# In[2]:

# use to ensure memory is cleared after run.
# import sys
# def sizeof_fmt(num, suffix='B'):
#     ''' By Fred Cirera, after https://stackoverflow.com/a/1094933/1870254'''
#     for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
#         if abs(num) < 1024.0:
#             return "%3.1f%s%s" % (num, unit, suffix)
#         num /= 1024.0
#     return "%.1f%s%s" % (num, 'Yi', suffix)

# for name, size in sorted(((name, sys.getsizeof(value)) for name,value in locals().items()),
#                          key= lambda x: -x[1])[:10]:
#     print("{:>30}: {:>8}".format(name,sizeof_fmt(size)))


# In[ ]:




# In[154]:

from pandas import DataFrame
import pandas as pd

from pandas import Series
from pandas import concat
from pandas import read_csv
from pandas import datetime
from matplotlib import pyplot

from numpy import array
import numpy as np

import winsound


# In[155]:

from scipy.stats import boxcox
from scipy.special import inv_boxcox


# In[156]:

import fbprophet


# In[157]:

# load dataset
# Read the CSV and index it by the parsed 'DT' column (month/day/year).
Actuals = read_csv('AMAZON2.csv')
Actuals.index = pd.to_datetime(Actuals.DT, format='%m/%d/%Y')
Actuals.drop('DT', axis=1, inplace=True)

# Put the data on a daily grid and back-fill the gaps this introduces.
Actuals = Actuals.asfreq('d')
Actuals = Actuals.resample('d').bfill()
Actuals = Actuals.bfill()

# The cells below operate on `df`, which this section never assigned; define
# it here so the section runs on a fresh kernel.
# NOTE(review): assumes `df` is meant to be this frame (the later cells read
# df.index and df['LIVES'], and Actuals.LIVES is plotted) — confirm.
df = Actuals.copy()


# In[158]:

# Inspect `df` before modelling. `df` must already be a DataFrame whose index
# holds the dates and which has a 'LIVES' column (both are read further down).
df.shape


# In[159]:

# column dtypes of the frame
df.dtypes


# In[160]:

# first 20 rows of the loaded data
print(Actuals.head(20))


# In[162]:

# dotted line plot of the LIVES column against the index
pyplot.plot(Actuals.index, Actuals.LIVES, ':', lw=3)
pyplot.show()


# In[163]:

# add the 'ds' (taken from the index) and 'y' columns that are passed to Prophet below
df['ds'] = df.index
# this initial 'y' is overwritten by the Box-Cox transformed values in the next cell
df['y'] = df['LIVES']


# In[164]:

# Apply Box-Cox Transform to value column and assign to new column y
# `lam` is the second value returned by boxcox(); it is reused for the inverse transform later
df['y'], lam = boxcox(df['LIVES'])


# In[165]:

# Create a Prophet model with its default settings.
m = fbprophet.Prophet()


# In[169]:

# fit on `df` (the 'ds' and 'y' columns were added above; 'y' holds Box-Cox values)
m.fit(df)


# In[168]:

# extend the frame of dates by 12 periods beyond the training data
# NOTE(review): no `freq` is given, so Prophet's default period length applies — confirm it matches the data
future = m.make_future_dataframe(periods=12)

print(future)
# In[99]:

# predict for every date in `future`
forecast = m.predict(future)


# In[100]:

# trailing ';' suppresses the figure repr in the notebook output
m.plot(forecast);


# In[101]:

m.plot_components(forecast);


# In[102]:

# Apply inverse Box-Cox transform to specific forecast columns
# Note: this runs after the two plots above, so those plots show the
# Box-Cox-transformed scale rather than the original units.
forecast[['yhat','yhat_upper','yhat_lower']] = forecast[['yhat','yhat_upper','yhat_lower']].apply(lambda x: inv_boxcox(x, lam))


# In[ ]:

# In[1]:

#import packages
import pandas as pd
import numpy as np

#to plot within notebook
import matplotlib.pyplot as plt
# run_line_magic() replaces the deprecated get_ipython().magic() call
get_ipython().run_line_magic('matplotlib', 'inline')

#setting figure size
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20,10

#for normalizing data
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))

#read the file
df = pd.read_csv('ge.us.csv')

#print the head
#df.head()


# In[2]:

#importing required libraries
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM


# In[3]:

#creating dataframe
# Sort by index, then copy the two columns of interest in one vectorised
# step. The previous row-by-row chained assignment
# (new_data['DT'][i] = ...) was slow and is a silent no-op under pandas
# copy-on-write. The result has a fresh 0..n-1 index; the columns now keep
# the dtypes read from the CSV instead of being stored as generic objects.
data = df.sort_index(ascending=True, axis=0)
new_data = data.loc[:, ['DT', 'LIVES']].reset_index(drop=True)


# In[4]:

#setting index
# index the frame by its DT column, then remove that column from the data
# (in-place drop: re-running this cell without rebuilding new_data will fail,
# because 'DT' no longer exists as a column)
new_data.index = new_data.DT
new_data.drop('DT', axis=1, inplace=True)

#creating train and test sets
# 2-D value array of the remaining column(s)
dataset = new_data.values

# first 80% of the rows for training, the remaining 20% for validation
train = dataset[0:int(len(df)*.8),:]
valid = dataset[int(len(df)*.8):,:]


# In[5]:

#converting dataset into x_train and y_train
# Scale to [0, 1]. The scaler is fitted on the whole `dataset` array,
# i.e. on the training and validation rows together.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(dataset)

# Sliding windows over the training range: each sample is the previous 60
# scaled values of column 0 and its target is the value that follows them.
x_train, y_train = [], []
for i in range(60,len(train)):
    x_train.append(scaled_data[i-60:i,0])
    y_train.append(scaled_data[i,0])
x_train, y_train = np.array(x_train), np.array(y_train)

# add a trailing axis of size 1: (samples, 60) -> (samples, 60, 1)
x_train = np.reshape(x_train, (x_train.shape[0],x_train.shape[1],1))


# In[6]:

# create and fit the LSTM network
# Two stacked 50-unit LSTM layers followed by a single-unit Dense output.
model = Sequential()
# return_sequences=True so the second LSTM layer receives the full sequence
model.add(LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1],1)))
model.add(LSTM(units=50))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')
# a single epoch with batch size 1
model.fit(x_train, y_train, epochs=1, batch_size=1, verbose=2)


# In[7]:

#predicting 20% of values, using past 60 from the train data
# Take the validation rows plus the 60 rows immediately before them, so the
# first validation point has a full 60-step window.
inputs = new_data[len(new_data) - len(valid) - 60:].values
inputs = inputs.reshape(-1,1)
# reuse the already-fitted scaler (transform only, no refit)
inputs  = scaler.transform(inputs)

# one 60-step window per validation point
X_test = []
for i in range(60,inputs.shape[0]):
    X_test.append(inputs[i-60:i,0])
X_test = np.array(X_test)

# add a trailing axis of size 1: (samples, 60) -> (samples, 60, 1)
X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))
closing_price = model.predict(X_test)
# map the predictions back from the scaled range to the original units
closing_price = scaler.inverse_transform(closing_price)


# In[8]:

# root-mean-square error between the validation values and the predictions
rms=np.sqrt(np.mean(np.power((valid-closing_price),2)))
print(rms)


# In[13]:

#for plotting
# Re-split as DataFrames (rather than value arrays) for plotting.
train = new_data[:int(len(df)*.8)]
# Explicit copy: the 'Predictions' column is added to this frame, and
# assigning a column on a bare slice of new_data triggers SettingWithCopyWarning.
valid = new_data[int(len(df)*.8):].copy()

valid['Predictions'] = closing_price

# training values, then validation values alongside the model's predictions
plt.plot(train['LIVES'])
plt.plot(valid[['LIVES','Predictions']])


# In[12]:

print('DONE')


# In[ ]:

