In [9]:
print('STOCK PREDICTION USING RNN LSTM')
import numpy as np
import pandas as pd
from sklearn import preprocessing
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
from keras.layers import  Dropout
from keras.models import model_from_json
from keras.models import load_model
from keras import regularizers

import matplotlib.pyplot as pltb
import matplotlib.ticker as mtick
fmt = '$%.0f'
tick = mtick.FormatStrFormatter(fmt)

%matplotlib inline

STOCK PREDICTION USING RNN LSTM


In [10]:
# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)
import os

IMAGES_PATH=r"C:\Users\Shaaf Abdullah\Pictures\Plots"
def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    path = os.path.join(IMAGES_PATH, fig_id + "." + fig_extension)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [13]:
path = r"C:\Users\Shaaf Abdullah\FYP CODES\Stocks_Data"
directory=os.listdir(path)
Train_MSE={}
Test_MSE={}
Epochs={}

In [17]:
for file in directory:
    if file.endswith('_final.csv'):
        keras.backend.clear_session()
        data_csv = pd.read_csv(file)
        name=file.strip('_final.csv')
        data_csv[['Close']].plot()
        save_fig(name+'_closing_price_curve')
        plt.clf()
        percentage_of_data = 1.0
        data_to_use = int(percentage_of_data*(len(data_csv)-1))

         # 80% of data will be of training
        train_end = int(data_to_use*0.8)

        total_data = len(data_csv)
        print("total_data:", total_data)
        start = total_data - data_to_use

        # Currently doing prediction only for 1 step ahead
        steps_to_predict = 1

        #close, compund, neg, neu, pos, open, high, low, volume
        # Order -> 8,1,2,3,4,5,6,7,9
        yt = data_csv.iloc[start:total_data,8] #close
        yt1 = data_csv.iloc[start:total_data,1] #compund
        yt2 = data_csv.iloc[start:total_data,2] #neg
        yt3 = data_csv.iloc[start:total_data,3] #neu
        yt4 = data_csv.iloc[start:total_data,4] #pos
        yt5 = data_csv.iloc[start:total_data,5] #open
        yt6 = data_csv.iloc[start:total_data,6] #high
        yt7 = data_csv.iloc[start:total_data,7] #low
        vt = data_csv.iloc[start:total_data,9] #volume

        print("yt head (close) :")
        print(yt.head())
        #shift next day close and next day compund
        yt_ = yt.shift(-1) #shifted close
        yt1_ = yt1.shift(-1) #shifted compund

       #taking only: close, next_close, compund, next_compund, volume, open, high, low
        data = pd.concat([yt, yt_, yt1, yt1_, vt, yt5, yt6, yt7], axis=1)
        data.columns = ['yt', 'yt_', 'yt1', 'yt1_','vt', 'yt5', 'yt6', 'yt7']

        data = data.dropna()
     
        print(data[:10])
        # target variable - closed price
        # after shifting
        y = data['yt_'] #next_close

        # close, compund, next_compund, volume, open, high, low   
        cols = ['yt', 'yt1', 'yt1_', 'vt', 'yt5', 'yt6', 'yt7']
        x = data[cols]
        scaler_x = preprocessing.MinMaxScaler (feature_range=(-1, 1))
        x = np.array(x).reshape((len(x) ,len(cols)))
        x = scaler_x.fit_transform(x)

        scaler_y = preprocessing.MinMaxScaler (feature_range=(-1, 1))
        y = np.array (y).reshape ((len( y), 1))
        y = scaler_y.fit_transform (y)
        X_train = x[0 : train_end,]
        X_test = x[train_end+1 : len(x),]    
        y_train = y[0 : train_end] 
        y_test = y[train_end+1 : len(y)]  

        X_train = X_train.reshape (X_train. shape + (1,)) 
        X_test = X_test.reshape(X_test.shape + (1,))
        batch_size = 32
        if(data_csv.shape[0]<1000):
            nb_epoch = 100
        else:
            nb_epoch = 150
        neurons = 25
        dropout = 0.1

        seed = 2016
        np.random.seed(seed)

        model = Sequential ()
        model.add(LSTM(neurons, return_sequences=True, activation='tanh', inner_activation='hard_sigmoid', input_shape=(len(cols), 1)))
        model.add(Dropout(dropout))
        model.add(LSTM(neurons, return_sequences=True,  activation='tanh'))
        model.add(Dropout(dropout))
        model.add(LSTM(neurons, activation='tanh'))
        model.add(Dropout(dropout))

        model.add(Dense(activity_regularizer=regularizers.l1(0.00001), output_dim=1, activation='linear'))
        model.add(Activation('tanh'))

        print(model.summary())
        model.compile(loss='mean_squared_error' , optimizer='RMSprop')
        history=model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=1, validation_split=0.2)
        score_train = model.evaluate(X_train, y_train, batch_size =1)
        score_test = model.evaluate(X_test, y_test, batch_size =1)
        
        Train_MSE[name]=round( score_train ,4)
        Test_MSE[name]=round( score_test ,4)
        Epochs[name]=nb_epoch
        
        print("in train MSE = ", round( score_train ,4)) 
        print("in test MSE = ", round(score_test ,4))
        model.save(name+"_model.h5")
        model.save_weights(name+"_weights.ckpt")
        print("Saved model to disk")
        pred = model.predict(X_test) 
        pred = scaler_y.inverse_transform(np.array(pred).reshape((len(pred), 1)))

        prediction_data = pred[-1]     

        print ("Inputs: {}".format(model.input_shape))
        print ("Outputs: {}".format(model.output_shape))
        print ("Actual input: {}".format(X_test.shape))
        print ("Actual output: {}".format(y_test.shape))

        print ("prediction data:")
        print (prediction_data)

        print ("actual data")
        X_test = scaler_x.inverse_transform(np.array(X_test).reshape((len(X_test), len(cols))))
        print (X_test)

        #########################################################################
        '''Plotting'''
        plt.plot(pred, label="predictions")

        y_test = scaler_y.inverse_transform(np.array(y_test).reshape((len( y_test), 1)))
        plt.plot([row[0] for row in y_test], label="actual")

        plt.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=2)

        ax = plt.axes()
        ax.yaxis.set_major_formatter(tick)
        save_fig(name+"_prediction_plot")
        plt.show()
        plt.clf()
        pd.DataFrame(history.history).plot(figsize=(8, 5))
        plt.grid(True)
        plt.gca().set_ylim(0, 1)
        save_fig(name+"_curves_plot")
        plt.show()
        plt.plot(history.history['loss'])
        plt.plot(history.history['val_loss'])
        plt.title('model loss')
        plt.ylabel('loss')
        plt.xlabel('epoch')
        plt.legend(['train', 'test'], loc='upper left')
        save_fig("Engro_learning_curves_plot")
        plt.show()

FileNotFoundError: [Errno 2] File AbbottLaboratoriesPakLtd_final.csv does not exist: 'AbbottLaboratoriesPakLtd_final.csv'

AbbottLaboratoriesPakLtd_final.csv
AbdullahShahaGhaziSugerMillsLtd_final.csv
AdamSugarMillsLimited_final.csv
AGPLimited_final.csv
AgriautoIndustriesLtd_final.csv
AgritechLimited_final.csv
AkzoNobelPakistanLimitedAkzoNobelPakistanLimited_final.csv
AlAbbasSugarMillsLtd_final.csv
AlGhaziTractorsLimited_final.csv
AlliedBankLtd_final.csv
AlShaheerCorporatio_final.csv
ArchromaPakistanLimited_final.csv
ArifHabibCorporationLtd_final.csv
ArujIndustriesLimited_final.csv
AskariBankLimited_final.csv
AtlasBatteryLtd_final.csv
ATLASHONDA_final.csv
AtTahurLtd_final.csv
AttockCementPakLtd_final.csv
AttockPetroleumLimited_final.csv
AzgardNineLimited_final.csv
BabaFariedSugarMillsLtd_final.csv
BankAlfalahLtd_final.csv
BankAlHabibLtd_final.csv
BankislamiPakistanLtd_final.csv
BankOfPunjab_final.csv
BawanyAirProductsLtd_final.csv
BergerPaintsPakistanLtd_final.csv
BestwayCementLimited_final.csv
BifoIndustriesLtd_final.csv
BilalFibresLtd_final.csv
BleesedTextileLtd_final.csv
BurshaneLPGPakistanLimited_final.