<a href="https://colab.research.google.com/github/RiddhimanRaut/Deep-Learning-based-CPR-estimation/blob/main/CPR_prediction_multivariate_LSTM_tobetrialled_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#Standard Imports for RNNs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
from datetime import datetime

In [2]:
# Location of the Excel workbook that holds the process measurements.
url = "https://github.com/RiddhimanRaut/Deep-Learning-based-CPR-estimation/blob/main/excel_dataset.xlsx?raw=true"
df = pd.read_excel(io=url)
# Optional hour-wise subsampling (every sixth row), currently switched off:
#df = df[5::6]
df.head(n=5)

Unnamed: 0,Date Time,CPRAVG,PROP,BUT,ETHYLENE,AI12201M,FLW,COT,TEMP,PRESS,O2,DRAFT,ETH
0,01.01.2009 00:10:00,0.428151,18.960609,3.97363,29.73247,0.014431,30711.65039,820.11821,632.399261,0.466357,1.44416,-5.616851,75.8097
1,01.01.2009 00:20:00,0.427783,18.96036,3.973562,30.12359,0.014822,30700.66016,819.662811,633.057602,0.475288,1.413894,-5.540349,75.810066
2,01.01.2009 00:30:00,0.427432,19.009649,4.181924,30.477831,0.012689,30689.66016,819.886444,631.841385,0.464559,1.515123,-5.628389,75.552841
3,01.01.2009 00:40:00,0.427082,19.15571,3.950392,30.425819,0.01258,30678.66016,819.660172,632.495224,0.471809,1.404055,-5.576591,75.580223
4,01.01.2009 00:50:00,0.426732,19.33464,3.689848,30.546061,0.012472,30667.66016,819.646805,632.388214,0.470504,1.191485,-5.274679,75.638023


In [3]:
# Split the timestamps off from the measurements; they are kept for use on the
# x-axis of plots.
# `pop` removes the 'Date Time' column from df, so re-running this cell without
# reloading df raises a KeyError.
raw_timestamps = df.pop('Date Time')
date_time = pd.to_datetime(raw_timestamps, format='%d.%m.%Y %H:%M:%S')

We remove the `ETHYLENE` and `AI12201M` columns because they are not controllable factors.

In [4]:
# Remove both excluded columns in one call, then preview the remaining frame.
df = df.drop(columns=['ETHYLENE', 'AI12201M'])
df.head()

Unnamed: 0,CPRAVG,PROP,BUT,FLW,COT,TEMP,PRESS,O2,DRAFT,ETH
0,0.428151,18.960609,3.97363,30711.65039,820.11821,632.399261,0.466357,1.44416,-5.616851,75.8097
1,0.427783,18.96036,3.973562,30700.66016,819.662811,633.057602,0.475288,1.413894,-5.540349,75.810066
2,0.427432,19.009649,4.181924,30689.66016,819.886444,631.841385,0.464559,1.515123,-5.628389,75.552841
3,0.427082,19.15571,3.950392,30678.66016,819.660172,632.495224,0.471809,1.404055,-5.576591,75.580223
4,0.426732,19.33464,3.689848,30667.66016,819.646805,632.388214,0.470504,1.191485,-5.274679,75.638023


In [5]:
# Every column still present in df takes part in training.
cols = df.columns.tolist()
# Cast to float so the frame handed to the scaler is purely numeric.
df_for_rnn = df[cols].astype(float)

In [6]:
# Split by row order: the first 70% of the rows form the training frame and
# the remaining rows form the test frame (no shuffling).
n = len(df)
split_row = int(n * 0.7)
training_df = df_for_rnn.iloc[:split_row]
test_set = df_for_rnn.iloc[split_row:]

In [7]:
# Rescale every column to the [0, 1] range.
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# StandardScaler is imported as the alternative: scaler = StandardScaler()
scaler = MinMaxScaler(feature_range=(0, 1))
# The scaler is fitted on the training rows only; transforming returns a numpy array.
scaler.fit(training_df)
df_for_training_scaled = scaler.transform(training_df)
print('Scaled Dataset:\n', df_for_training_scaled)

Scaled Dataset:
 [[0.04261512 0.39141737 0.71843606 ... 0.54553167 0.061713   0.69995722]
 [0.03834461 0.39139343 0.71841953 ... 0.53416085 0.09054363 0.69998058]
 [0.03428528 0.39611563 0.76908826 ... 0.57219227 0.05736476 0.68357295]
 ...
 [0.94817084 0.28810413 0.68416253 ... 0.67555781 0.51802072 0.79587724]
 [0.95048011 0.28807252 0.68538325 ... 0.68702255 0.51584291 0.79587529]
 [0.95188739 0.28809554 0.70426883 ... 0.68955967 0.51498322 0.79587237]]


In [8]:
#Readying the Multivariate Input and Univariate Output

# Both horizons are counted in ROWS of the scaled training array. The preview
# of the raw data shows rows 10 minutes apart and the hour-wise subsampling is
# commented out, so 60 rows is not 60 hours.
n_future = 1 #how many rows after the end of a window the target row sits
n_past = 60  #how many past rows make up one input window

target_col = 0               # CPRAVG is the first column of the frame
feature_cols = slice(1, 10)  # the other nine columns are the model inputs

#initialisation
trainX = []
trainY = []

for i in range(n_past, len(df_for_training_scaled)-n_future+1):
  # Input: the n_past rows that precede row i. The window length follows
  # n_past (it used to be a literal 60 that ignored the setting above).
  trainX.append(df_for_training_scaled[i-n_past:i, feature_cols])
  # Target: CPRAVG at row i+n_future-1, kept as a length-1 slice so trainY is 2-D.
  trainY.append(df_for_training_scaled[i+n_future-1:i+n_future, target_col])
trainX = np.array(trainX)   # axis 0: windows, axis 1: past rows, axis 2: input columns
trainY = np.array(trainY)   # axis 0: windows, axis 1: predicted value
print('TrainX shape = (%d,%d,%d)'%(trainX.shape[0],trainX.shape[1],trainX.shape[2]))
print('TrainY shape = (%d,%d)'%(trainY.shape[0],trainY.shape[1]))

TrainX shape = (8667,60,9)
TrainY shape = (8667,1)


In [9]:
# Part 2 - Building the RNN

# Keras pieces used by the network
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout
from keras.regularizers import l2

# Input windows are (timesteps, features), taken from the training tensor.
n_steps, n_features = trainX.shape[1], trainX.shape[2]

# Two stacked LSTM layers (the first returns the full sequence so the second
# can consume it), dropout, then a dense read-out as wide as the target.
model = Sequential([
    LSTM(64, activation='sigmoid', return_sequences=True,
         input_shape=(n_steps, n_features)),
    LSTM(32, activation='sigmoid', return_sequences=False,
         recurrent_regularizer=l2(0.01), bias_regularizer=l2(0.01)),
    Dropout(0.2),
    Dense(trainY.shape[1]),
])

model.compile(loss='mse', optimizer='adam')
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 60, 64)            18944     
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                12416     
_________________________________________________________________
dropout (Dropout)            (None, 32)                0         
_________________________________________________________________
dense (Dense)                (None, 1)                 33        
Total params: 31,393
Trainable params: 31,393
Non-trainable params: 0
_________________________________________________________________


In [None]:
# Train for 200 epochs in mini-batches of 32. With validation_split=0.1 Keras
# holds out 10% of the windows for validation; the returned History object
# carries the per-epoch 'loss' and 'val_loss' curves.
history = model.fit(
    x=trainX,
    y=trainY,
    batch_size=32,
    epochs=200,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [None]:
# Training and validation loss per epoch, drawn on the same axes.
for curve_key, curve_label in (('loss', 'Training Loss'), ('val_loss', 'Validation Loss')):
  plt.plot(history.history[curve_key], label=curve_label)
plt.legend()

In [None]:
# Scale the held-out rows with the scaler that was fitted on the training rows.
test_df_scaled = scaler.transform(test_set)
# Build one input window per row i that has n_past rows before it. The upper
# bound mirrors the training loop (len - n_future + 1); the previous bound
# (len + n_future - 1) only matched it when n_future == 1 and would have run
# past the end of the array for larger horizons.
testX = [
  test_df_scaled[i-n_past:i, 1:10]   # columns 1-9 are the inputs, column 0 is the target
  for i in range(n_past, len(test_df_scaled)-n_future+1)
]
testX = np.array(testX)
testX.shape

In [None]:
# Model output for every test window, still in scaled units.
scaled_pred = model.predict(testX)
# The scaler was fitted on all columns, so the single predicted column is
# repeated across that many columns, inverted, and only column 0 is kept.
tiled_pred = np.repeat(scaled_pred, test_df_scaled.shape[1], axis=-1)
predicted_CPRAVG = scaler.inverse_transform(tiled_pred)[:, 0].reshape(-1, 1)
print(predicted_CPRAVG)

# NaN-filled canvas shaped like df; the predictions are written into the rows
# that follow the training rows plus the first n_past test rows.
predicted_CPRAVGplot = np.empty_like(df)
predicted_CPRAVGplot.fill(np.nan)
first_predicted_row = training_df.shape[0] + n_past
predicted_CPRAVGplot[first_predicted_row:df.shape[0], :] = predicted_CPRAVG

plt.plot(df['CPRAVG'], color='red', label='Real CPR avg')
plt.plot(predicted_CPRAVGplot[:, 0], color='blue', label='Predicted CPR avg')
plt.legend()

Time to predict the future!

We shall use the previous $200$ data points to predict $CPRAVG$.


In [None]:
# Rolling one-step forecast of CPRAVG over the tail of the "future" dataset.
#
# The last `future_holdout` rows are treated as unseen: for each of them the
# model is given the `future_window` rows that precede it and its prediction
# replaces column 0 of that row in a working copy of the data.
url_future = "https://raw.githubusercontent.com/RiddhimanRaut/Deep-Learning-based-CPR-estimation/main/Future_test_dataset.csv"
future_holdout = 1000   # number of trailing rows to predict
future_window = 200     # number of past rows fed to the model per prediction
# NOTE(review): the network was declared with input_shape=(trainX.shape[1], ...),
# i.e. n_past timesteps, while the windows here are 200 rows long -- confirm
# that this mismatch is intended.

df_future = pd.read_csv(url_future)
# Drop the column at position 10; the remaining columns are presumably laid out
# like the training frame (target first, nine inputs after) -- TODO confirm.
df_future = df_future.drop(columns=df_future.columns[10])

split_row = df_future.shape[0] - future_holdout
new_df_train = df_future.iloc[:split_row]
new_df_test = df_future.iloc[split_row:]
length_of_future_prediction = new_df_test.shape[0]
n_known = new_df_train.shape[0]
if n_known < future_window:
  raise ValueError(
      "need at least %d rows before the forecast range, got %d" % (future_window, n_known))

# Working copy of all rows. copy=True guarantees that writing predictions into
# it never touches df_future (the old code wrote into a row extracted from
# new_df_test, which could alias the frame's own data), and filling a
# preallocated array replaces the quadratic np.append inside the loop.
temp = df_future.to_numpy(dtype=float, copy=True)

for step in range(length_of_future_prediction):
  row = n_known + step
  if step % 100 == 0:   # progress every 100 rows instead of one line per row
    print("Predicting CPRAVG for entry:", step)
  # The window is the `future_window` rows just before `row`. Only columns 1-9
  # are fed to the model, so earlier predictions stored in column 0 never
  # re-enter the input.
  window_scaled = scaler.transform(temp[row - future_window:row])
  window_batch = window_scaled[np.newaxis, :, 1:10]
  scaled_pred = model.predict(window_batch, verbose=0)
  # Repeat the prediction across all columns so the scaler can invert it,
  # then keep column 0 (CPRAVG).
  tiled_pred = np.repeat(scaled_pred, window_scaled.shape[1], axis=-1)
  temp[row, 0] = scaler.inverse_transform(tiled_pred)[0, 0]

future_data_numpy = temp[n_known:n_known + length_of_future_prediction, :]
CPRAVG_predicted_values = future_data_numpy[:, 0]
# print(CPRAVG_predicted_values)

In [None]:
# NOTE(review): abandoned draft -- every line of this cell is commented out and
# the loop at the end has no body, so running the cell does nothing.
# url_future = "https://raw.githubusercontent.com/RiddhimanRaut/Deep-Learning-based-CPR-estimation/main/Future_test_dataset.csv"
# new_training_dataset = pd.read_csv(url_future)
# new_training_dataset.drop(new_training_dataset.columns[10], axis = 1, inplace=True)
# new_training_dataset_scaled = scaler.transform(new_training_dataset)
# length_of_whole_dataset = new_training_dataset.shape[0]
# new_n_future = 1
# new_n_past = 200
# for i in range(length_of_whole_dataset-1000,length_of_whole_dataset):




In [None]:
# Forecast produced by the rolling-prediction cell.
plt.plot(CPRAVG_predicted_values,color = 'blue', label = 'Predicted CPR avg')
# The measured CPRAVG is read from a fresh copy of the file rather than from
# df_future, so nothing written during forecasting can leak into the
# reference curve.
df_future2 = pd.read_csv(url_future)
first_row = new_df_train.shape[0]
last_row = first_row + new_df_test.shape[0]
# Positional slice of the forecast range; Series.to_numpy() replaces the
# unbound pd.DataFrame.to_numpy(series) call, which relied on DataFrame
# internals working for a Series.
new_CPRAVG = df_future2['CPRAVG'].iloc[first_row:last_row].to_numpy()
# print(new_CPRAVG)
plt.plot(new_CPRAVG,color = 'red', label = 'New CPR avg')
plt.legend()

In [None]:
# Park the kernel in this cell forever (presumably to keep the hosted session
# from idling out -- interrupt the kernel to stop it). Sleeping between
# wake-ups blocks just like the original busy loop but without spinning a CPU
# core at 100%.
import time

while True:
  time.sleep(60)
