<a href="https://colab.research.google.com/github/AndyZhang127/temp/blob/main/Dissertation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1.Loading essential modules

In [None]:
#fbprophet
!pip install pystan~=2.14
!pip install fbprophet

In [None]:
#Basic packages
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import datetime
import time 
import seaborn as sns # Visualization


#Fbprophet
%matplotlib inline
import pandas as pd
from fbprophet import Prophet

#SVR
from sklearn.svm import SVR

#RF
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler

#LSTM/RNN/GRU
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM,GRU,SimpleRNN
from keras.layers import Dropout
from tensorflow import keras as ks
from keras.regularizers import l2
from keras.preprocessing.sequence import pad_sequences
import statsmodels.stats.diagnostic
import statsmodels.api as sm
import tensorflow as tf


plt.style.use('fivethirtyeight')

#

  import pandas.util.testing as tm


# 2.Defining necessary functions

## Part A: Producing missing values at random

In [None]:
#Producing the missing data randomly
def missingRate_Data(df,columns,missing_rate=0):
  """Blank out ``columns`` in a random sample of rows of ``df``.

  Args:
    df: source DataFrame; it is not modified.
    columns: column label(s) whose values are replaced by NaN in the sampled rows.
    missing_rate: fraction of rows to sample, forwarded to ``DataFrame.sample(frac=...)``.

  Returns:
    A 5-tuple ``(combined, index, sampled, sampled, untouched)``:
    ``combined`` is the untouched rows followed by the blanked rows (not
    re-sorted), ``index`` holds the row labels of the blanked rows,
    ``sampled`` is the blanked rows (returned twice, kept for existing
    callers that unpack five values) and ``untouched`` is the remaining rows.
  """
  Source_missing = df.sample(frac=missing_rate)
  # Assign NaN directly: multiplying by 0 and replacing 0 only works for numeric
  # columns (a string times 0 is '' and would never be turned into NaN).
  Source_missing[columns] = np.nan
  index = Source_missing.index
  non_missing_df = df.drop(index)
  Source_missing_final = pd.concat([non_missing_df, Source_missing])

  return Source_missing_final,index,Source_missing,Source_missing,non_missing_df

## Part B: Defining basic imputation methods (mean, mode, median)

In [None]:
# Define replace methods

def mean_replace(df,col):
  """Return a copy of ``df`` with every NaN replaced by the mean of ``df[col]``.

  The fill value is applied to the whole frame, not only to ``col``; callers in
  this file pass a single-column frame.
  """
  # Read the scalar directly; positional [0] on a label-indexed Series is deprecated.
  mean=df[col].mean()
  return df.fillna(mean)

def mode_replace(df,col):
  """Return a copy of ``df`` with every NaN replaced by the mode of ``df[col]``.

  When several values tie, the first one returned by ``Series.mode`` is used.
  If ``df[col]`` has no non-missing value there is no mode, and an unchanged
  copy is returned (the mean/median variants are likewise a no-op in that case).
  The fill value is applied to the whole frame, not only to ``col``.
  """
  modes=df[col].mode()
  if modes.empty:
    return df.copy()
  return df.fillna(modes.iloc[0])

def median_replace(df,col):
  """Return a copy of ``df`` with every NaN replaced by the median of ``df[col]``.

  The fill value is applied to the whole frame, not only to ``col``; callers in
  this file pass a single-column frame.
  """
  # Read the scalar directly; positional [0] on a label-indexed Series is deprecated.
  median=df[col].median()
  return df.fillna(median)

## Part C: Defining algorithms for imputation (SVR, RF, LSTM, RNN, MLP, GRU, FBProphet)

In [None]:
class SVR(object):
  """Wrapper around scikit-learn's support vector regressor.

  This class has the same name as ``sklearn.svm.SVR`` imported at the top of
  the file, so once it is defined the bare name ``SVR`` refers to this wrapper.
  It therefore also accepts ``gamma`` and exposes ``fit`` so that code written
  against the estimator-style interface keeps working.

  Args:
    C: regularisation strength, forwarded as ``C``.
    ker: kernel name, forwarded as ``kernel``.
    eps: epsilon-tube width, forwarded as ``epsilon``.
    shrink: whether to use the shrinking heuristic, forwarded as ``shrinking``.
    gamma: kernel coefficient, forwarded as ``gamma``.
  """
  def __init__(self, C=1.0, ker='rbf', eps=0.2, shrink=True, gamma='auto'):
    self.C = C
    self.ker = ker
    self.shrink = shrink
    self.eps = eps
    self.gamma = gamma
    self.build_Model()

  def build_Model(self,):
    """Create the underlying scikit-learn estimator from the stored settings."""
    # Aliased local import: inside this module ``SVR`` is this wrapper, so the
    # original ``SVR(...)`` call re-entered __init__ with unknown keywords.
    from sklearn.svm import SVR as _SklearnSVR
    self.model = _SklearnSVR(C=self.C, cache_size=200, coef0=0.0, degree=3, epsilon=self.eps, gamma=self.gamma, kernel=self.ker, max_iter=-1, shrinking=self.shrink, tol=0.001, verbose=False)
    
  def train_Model(self, train_X, train_Y):
    """Fit the estimator on ``train_X`` / ``train_Y``."""
    self.model.fit(train_X, train_Y)

  def fit(self, train_X, train_Y):
    """Estimator-style alias of ``train_Model``; returns ``self``."""
    self.train_Model(train_X, train_Y)
    return self

  def predict(self, test_X):
    """Return the estimator's predictions for ``test_X``."""
    pred = self.model.predict(test_X)
    return pred

class MLP_M(object):
  """Feed-forward network with one ReLU hidden layer, trained with RMSprop on MSE.

  Args:
    inputDim: number of input features.
    hiddenNum: number of units in the hidden layer.
    outputDim: number of units in the output layer.
    lr: RMSprop learning rate.
  """
  def __init__(self, inputDim, hiddenNum, outputDim, lr):
    self.inputDim = inputDim
    self.hiddenNum = hiddenNum
    self.outputDim = outputDim
    # Keras names these arguments ``learning_rate`` and ``epsilon``.
    self.opt = ks.optimizers.RMSprop(learning_rate=lr, rho=0.9, epsilon=1e-06)
    self.build_Model()
    
  def build_Model(self):
    """(Re)create and compile the model; any previously trained weights are discarded."""
    self.model = Sequential()
    self.model.add(Dense(self.hiddenNum, input_dim=self.inputDim, activation='relu'))
    # Honour outputDim (previously hard-coded to 1), as RNNs_M does.
    self.model.add(Dense(self.outputDim))
    self.model.compile(loss='mean_squared_error', optimizer=self.opt)
    
  def train_Model(self, train_X, train_Y, epoch, batchSize):
    """Fit the model for ``epoch`` epochs with mini-batches of ``batchSize``."""
    self.model.fit(train_X, train_Y, epochs=epoch, batch_size=batchSize, verbose=1)
    
  def predict(self, test_X):
    """Return the model's predictions for ``test_X``."""
    pred = self.model.predict(test_X)
    return pred

class RNNs_M(object):
  """Single recurrent layer (GRU, LSTM or SimpleRNN) followed by a dense output layer.

  Args:
    inputDim: number of features per time step.
    hiddenNum: number of units in the recurrent layer.
    outputDim: number of units in the output layer.
    unit: one of ``"GRU"``, ``"LSTM"`` or ``"RNN"``.
    lr: RMSprop learning rate.

  Raises:
    ValueError: if ``unit`` is not one of the supported names.
  """
  def __init__(self, inputDim, hiddenNum, outputDim, unit, lr):
    self.inputDim = inputDim
    self.hiddenNum = hiddenNum
    self.outputDim = outputDim
    # Keras names these arguments ``learning_rate`` and ``epsilon``.
    self.opt =  ks.optimizers.RMSprop(learning_rate=lr, rho=0.9, epsilon=1e-06)
    self.build_Model(unit)

  def build_Model(self, unit="GRU"):
    """Create and compile the model for the requested recurrent ``unit``."""
    recurrent_layers = {"GRU": GRU, "LSTM": LSTM, "RNN": SimpleRNN}
    if unit not in recurrent_layers:
      # Previously an unknown name silently produced a model with no recurrent layer.
      raise ValueError("unit must be one of %s, got %r" % (sorted(recurrent_layers), unit))

    self.model = Sequential()
    # input_shape=(None, inputDim): sequences of any length with inputDim features.
    self.model.add(recurrent_layers[unit](self.hiddenNum, input_shape=(None, self.inputDim)))
    self.model.add(Dense(self.outputDim))
    self.model.compile(loss='mean_squared_error', optimizer=self.opt, metrics=["mean_absolute_percentage_error"])

  def train_Model(self, train_X, train_Y, epoch, batchSize):
    """Fit the model for ``epoch`` epochs with mini-batches of ``batchSize`` (no validation split)."""
    self.model.fit(train_X, train_Y, epochs=epoch, batch_size=batchSize, verbose=1, validation_split=0.0)

  def predict(self,test_X):
    """Return the model's predictions for ``test_X``."""
    pred = self.model.predict(test_X)
    return pred

class ProphetModel():
  """Small wrapper that fits a Prophet model and forwards predictions.

  The constructor only stores its arguments; the data and prior actually used
  for fitting are the ones passed to ``train_Model``.
  """
  def __init__(self,train_X,changepoint_prior_scale=0.01):
    self.train_X=train_X
    self.changepoint_prior_scale=changepoint_prior_scale

  def train_Model(self,train_X,changepoint_prior_scale=0.01):
    """Fit a new Prophet model on ``train_X`` and keep it as ``self.model``."""
    # Pass by keyword: Prophet's first positional parameter is ``growth``, so a
    # positional 0.01 was never used as the changepoint prior.
    self.model = Prophet(changepoint_prior_scale=changepoint_prior_scale).fit(train_X)

  def predict(self,test_X):
    """Return the fitted model's forecast for ``test_X``; call ``train_Model`` first."""
    pred = self.model.predict(test_X)
    return pred

class Random_forest():
  """Wrapper around a default-configured ``RandomForestRegressor``.

  The constructor only stores its arguments; fitting uses the data passed to
  ``train_Model``.
  """
  def __init__(self,train_X,train_Y):
    self.train_X, self.train_Y = train_X, train_Y

  def train_Model(self,train_X,train_Y):
    """Create a fresh regressor, keep it as ``self.rf`` and fit it on the given data."""
    regressor = RandomForestRegressor()
    self.rf = regressor
    # The value returned by fit() is kept separately as ``self.model``.
    self.model = regressor.fit(train_X, train_Y)

  def predict(self,test_X):
    """Return predictions of ``self.rf`` for ``test_X``; call ``train_Model`` first."""
    return self.rf.predict(test_X)
  



## Part D: Defining validation methods

In [None]:
# Define validation methods

def MSE(actual,predicted):
  """Return the mean squared error between ``actual`` and ``predicted`` as a float.

  Accepts anything ``np.subtract`` accepts (lists, arrays, Series, DataFrames).
  """
  squared_error = np.square(np.subtract(actual,predicted))
  # .mean() is a scalar for arrays/Series and a per-column Series for DataFrames;
  # averaging once more collapses both cases to a single number.
  # No square root here: that would be the RMSE.
  return float(np.mean(squared_error.mean()))

def RMSE(actual,predicted):
  """Return the root mean squared error between ``actual`` and ``predicted`` as a float.

  Accepts anything ``np.subtract`` accepts (lists, arrays, Series, DataFrames).
  """
  squared_error = np.square(np.subtract(actual,predicted))
  # .mean() is a scalar for arrays/Series and a per-column Series for DataFrames;
  # averaging once more collapses both cases to a single number.
  mean_squared_error = float(np.mean(squared_error.mean()))
  # Exactly one square root (two would give the fourth root of the MSE).
  return math.sqrt(mean_squared_error)

# 3.Obtain Source Data

In [None]:
# Load the raw CSV from the Colab working directory and display it.
source_file=pd.read_csv('/content/Occupancy_Estimation.csv')
source_file

In [None]:
source_file.groupby('Date').count()

In [None]:
# Combine the separate Date and Time text columns into one parsed timestamp column.
source_file['Datetime']=pd.to_datetime(source_file['Date']+' '+source_file['Time'])

In [None]:
# Keep the rows whose Date lies between the two bounds (inclusive).
# NOTE(review): this compares Date against strings, so it presumably relies on
# Date being stored as 'YYYY/MM/DD' text — confirm against the CSV.
source_continues_data=source_file[(source_file.Date >= '2017/12/22') & (source_file.Date <= '2017/12/24')]

In [None]:
# Keep only the timestamp and the twelve temperature / light / sound sensor columns.
source_continues_data=source_continues_data[['Datetime','S1_Temp','S2_Temp','S3_Temp','S4_Temp','S1_Light','S2_Light','S3_Light','S4_Light','S1_Sound','S2_Sound','S3_Sound','S4_Sound']]

In [None]:
source_continues_data.info()

In [None]:
source_continues_data.describe()

# 4.Visualize source data

In [None]:
# 2x2 grid with one temperature sensor per panel, filled row by row.
fig, axs = plt.subplots(2, 2, figsize=(25, 10),facecolor='w')
fig.suptitle('Temperature')

for panel, sensor in zip(axs.flat, ['S1_Temp', 'S2_Temp', 'S3_Temp', 'S4_Temp']):
  panel.plot(source_continues_data['Datetime'], source_continues_data[sensor],color='darkorange', label='original', linewidth=2,markerfacecolor='white')
  panel.grid(False)
  panel.set_facecolor('white')
  panel.set_title(sensor, fontsize=14)

In [None]:
# 2x2 grid with one light sensor per panel, filled row by row.
fig, axs = plt.subplots(2, 2, figsize=(25, 10),facecolor='w')
fig.suptitle('Light')

for panel, sensor in zip(axs.flat, ['S1_Light', 'S2_Light', 'S3_Light', 'S4_Light']):
  panel.plot(source_continues_data['Datetime'], source_continues_data[sensor],color='darkorange', label='original', linewidth=1)
  panel.grid(False)
  panel.set_facecolor('white')
  panel.set_title(sensor, fontsize=14)

In [None]:
# 2x2 grid with one sound sensor per panel, filled row by row.
fig, axs = plt.subplots(2, 2, figsize=(25, 10),facecolor='w')
fig.suptitle('Sound')

for panel, sensor in zip(axs.flat, ['S1_Sound', 'S2_Sound', 'S3_Sound', 'S4_Sound']):
  panel.plot(source_continues_data['Datetime'], source_continues_data[sensor],color='darkorange', label='original', linewidth=2,markerfacecolor='white')
  panel.grid(False)
  panel.set_facecolor('white')
  panel.set_title(sensor, fontsize=14)

# 5. Generate missing values randomly

In [None]:
columns=['S1_Temp','S2_Temp','S3_Temp','S4_Temp','S1_Light','S2_Light','S3_Light','S4_Light','S1_Sound','S2_Sound','S3_Sound','S4_Sound']

In [None]:
source_continues_data.columns

In [None]:
# Build six copies of the data with 5%, 10%, ..., 30% of the rows blanked out.
# Suffix _k identifies the rate; index_k holds the labels of the blanked rows.
# missingRate_Data returns the blanked rows twice, hence Source_missing_k
# appears twice in each unpacking target.
Source_missing_final_1,index_1,Source_missing_1,Source_missing_1,non_missing_df_1=missingRate_Data(source_continues_data,columns,missing_rate=0.05)
Source_missing_final_2,index_2,Source_missing_2,Source_missing_2,non_missing_df_2=missingRate_Data(source_continues_data,columns,missing_rate=0.1)
Source_missing_final_3,index_3,Source_missing_3,Source_missing_3,non_missing_df_3=missingRate_Data(source_continues_data,columns,missing_rate=0.15)
Source_missing_final_4,index_4,Source_missing_4,Source_missing_4,non_missing_df_4=missingRate_Data(source_continues_data,columns,missing_rate=0.20)
Source_missing_final_5,index_5,Source_missing_5,Source_missing_5,non_missing_df_5=missingRate_Data(source_continues_data,columns,missing_rate=0.25)
Source_missing_final_6,index_6,Source_missing_6,Source_missing_6,non_missing_df_6=missingRate_Data(source_continues_data,columns,missing_rate=0.30)

In [None]:
# Index-sorted copies of each data set; the unsorted frames are left as they are.
Source_missing_final_1_1=Source_missing_final_1.sort_index()
Source_missing_final_2_1=Source_missing_final_2.sort_index()
Source_missing_final_3_1=Source_missing_final_3.sort_index()
Source_missing_final_4_1=Source_missing_final_4.sort_index()
Source_missing_final_5_1=Source_missing_final_5.sort_index()
Source_missing_final_6_1=Source_missing_final_6.sort_index()

In [None]:
Source_missing_final_1.isnull().sum()

In [None]:
Source_missing_final_1.info()

In [None]:
import missingno as mno

In [None]:
mno.matrix(Source_missing_final_1_1, figsize = (20, 6))

In [None]:
mno.matrix(Source_missing_final_2_1, figsize = (20, 6))

In [None]:
mno.matrix(Source_missing_final_3_1, figsize = (20, 6))

In [None]:
mno.matrix(Source_missing_final_4_1, figsize = (20, 6))

In [None]:
mno.matrix(Source_missing_final_5_1, figsize = (20, 6))

In [None]:
mno.matrix(Source_missing_final_6_1, figsize = (20, 6))

# 6. Deploy different approaches on datasets

In [None]:
columns=source_continues_data.drop(['Datetime'], axis=1).columns

In [None]:
columns

Index(['S1_Temp', 'S2_Temp', 'S3_Temp', 'S4_Temp', 'S1_Light', 'S2_Light',
       'S3_Light', 'S4_Light', 'S1_Sound', 'S2_Sound', 'S3_Sound', 'S4_Sound'],
      dtype='object')

In [None]:
source_continues_data[['S1_Temp']]

In [None]:
# Mean imputation: one dict per missing rate, mapping each sensor column to its
# single-column imputed frame.
mean_5={col: mean_replace(Source_missing_final_1[[col]],col) for col in columns}
mean_10={col: mean_replace(Source_missing_final_2[[col]],col) for col in columns}
mean_15={col: mean_replace(Source_missing_final_3[[col]],col) for col in columns}
mean_20={col: mean_replace(Source_missing_final_4[[col]],col) for col in columns}
mean_25={col: mean_replace(Source_missing_final_5[[col]],col) for col in columns}
mean_30={col: mean_replace(Source_missing_final_6[[col]],col) for col in columns}

In [None]:
# Median imputation: one dict per missing rate, mapping each sensor column to
# its single-column imputed frame.
median_5={col: median_replace(Source_missing_final_1[[col]],col) for col in columns}
median_10={col: median_replace(Source_missing_final_2[[col]],col) for col in columns}
median_15={col: median_replace(Source_missing_final_3[[col]],col) for col in columns}
median_20={col: median_replace(Source_missing_final_4[[col]],col) for col in columns}
median_25={col: median_replace(Source_missing_final_5[[col]],col) for col in columns}
median_30={col: median_replace(Source_missing_final_6[[col]],col) for col in columns}

In [None]:
# Mode imputation: one dict per missing rate, mapping each sensor column to its
# single-column imputed frame.
mode_5={col: mode_replace(Source_missing_final_1[[col]],col) for col in columns}
mode_10={col: mode_replace(Source_missing_final_2[[col]],col) for col in columns}
mode_15={col: mode_replace(Source_missing_final_3[[col]],col) for col in columns}
mode_20={col: mode_replace(Source_missing_final_4[[col]],col) for col in columns}
mode_25={col: mode_replace(Source_missing_final_5[[col]],col) for col in columns}
mode_30={col: mode_replace(Source_missing_final_6[[col]],col) for col in columns}

In [None]:
#list1=[mode_5,mode_10,mode_15,mode_20,mode_25,mode_30
#,median_5,median_10,median_15,median_20,median_25,median_30
#,mean_5,mean_10,mean_15,mean_20,mean_25,mean_30]

list1=[mean_5,mean_10,mean_15,mean_20,mean_25,mean_30]


# Scores keyed by (column, position of the data set in list1); previously the
# dict was overwritten by a single number on every iteration.
col_MSE={}

for col in columns:
  for rate_pos, md in enumerate(list1):
    col_MSE[(col, rate_pos)]=MSE(source_continues_data[[col]],md[col])
    print(col_MSE[(col, rate_pos)])



In [None]:
# Scores keyed by (column, position of the data set in list1); previously the
# dict was overwritten by a single number on every iteration.
col_RMSE={}

for rate_pos, md in enumerate(list1):
  for col in columns:
    col_RMSE[(col, rate_pos)]=RMSE(source_continues_data[[col]],md[col])
    print(col_RMSE[(col, rate_pos)])

## 6.1 Prophet

In [None]:
def get_format(df,col):
  """Return a two-column frame in Prophet's input layout.

  Args:
    df: frame containing a ``'Datetime'`` column and the column ``col``.
    col: name of the value column.

  Returns:
    A new DataFrame with columns ``ds`` (from ``'Datetime'``) and ``y`` (from
    ``col``), carrying the same row index as ``df``. ``df`` is not modified.
  """
  # Build a fresh frame rather than adding columns to a slice of ``df``.
  return pd.DataFrame({'ds': df['Datetime'], 'y': df[col]})

In [None]:
def run_Prophet(train_Model,future):
  """Fit Prophet on the observed rows and append forecasts for the held-out rows.

  Args:
    train_Model: frame with columns ``ds`` and ``y`` (the observed rows).
    future: frame with a ``ds`` column holding the timestamps to predict; its
      row index identifies the held-out rows.

  Returns:
    ``train_Model`` followed by one ``ds``/``y`` row per entry of ``future``,
    where ``y`` is Prophet's ``yhat``. The forecast rows carry the row labels
    of ``future`` so the result can be re-aligned with the source data.
  """
  # Prophet's predict() returns rows ordered by ``ds`` on a fresh 0..n-1 index,
  # so sort the request the same way and re-attach its labels afterwards.
  future_sorted = future.sort_values('ds')
  Prm = Prophet(changepoint_prior_scale=0.01).fit(train_Model)
  Pred = Prm.predict(future_sorted)
  Pred_1 = pd.DataFrame({'ds': Pred['ds'].values, 'y': Pred['yhat'].values}, index=future_sorted.index)
  pred_1_comp=pd.concat([train_Model,Pred_1])
  return pred_1_comp


In [None]:
columns=source_continues_data.drop(['Datetime'], axis=1).columns

In [None]:
index=index_1
Prophet_predict_1={}


for col in columns:
  Prophet_df=get_format(source_continues_data,col)
  # ``index`` holds row labels (from DataFrame.sample), so select by label;
  # .iloc would treat the labels as positions.
  future_1=Prophet_df.loc[index,['ds']]
  train_Model_1=Prophet_df.query('index not in @index')
  Prophet_pred_df=run_Prophet(train_Model_1,future_1)
  Prophet_predict_1[col]=Prophet_pred_df

In [None]:
index=index_2
Prophet_predict_2={}

for col in columns:
  Prophet_df=get_format(source_continues_data,col)
  # ``index`` holds row labels (from DataFrame.sample), so select by label;
  # .iloc would treat the labels as positions.
  future_1=Prophet_df.loc[index,['ds']]
  train_Model_1=Prophet_df.query('index not in @index')
  Prophet_pred_df=run_Prophet(train_Model_1,future_1)
  Prophet_predict_2[col]=Prophet_pred_df

In [None]:
index=index_3
Prophet_predict_3={}

for col in columns:
  Prophet_df=get_format(source_continues_data,col)
  # ``index`` holds row labels (from DataFrame.sample), so select by label;
  # .iloc would treat the labels as positions.
  future_1=Prophet_df.loc[index,['ds']]
  train_Model_1=Prophet_df.query('index not in @index')
  Prophet_pred_df=run_Prophet(train_Model_1,future_1)
  Prophet_predict_3[col]=Prophet_pred_df

In [None]:
index=index_4
Prophet_predict_4={}

for col in columns:
  Prophet_df=get_format(source_continues_data,col)
  # ``index`` holds row labels (from DataFrame.sample), so select by label;
  # .iloc would treat the labels as positions.
  future_1=Prophet_df.loc[index,['ds']]
  train_Model_1=Prophet_df.query('index not in @index')
  Prophet_pred_df=run_Prophet(train_Model_1,future_1)
  Prophet_predict_4[col]=Prophet_pred_df

In [None]:
index=index_5
Prophet_predict_5={}

for col in columns:
  Prophet_df=get_format(source_continues_data,col)
  # ``index`` holds row labels (from DataFrame.sample), so select by label;
  # .iloc would treat the labels as positions.
  future_1=Prophet_df.loc[index,['ds']]
  train_Model_1=Prophet_df.query('index not in @index')
  Prophet_pred_df=run_Prophet(train_Model_1,future_1)
  Prophet_predict_5[col]=Prophet_pred_df

In [None]:
index=index_6
Prophet_predict_6={}

for col in columns:
  Prophet_df=get_format(source_continues_data,col)
  # ``index`` holds row labels (from DataFrame.sample), so select by label;
  # .iloc would treat the labels as positions.
  future_1=Prophet_df.loc[index,['ds']]
  train_Model_1=Prophet_df.query('index not in @index')
  Prophet_pred_df=run_Prophet(train_Model_1,future_1)
  Prophet_predict_6[col]=Prophet_pred_df

In [None]:
list1=[Prophet_predict_1,Prophet_predict_2,Prophet_predict_3,Prophet_predict_4,Prophet_predict_5,Prophet_predict_6]


# Scores keyed by (column, position of the data set in list1); previously the
# dict was overwritten by a single number on every iteration.
col_MSE={}

for col in columns:
  for rate_pos, md in enumerate(list1):
    # Rename 'y' to the sensor name: recent pandas aligns the two frames on
    # column labels as well as on the index before subtracting.
    imputed=md[col][['y']].rename(columns={'y': col}).sort_index()
    col_MSE[(col, rate_pos)]=MSE(source_continues_data[[col]],imputed)
    print(col_MSE[(col, rate_pos)])

In [None]:
list1=[Prophet_predict_1,Prophet_predict_2,Prophet_predict_3,Prophet_predict_4,Prophet_predict_5,Prophet_predict_6]


# This cell was a verbatim copy of the MSE cell; the other method sections
# follow their MSE cell with an RMSE cell, so RMSE is computed here.
# Scores are keyed by (column, position of the data set in list1).
col_RMSE={}

for rate_pos, md in enumerate(list1):
  for col in columns:
    # Rename 'y' to the sensor name: recent pandas aligns the two frames on
    # column labels as well as on the index before subtracting.
    imputed=md[col][['y']].rename(columns={'y': col}).sort_index()
    col_RMSE[(col, rate_pos)]=RMSE(source_continues_data[[col]],imputed)
    print(col_RMSE[(col, rate_pos)])

## 6.2 RF

In [None]:
index=index_1
RF_predict_1={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]


for col in columns:
  rf=RandomForestRegressor()
  model = rf.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rf.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  RF_predict_1[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
index=index_2
RF_predict_2={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]

for col in columns:
  rf=RandomForestRegressor()
  model = rf.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rf.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  RF_predict_2[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
index=index_3
RF_predict_3={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]

for col in columns:
  rf=RandomForestRegressor()
  model = rf.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rf.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  RF_predict_3[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
index=index_4
RF_predict_4={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]

for col in columns:
  rf=RandomForestRegressor()
  model = rf.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rf.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  RF_predict_4[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
index=index_5
RF_predict_5={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]

for col in columns:
  rf=RandomForestRegressor()
  model = rf.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rf.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  RF_predict_5[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
index=index_6
RF_predict_6={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]

for col in columns:
  rf=RandomForestRegressor()
  model = rf.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rf.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  RF_predict_6[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
list1=[RF_predict_1,RF_predict_2,RF_predict_3,RF_predict_4,RF_predict_5,RF_predict_6]


# Scores keyed by (column, position of the data set in list1); previously the
# dict was overwritten by a single number on every iteration.
col_MSE={}

for col in columns:
  for rate_pos, md in enumerate(list1):
    col_MSE[(col, rate_pos)]=MSE(source_continues_data[[col]],md[col].sort_index())
    print(col_MSE[(col, rate_pos)])

In [None]:
# Scores keyed by (column, position of the data set in list1); previously the
# dict was overwritten by a single number on every iteration.
col_RMSE={}

for rate_pos, md in enumerate(list1):
  for col in columns:
    col_RMSE[(col, rate_pos)]=RMSE(source_continues_data[[col]],md[col].sort_index())
    print(col_RMSE[(col, rate_pos)])

## 6.3 SVR

In [None]:
# The name ``SVR`` is redefined in this notebook by a wrapper class, so the
# scikit-learn estimator is imported under an alias and configured with its
# own keyword names (``kernel``).
from sklearn.svm import SVR as SklearnSVR

index=index_1
SVC_predict_1={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]


for col in columns:
  rbf_svr = SklearnSVR(kernel = 'rbf', C = 1000.0, gamma = 0.1)
  rbf_svr.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rbf_svr.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  SVC_predict_1[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
# The name ``SVR`` is redefined in this notebook by a wrapper class, so the
# scikit-learn estimator is imported under an alias and configured with its
# own keyword names (``kernel``).
from sklearn.svm import SVR as SklearnSVR

index=index_2
SVC_predict_2={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]


for col in columns:
  rbf_svr = SklearnSVR(kernel = 'rbf', C = 1000.0, gamma = 0.1)
  rbf_svr.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rbf_svr.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  SVC_predict_2[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
# The name ``SVR`` is redefined in this notebook by a wrapper class, so the
# scikit-learn estimator is imported under an alias and configured with its
# own keyword names (``kernel``).
from sklearn.svm import SVR as SklearnSVR

index=index_3
SVC_predict_3={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]


for col in columns:
  rbf_svr = SklearnSVR(kernel = 'rbf', C = 1000.0, gamma = 0.1)
  rbf_svr.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rbf_svr.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  SVC_predict_3[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
# The name ``SVR`` is redefined in this notebook by a wrapper class, so the
# scikit-learn estimator is imported under an alias and configured with its
# own keyword names (``kernel``).
from sklearn.svm import SVR as SklearnSVR

index=index_4
SVC_predict_4={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]


for col in columns:
  rbf_svr = SklearnSVR(kernel = 'rbf', C = 1000.0, gamma = 0.1)
  rbf_svr.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rbf_svr.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  SVC_predict_4[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
# The name ``SVR`` is redefined in this notebook by a wrapper class, so the
# scikit-learn estimator is imported under an alias and configured with its
# own keyword names (``kernel``).
from sklearn.svm import SVR as SklearnSVR

index=index_5
SVC_predict_5={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]


for col in columns:
  rbf_svr = SklearnSVR(kernel = 'rbf', C = 1000.0, gamma = 0.1)
  rbf_svr.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rbf_svr.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  SVC_predict_5[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
# The name ``SVR`` is redefined in this notebook by a wrapper class, so the
# scikit-learn estimator is imported under an alias and configured with its
# own keyword names (``kernel``).
from sklearn.svm import SVR as SklearnSVR

index=index_6
SVC_predict_6={}

# ``index`` holds row labels (from DataFrame.sample), so the held-out rows are
# selected by label; .iloc would treat the labels as positions.
test_df=source_continues_data.loc[index]
train_Model_df=source_continues_data.query('index not in @index')

# One single-feature row (the timestamp) per observation.
train_Model_x=[[x] for x in train_Model_df['Datetime'].values]
test_x=[[x] for x in test_df['Datetime'].values]


for col in columns:
  rbf_svr = SklearnSVR(kernel = 'rbf', C = 1000.0, gamma = 0.1)
  rbf_svr.fit(train_Model_x, train_Model_df[col].values)
  y_tests_predict=rbf_svr.predict(test_x)
  # Observed rows keep their values; held-out rows take the predictions.
  # A fresh frame is built instead of adding a column to a slice.
  train_Model_df_tmp=train_Model_df[[col]]
  test_df_tmp=pd.DataFrame({col: y_tests_predict}, index=test_df.index)
  SVC_predict_6[col]=pd.concat([train_Model_df_tmp,test_df_tmp])


In [None]:
list1=[SVC_predict_1,SVC_predict_2,SVC_predict_3,SVC_predict_4,SVC_predict_5,SVC_predict_6]


# Scores keyed by (column, position of the data set in list1); previously the
# dict was overwritten by a single number on every iteration.
col_MSE={}

for col in columns:
  for rate_pos, md in enumerate(list1):
    col_MSE[(col, rate_pos)]=MSE(source_continues_data[[col]],md[col].sort_index())
    print(col_MSE[(col, rate_pos)])

In [None]:
# Scores keyed by (column, position of the data set in list1); previously the
# dict was overwritten by a single number on every iteration.
col_RMSE={}

for rate_pos, md in enumerate(list1):
  for col in columns:
    col_RMSE[(col, rate_pos)]=RMSE(source_continues_data[[col]],md[col].sort_index())
    print(col_RMSE[(col, rate_pos)])

## 6.4 MLP

In [None]:
#normalize time series
def NormalizeDataset(dataset):
  """Return ``dataset`` rescaled to the range [0, 1] with a freshly fitted MinMaxScaler.

  The scaler is discarded, so the transform cannot be inverted from the result alone.
  """
  return MinMaxScaler(feature_range=(0, 1)).fit_transform(dataset)

In [None]:
#dividetrain_ModelTest
def dividetrain_ModelTest(dataset, index):
  """Split the rows of ``dataset`` into train and test parts by position.

  Args:
    dataset: 2-D array-like; only the first value of each row is kept.
    index: collection of row positions that belong to the test part.

  Returns:
    ``(train, test)`` as NumPy arrays of one-element rows, each in the
    original row order.
  """
  # Build the lookup once so every membership test is O(1).
  held_out = set(index)
  train_rows, test_rows = [], []
  for position, row in enumerate(dataset):
    target = test_rows if position in held_out else train_rows
    target.append([row[0]])

  return np.asarray(train_rows), np.asarray(test_rows)

In [None]:
#load data
def load_data(dataset,col):
  """Return column ``col`` of ``dataset`` and its values as a float32 column vector.

  Returns:
    ``(series, data)`` where ``series`` is ``dataset[col]`` and ``data`` is a
    NumPy array of shape ``(n, 1)`` and dtype float32.
  """
  series = dataset[col]
  column_vector = series.values.astype("float32").reshape(-1, 1)
  return series, column_vector

In [None]:
def createSamples(dataset, lookBack, RNN=True):
    """Cut ``dataset`` into sliding windows and their next-step targets.

    Each sample is ``lookBack`` consecutive rows; its target is the row that
    immediately follows the window.

    Args:
        dataset: 2-D array of shape (time steps, features).
        lookBack: window length in rows.
        RNN: if True the windows keep the shape (samples, lookBack, features);
            if False they are reshaped to (samples, lookBack).

    Returns:
        ``(dataX, dataY)`` as NumPy arrays.
    """
    window_starts = range(len(dataset) - lookBack)
    windows = np.array([dataset[start:start + lookBack, :] for start in window_starts])
    targets = np.array([dataset[start + lookBack, :] for start in window_starts])
    if RNN:
        return windows, targets

    # Drop the trailing feature axis for models that take flat inputs.
    flat_windows = np.reshape(windows, (windows.shape[0], windows.shape[1]))
    return flat_windows, targets

In [None]:
from numpy.ma.core import append
# Positional labels 0..n-1, one per row of the source data.
array=list(range(len(source_continues_data)))

In [None]:
# MLP imputation for the rows listed in index_1: one model is trained per
# column and the completed column is stored in MLPm_index_1[col].
hiddenNum=64
lr=1e-4
epoch=20
batchSize=32
plot_flag=True
outputDim=1

index=index_1
MLPm_index_1={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_1['index']=array
Source_missing_final_1=Source_missing_final_1.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_1[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  inputDim=lookBack
  train_X_n, trainY_n = createSamples(train_Model, lookBack, RNN=False)
  test_X_n, testY_n = createSamples(test, lookBack, RNN=False)
  MLPm=MLP_M(inputDim, hiddenNum, outputDim, lr)
  MLPm.build_Model()
  MLPm.train_Model(train_X_n, trainY_n, epoch, batchSize)
  MLPm_train_ModelPred = MLPm.predict(train_X_n)
  MLPm_testPred = MLPm.predict(test_X_n)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  MLPm_testPred = scaler.inverse_transform(MLPm_testPred)
  MLPm_testPred=pd.DataFrame(MLPm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  MLPm_testPred=MLPm_testPred.iloc[len(MLPm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  MLPM_pre=dataset_source.query('index not in @index')
  df=pd.concat([MLPM_pre, MLPm_testPred]).sort_index()
  MLPm_index_1[col]=df

In [None]:
# MLP imputation for the rows listed in index_2: one model is trained per
# column and the completed column is stored in MLPm_index_2[col].
hiddenNum=64
lr=1e-4
epoch=20
batchSize=32
plot_flag=True
outputDim=1

index=index_2
MLPm_index_2={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_2['index']=array
Source_missing_final_2=Source_missing_final_2.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_2[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  inputDim=lookBack
  train_X_n, trainY_n = createSamples(train_Model, lookBack, RNN=False)
  test_X_n, testY_n = createSamples(test, lookBack, RNN=False)
  MLPm=MLP_M(inputDim, hiddenNum, outputDim, lr)
  MLPm.build_Model()
  MLPm.train_Model(train_X_n, trainY_n, epoch, batchSize)
  MLPm_train_ModelPred = MLPm.predict(train_X_n)
  MLPm_testPred = MLPm.predict(test_X_n)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  MLPm_testPred = scaler.inverse_transform(MLPm_testPred)
  MLPm_testPred=pd.DataFrame(MLPm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  MLPm_testPred=MLPm_testPred.iloc[len(MLPm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  MLPM_pre=dataset_source.query('index not in @index')
  df=pd.concat([MLPM_pre, MLPm_testPred]).sort_index()
  MLPm_index_2[col]=df

In [None]:
# MLP imputation for the rows listed in index_3: one model is trained per
# column and the completed column is stored in MLPm_index_3[col].
hiddenNum=64
lr=1e-4
epoch=20
batchSize=32
plot_flag=True
outputDim=1

index=index_3
MLPm_index_3={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_3['index']=array
Source_missing_final_3=Source_missing_final_3.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_3[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  inputDim=lookBack
  train_X_n, trainY_n = createSamples(train_Model, lookBack, RNN=False)
  test_X_n, testY_n = createSamples(test, lookBack, RNN=False)
  MLPm=MLP_M(inputDim, hiddenNum, outputDim, lr)
  MLPm.build_Model()
  MLPm.train_Model(train_X_n, trainY_n, epoch, batchSize)
  MLPm_train_ModelPred = MLPm.predict(train_X_n)
  MLPm_testPred = MLPm.predict(test_X_n)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  MLPm_testPred = scaler.inverse_transform(MLPm_testPred)
  MLPm_testPred=pd.DataFrame(MLPm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  MLPm_testPred=MLPm_testPred.iloc[len(MLPm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  MLPM_pre=dataset_source.query('index not in @index')
  df=pd.concat([MLPM_pre, MLPm_testPred]).sort_index()
  MLPm_index_3[col]=df

In [None]:
# MLP imputation for the rows listed in index_4: one model is trained per
# column and the completed column is stored in MLPm_index_4[col].
hiddenNum=64
lr=1e-4
epoch=20
batchSize=32
plot_flag=True
outputDim=1

index=index_4
MLPm_index_4={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_4['index']=array
Source_missing_final_4=Source_missing_final_4.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_4[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  inputDim=lookBack
  train_X_n, trainY_n = createSamples(train_Model, lookBack, RNN=False)
  test_X_n, testY_n = createSamples(test, lookBack, RNN=False)
  MLPm=MLP_M(inputDim, hiddenNum, outputDim, lr)
  MLPm.build_Model()
  MLPm.train_Model(train_X_n, trainY_n, epoch, batchSize)
  MLPm_train_ModelPred = MLPm.predict(train_X_n)
  MLPm_testPred = MLPm.predict(test_X_n)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  MLPm_testPred = scaler.inverse_transform(MLPm_testPred)
  MLPm_testPred=pd.DataFrame(MLPm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  MLPm_testPred=MLPm_testPred.iloc[len(MLPm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  MLPM_pre=dataset_source.query('index not in @index')
  df=pd.concat([MLPM_pre, MLPm_testPred]).sort_index()
  MLPm_index_4[col]=df

In [None]:
# MLP imputation for the rows listed in index_5: one model is trained per
# column and the completed column is stored in MLPm_index_5[col].
hiddenNum=64
lr=1e-4
epoch=20
batchSize=32
plot_flag=True
outputDim=1

index=index_5
MLPm_index_5={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_5['index']=array
Source_missing_final_5=Source_missing_final_5.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_5[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  inputDim=lookBack
  train_X_n, trainY_n = createSamples(train_Model, lookBack, RNN=False)
  test_X_n, testY_n = createSamples(test, lookBack, RNN=False)
  MLPm=MLP_M(inputDim, hiddenNum, outputDim, lr)
  MLPm.build_Model()
  MLPm.train_Model(train_X_n, trainY_n, epoch, batchSize)
  MLPm_train_ModelPred = MLPm.predict(train_X_n)
  MLPm_testPred = MLPm.predict(test_X_n)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  MLPm_testPred = scaler.inverse_transform(MLPm_testPred)
  MLPm_testPred=pd.DataFrame(MLPm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  MLPm_testPred=MLPm_testPred.iloc[len(MLPm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  MLPM_pre=dataset_source.query('index not in @index')
  df=pd.concat([MLPM_pre, MLPm_testPred]).sort_index()
  MLPm_index_5[col]=df

In [None]:
# MLP imputation for the rows listed in index_6: one model is trained per
# column and the completed column is stored in MLPm_index_6[col].
hiddenNum=64
lr=1e-4
epoch=20
batchSize=32
plot_flag=True
outputDim=1

index=index_6
MLPm_index_6={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_6['index']=array
Source_missing_final_6=Source_missing_final_6.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_6[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  inputDim=lookBack
  train_X_n, trainY_n = createSamples(train_Model, lookBack, RNN=False)
  test_X_n, testY_n = createSamples(test, lookBack, RNN=False)
  MLPm=MLP_M(inputDim, hiddenNum, outputDim, lr)
  MLPm.build_Model()
  MLPm.train_Model(train_X_n, trainY_n, epoch, batchSize)
  MLPm_train_ModelPred = MLPm.predict(train_X_n)
  MLPm_testPred = MLPm.predict(test_X_n)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  MLPm_testPred = scaler.inverse_transform(MLPm_testPred)
  MLPm_testPred=pd.DataFrame(MLPm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  MLPm_testPred=MLPm_testPred.iloc[len(MLPm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  MLPM_pre=dataset_source.query('index not in @index')
  df=pd.concat([MLPM_pre, MLPm_testPred]).sort_index()
  MLPm_index_6[col]=df

In [None]:
indexs=[MLPm_index_1,MLPm_index_2,MLPm_index_3,MLPm_index_4,MLPm_index_5,MLPm_index_6]

# Print the MSE of every MLP result against the source data, grouped by column
# and then by scenario. The loop variable is called `preds` so it does not
# overwrite the module-level `index` that the imputation cells reference in
# their `@index` queries.
for col in columns:
  #print(col)
  for preds in indexs:
    col_MSE=MSE(source_continues_data[[col]],preds[col])
    print(col_MSE)


In [None]:
indexs=[MLPm_index_1,MLPm_index_2,MLPm_index_3,MLPm_index_4,MLPm_index_5,MLPm_index_6]

# Print the RMSE of every MLP result against the source data, grouped by
# column and then by scenario. The loop variable is called `preds` so it does
# not overwrite the module-level `index` that the imputation cells reference
# in their `@index` queries.
for col in columns:
  for preds in indexs:
    col_RMSE=RMSE(source_continues_data[[col]],preds[col])
    print(col_RMSE)

## 6.5 GRU

In [None]:
# GRU imputation for the rows listed in index_1: one model is trained per
# column and the completed column is stored in GRUm_index_1[col].
unit="GRU"
lag = 24
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_1
GRUm_index_1={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_1['index']=array
Source_missing_final_1=Source_missing_final_1.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_1[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  GRUm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  GRUm.build_Model(unit="GRU")
  GRUm.train_Model(train_X, trainY, epoch, batch_size)

  GRUm_testPred = GRUm.predict(test_X)
  GRUm_testPred = GRUm_testPred.reshape(-1, 1)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  GRUm_testPred = scaler.inverse_transform(GRUm_testPred)
  GRUm_testPred=pd.DataFrame(GRUm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  GRUm_testPred=GRUm_testPred.iloc[len(GRUm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  GRUm_pre=dataset_source.query('index not in @index')
  df=pd.concat([GRUm_pre, GRUm_testPred]).sort_index()
  GRUm_index_1[col]=df

In [None]:
# GRU imputation for the rows listed in index_2: one model is trained per
# column and the completed column is stored in GRUm_index_2[col].
unit="GRU"
lag = 24
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_2
GRUm_index_2={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_2['index']=array
Source_missing_final_2=Source_missing_final_2.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_2[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  GRUm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  GRUm.build_Model(unit="GRU")
  GRUm.train_Model(train_X, trainY, epoch, batch_size)
  GRUm_testPred = GRUm.predict(test_X)
  GRUm_testPred = GRUm_testPred.reshape(-1, 1)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  GRUm_testPred = scaler.inverse_transform(GRUm_testPred)
  GRUm_testPred=pd.DataFrame(GRUm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  GRUm_testPred=GRUm_testPred.iloc[len(GRUm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  GRUm_pre=dataset_source.query('index not in @index')
  df=pd.concat([GRUm_pre, GRUm_testPred]).sort_index()
  GRUm_index_2[col]=df

In [None]:
# GRU imputation for the rows listed in index_3: one model is trained per
# column and the completed column is stored in GRUm_index_3[col].
unit="GRU"
lag = 24
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_3
GRUm_index_3={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_3['index']=array
Source_missing_final_3=Source_missing_final_3.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_3[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  GRUm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  GRUm.build_Model(unit="GRU")
  GRUm.train_Model(train_X, trainY, epoch, batch_size)
  GRUm_testPred = GRUm.predict(test_X)
  GRUm_testPred = GRUm_testPred.reshape(-1, 1)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  GRUm_testPred = scaler.inverse_transform(GRUm_testPred)
  GRUm_testPred=pd.DataFrame(GRUm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  GRUm_testPred=GRUm_testPred.iloc[len(GRUm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  GRUm_pre=dataset_source.query('index not in @index')
  df=pd.concat([GRUm_pre, GRUm_testPred]).sort_index()
  GRUm_index_3[col]=df

In [None]:
# GRU imputation for the rows listed in index_4: one model is trained per
# column and the completed column is stored in GRUm_index_4[col].
unit="GRU"
lag = 24
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_4
GRUm_index_4={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_4['index']=array
Source_missing_final_4=Source_missing_final_4.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_4[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  GRUm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  GRUm.build_Model(unit="GRU")
  GRUm.train_Model(train_X, trainY, epoch, batch_size)
  GRUm_testPred = GRUm.predict(test_X)
  GRUm_testPred = GRUm_testPred.reshape(-1, 1)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  GRUm_testPred = scaler.inverse_transform(GRUm_testPred)
  GRUm_testPred=pd.DataFrame(GRUm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  GRUm_testPred=GRUm_testPred.iloc[len(GRUm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  GRUm_pre=dataset_source.query('index not in @index')
  df=pd.concat([GRUm_pre, GRUm_testPred]).sort_index()
  GRUm_index_4[col]=df

In [None]:
# GRU imputation for the rows listed in index_5: one model is trained per
# column and the completed column is stored in GRUm_index_5[col].
unit="GRU"
lag = 24
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_5
GRUm_index_5={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_5['index']=array
Source_missing_final_5=Source_missing_final_5.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_5[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  GRUm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  GRUm.build_Model(unit="GRU")
  GRUm.train_Model(train_X, trainY, epoch, batch_size)
  GRUm_testPred = GRUm.predict(test_X)
  GRUm_testPred = GRUm_testPred.reshape(-1, 1)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  GRUm_testPred = scaler.inverse_transform(GRUm_testPred)
  GRUm_testPred=pd.DataFrame(GRUm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  GRUm_testPred=GRUm_testPred.iloc[len(GRUm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  GRUm_pre=dataset_source.query('index not in @index')
  df=pd.concat([GRUm_pre, GRUm_testPred]).sort_index()
  GRUm_index_5[col]=df

In [None]:
# GRU imputation for the rows listed in index_6: one model is trained per
# column and the completed column is stored in GRUm_index_6[col].
unit="GRU"
lag = 24
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_6
GRUm_index_6={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_6['index']=array
Source_missing_final_6=Source_missing_final_6.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_6[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  GRUm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  GRUm.build_Model(unit="GRU")
  GRUm.train_Model(train_X, trainY, epoch, batch_size)
  GRUm_testPred = GRUm.predict(test_X)
  GRUm_testPred = GRUm_testPred.reshape(-1, 1)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  GRUm_testPred = scaler.inverse_transform(GRUm_testPred)
  GRUm_testPred=pd.DataFrame(GRUm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  GRUm_testPred=GRUm_testPred.iloc[len(GRUm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  GRUm_pre=dataset_source.query('index not in @index')
  df=pd.concat([GRUm_pre, GRUm_testPred]).sort_index()
  GRUm_index_6[col]=df

In [None]:
indexs=[GRUm_index_1,GRUm_index_2,GRUm_index_3,GRUm_index_4,GRUm_index_5,GRUm_index_6]

# Print the MSE of every GRU result against the source data, grouped by column
# and then by scenario. The loop variable is called `preds` so it does not
# overwrite the module-level `index` that the imputation cells reference in
# their `@index` queries.
for col in columns:
  #print(col)
  for preds in indexs:
    col_MSE=MSE(source_continues_data[[col]],preds[col])
    print(col_MSE)

In [None]:
indexs=[GRUm_index_1,GRUm_index_2,GRUm_index_3,GRUm_index_4,GRUm_index_5,GRUm_index_6]

# Print the RMSE of every GRU result against the source data, grouped by
# column and then by scenario. The loop variable is called `preds` so it does
# not overwrite the module-level `index` that the imputation cells reference
# in their `@index` queries.
for col in columns:
  for preds in indexs:
    col_RMSE=RMSE(source_continues_data[[col]],preds[col])
    print(col_RMSE)

In [None]:
# Bare expression: the notebook echoes the value of `columns` as the cell output.
columns

## 6.6 RNN

In [None]:
# SimpleRNN imputation for the rows listed in index_1: one model is trained
# per column and the completed column is stored in RNNm_index_1[col].
unit="RNN"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_1
RNNm_index_1={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_1['index']=array
Source_missing_final_1=Source_missing_final_1.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_1[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  RNNm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  RNNm.build_Model(unit="RNN")
  RNNm.train_Model(train_X, trainY, epoch, batch_size)
  RNNm_train_ModelPred = RNNm.predict(train_X)
  RNNm_testPred = RNNm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  RNNm_testPred = scaler.inverse_transform(RNNm_testPred)
  RNNm_testPred=pd.DataFrame(RNNm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  RNNm_testPred=RNNm_testPred.iloc[len(RNNm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  RNNm_pre=dataset_source.query('index not in @index')
  df=pd.concat([RNNm_pre, RNNm_testPred]).sort_index()
  RNNm_index_1[col]=df

In [None]:
#RNN
# SimpleRNN imputation for the rows listed in index_2: one model is trained
# per column and the completed column is stored in RNNm_index_2[col].
unit="RNN"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_2
RNNm_index_2={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_2['index']=array
Source_missing_final_2=Source_missing_final_2.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_2[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  RNNm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  RNNm.build_Model(unit="RNN")
  RNNm.train_Model(train_X, trainY, epoch, batch_size)
  RNNm_train_ModelPred = RNNm.predict(train_X)
  RNNm_testPred = RNNm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  RNNm_testPred = scaler.inverse_transform(RNNm_testPred)
  RNNm_testPred=pd.DataFrame(RNNm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  RNNm_testPred=RNNm_testPred.iloc[len(RNNm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  RNNm_pre=dataset_source.query('index not in @index')
  df=pd.concat([RNNm_pre, RNNm_testPred]).sort_index()
  RNNm_index_2[col]=df

In [None]:
#RNN
# SimpleRNN imputation for the rows listed in index_3: one model is trained
# per column and the completed column is stored in RNNm_index_3[col].
unit="RNN"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_3
RNNm_index_3={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_3['index']=array
Source_missing_final_3=Source_missing_final_3.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_3[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  RNNm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  RNNm.build_Model(unit="RNN")
  RNNm.train_Model(train_X, trainY, epoch, batch_size)
  RNNm_train_ModelPred = RNNm.predict(train_X)
  RNNm_testPred = RNNm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  RNNm_testPred = scaler.inverse_transform(RNNm_testPred)
  RNNm_testPred=pd.DataFrame(RNNm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  RNNm_testPred=RNNm_testPred.iloc[len(RNNm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  RNNm_pre=dataset_source.query('index not in @index')
  df=pd.concat([RNNm_pre, RNNm_testPred]).sort_index()
  RNNm_index_3[col]=df

In [None]:
#RNN
# SimpleRNN imputation for the rows listed in index_4: one model is trained
# per column and the completed column is stored in RNNm_index_4[col].
unit="RNN"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_4
RNNm_index_4={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_4['index']=array
Source_missing_final_4=Source_missing_final_4.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_4[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  RNNm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  RNNm.build_Model(unit="RNN")
  RNNm.train_Model(train_X, trainY, epoch, batch_size)
  RNNm_train_ModelPred = RNNm.predict(train_X)
  RNNm_testPred = RNNm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  RNNm_testPred = scaler.inverse_transform(RNNm_testPred)
  RNNm_testPred=pd.DataFrame(RNNm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  RNNm_testPred=RNNm_testPred.iloc[len(RNNm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  RNNm_pre=dataset_source.query('index not in @index')
  df=pd.concat([RNNm_pre, RNNm_testPred]).sort_index()
  RNNm_index_4[col]=df

In [None]:
#RNN
# SimpleRNN imputation for the rows listed in index_5: one model is trained
# per column and the completed column is stored in RNNm_index_5[col].
unit="RNN"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_5
RNNm_index_5={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_5['index']=array
Source_missing_final_5=Source_missing_final_5.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_5[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  RNNm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  RNNm.build_Model(unit="RNN")
  RNNm.train_Model(train_X, trainY, epoch, batch_size)
  RNNm_train_ModelPred = RNNm.predict(train_X)
  RNNm_testPred = RNNm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  RNNm_testPred = scaler.inverse_transform(RNNm_testPred)
  RNNm_testPred=pd.DataFrame(RNNm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  RNNm_testPred=RNNm_testPred.iloc[len(RNNm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  RNNm_pre=dataset_source.query('index not in @index')
  df=pd.concat([RNNm_pre, RNNm_testPred]).sort_index()
  RNNm_index_5[col]=df

In [None]:
#RNN
# SimpleRNN imputation for the rows listed in index_6: one model is trained
# per column and the completed column is stored in RNNm_index_6[col].
unit="RNN"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_6
RNNm_index_6={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_6['index']=array
Source_missing_final_6=Source_missing_final_6.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_6[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from the MLP cells.
  RNNm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  RNNm.build_Model(unit="RNN")
  RNNm.train_Model(train_X, trainY, epoch, batch_size)
  RNNm_train_ModelPred = RNNm.predict(train_X)
  RNNm_testPred = RNNm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  RNNm_testPred = scaler.inverse_transform(RNNm_testPred)
  RNNm_testPred=pd.DataFrame(RNNm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  RNNm_testPred=RNNm_testPred.iloc[len(RNNm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  RNNm_pre=dataset_source.query('index not in @index')
  df=pd.concat([RNNm_pre, RNNm_testPred]).sort_index()
  RNNm_index_6[col]=df

## 6.7 LSTM

In [None]:
#LSTM
# LSTM imputation for the rows listed in index_1: one model is trained per
# column and the completed column is stored in LSTMm_index_1[col].
unit="LSTM"
hiddenNum=64
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_1
LSTMm_index_1={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_1['index']=array
Source_missing_final_1=Source_missing_final_1.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_1[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  LSTMm=RNNs_M(inputDim, hiddenNum, outputDim, unit, lr)
  LSTMm.build_Model(unit="LSTM")
  # Train with the batch_size declared in this cell rather than the
  # batchSize left over from the MLP cells.
  LSTMm.train_Model(train_X, trainY, epoch, batch_size)
  LSTMm_testPred = LSTMm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  LSTMm_testPred = scaler.inverse_transform(LSTMm_testPred)
  LSTMm_testPred=pd.DataFrame(LSTMm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  LSTMm_testPred=LSTMm_testPred.iloc[len(LSTMm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  LSTMm_pre=dataset_source.query('index not in @index')
  df=pd.concat([LSTMm_pre, LSTMm_testPred]).sort_index()
  LSTMm_index_1[col]=df

In [None]:
#LSTM
# LSTM imputation for the rows listed in index_2: one model is trained per
# column and the completed column is stored in LSTMm_index_2[col].
unit="LSTM"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1
hiddenNum=64
index=index_2
LSTMm_index_2={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_2['index']=array
Source_missing_final_2=Source_missing_final_2.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_2[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  LSTMm=RNNs_M(inputDim, hiddenNum, outputDim, unit, lr)
  LSTMm.build_Model(unit="LSTM")
  # Train with the batch_size declared in this cell rather than the
  # batchSize left over from the MLP cells.
  LSTMm.train_Model(train_X, trainY, epoch, batch_size)
  LSTMm_testPred = LSTMm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  LSTMm_testPred = scaler.inverse_transform(LSTMm_testPred)
  LSTMm_testPred=pd.DataFrame(LSTMm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  LSTMm_testPred=LSTMm_testPred.iloc[len(LSTMm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  LSTMm_pre=dataset_source.query('index not in @index')
  df=pd.concat([LSTMm_pre, LSTMm_testPred]).sort_index()
  LSTMm_index_2[col]=df

In [None]:
#LSTM
# LSTM imputation for the rows listed in index_3: one model is trained per
# column and the completed column is stored in LSTMm_index_3[col].
unit="LSTM"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_3
LSTMm_index_3={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_3['index']=array
Source_missing_final_3=Source_missing_final_3.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_3[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from earlier cells.
  LSTMm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  LSTMm.build_Model(unit="LSTM")
  LSTMm.train_Model(train_X, trainY, epoch, batch_size)
  LSTMm_train_ModelPred = LSTMm.predict(train_X)
  LSTMm_testPred = LSTMm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  LSTMm_testPred = scaler.inverse_transform(LSTMm_testPred)
  LSTMm_testPred=pd.DataFrame(LSTMm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  LSTMm_testPred=LSTMm_testPred.iloc[len(LSTMm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  LSTMm_pre=dataset_source.query('index not in @index')
  df=pd.concat([LSTMm_pre, LSTMm_testPred]).sort_index()
  LSTMm_index_3[col]=df

In [None]:
#LSTM
# LSTM imputation for the rows listed in index_4: one model is trained per
# column and the completed column is stored in LSTMm_index_4[col].
unit="LSTM"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim=1
outputDim=1

index=index_4
LSTMm_index_4={}
# Re-labelling the rows to 0..n-1 does not depend on the column, so it is done
# once here instead of on every loop iteration.
Source_missing_final_4['index']=array
Source_missing_final_4=Source_missing_final_4.set_index('index')
for col in columns:
  dataset_source=source_continues_data[[col]]
  # Fill the gaps with the column mean, then scale to [0, 1].
  dataset_mean=mean_replace(Source_missing_final_4[[col]],col)
  ts_mean,data_mean=load_data(dataset_mean,col)
  mean_dataset=NormalizeDataset(data_mean)
  train_Model_mean,test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model=train_Model_mean
  # Prediction windows are built over the full normalized series.
  test=mean_dataset
  lookBack=len(train_Model_mean)-len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)
  # Use the hyper-parameters declared in this cell (hidden_dim, batch_size)
  # rather than hiddenNum/batchSize left over from earlier cells.
  LSTMm=RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  LSTMm.build_Model(unit="LSTM")
  LSTMm.train_Model(train_X, trainY, epoch, batch_size)
  LSTMm_train_ModelPred = LSTMm.predict(train_X)
  LSTMm_testPred = LSTMm.predict(test_X)
  # NOTE(review): this scaler is fitted on the complete source column, not on
  # the mean-imputed data that NormalizeDataset scaled -- confirm this is intended.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  LSTMm_testPred = scaler.inverse_transform(LSTMm_testPred)
  LSTMm_testPred=pd.DataFrame(LSTMm_testPred).rename(columns={0: col})
  # Keep the last len(index) predictions and label them with `index`.
  LSTMm_testPred=LSTMm_testPred.iloc[len(LSTMm_testPred)-len(index):,:].set_index(index)
  # Source values for rows outside `index`, predictions for the rows in it.
  LSTMm_pre=dataset_source.query('index not in @index')
  df=pd.concat([LSTMm_pre, LSTMm_testPred]).sort_index()
  LSTMm_index_4[col]=df

In [None]:
# LSTM imputation for missing-data set 5.
# For every column: fill the gaps with the column mean, train an LSTM on the
# prepared series, predict, and overwrite the rows listed in `index_5` with the
# predictions. Results are stored per column in `LSTMm_index_5`.
unit = "LSTM"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim = 1
outputDim = 1

index = index_5  # row labels that were blanked out in this data set
LSTMm_index_5 = {}

# The re-indexing does not depend on the column, so do it once up front
# instead of repeating it on every loop iteration.
Source_missing_final_5['index'] = array
Source_missing_final_5 = Source_missing_final_5.set_index('index')

for col in columns:
  dataset_source = source_continues_data[[col]]

  # Mean-impute the missing values so the series has no NaN before modelling.
  dataset_mean = mean_replace(Source_missing_final_5[[col]], col)
  ts_mean, data_mean = load_data(dataset_mean, col)
  mean_dataset = NormalizeDataset(data_mean)

  # Train on the split returned by dividetrain_ModelTest; predict over the
  # whole normalised series.
  train_Model_mean, test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model = train_Model_mean
  test = mean_dataset
  lookBack = len(train_Model_mean) - len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)

  # Use the hyperparameters declared at the top of this cell (hidden_dim,
  # batch_size, unit) so the cell does not rely on `hiddenNum` / `batchSize`
  # having been defined by some other cell.
  LSTMm = RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  LSTMm.build_Model(unit=unit)
  LSTMm.train_Model(train_X, trainY, epoch, batch_size)
  LSTMm_train_ModelPred = LSTMm.predict(train_X)
  LSTMm_testPred = LSTMm.predict(test_X)

  # Map predictions back to the original scale with a scaler fitted on the
  # complete source column.
  # NOTE(review): this assumes NormalizeDataset applies the same 0-1 scaling
  # as this scaler; it is fitted on different data - confirm.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  LSTMm_testPred = scaler.inverse_transform(LSTMm_testPred)
  LSTMm_testPred = pd.DataFrame(LSTMm_testPred).rename(columns={0: col})

  # Keep only the last len(index) predictions and label them with the
  # missing-row index.
  first_kept = len(LSTMm_testPred) - len(index)
  LSTMm_testPred = LSTMm_testPred.iloc[first_kept:, :].set_index(index)

  # Rows that were never missing come straight from the source data.
  LSTMm_pre = dataset_source.query('index not in @index')
  df = pd.concat([LSTMm_pre, LSTMm_testPred]).sort_index()
  LSTMm_index_5[col] = df

In [None]:
# LSTM imputation for missing-data set 6.
# For every column: fill the gaps with the column mean, train an LSTM on the
# prepared series, predict, and overwrite the rows listed in `index_6` with the
# predictions. Results are stored per column in `LSTMm_index_6`.
unit = "LSTM"
batch_size = 32
epoch = 1
hidden_dim = 64
lr = 1e-4
inputDim = 1
outputDim = 1

index = index_6  # row labels that were blanked out in this data set
LSTMm_index_6 = {}

# The re-indexing does not depend on the column, so do it once up front
# instead of repeating it on every loop iteration.
Source_missing_final_6['index'] = array
Source_missing_final_6 = Source_missing_final_6.set_index('index')

for col in columns:
  dataset_source = source_continues_data[[col]]

  # Mean-impute the missing values so the series has no NaN before modelling.
  dataset_mean = mean_replace(Source_missing_final_6[[col]], col)
  ts_mean, data_mean = load_data(dataset_mean, col)
  mean_dataset = NormalizeDataset(data_mean)

  # Train on the split returned by dividetrain_ModelTest; predict over the
  # whole normalised series.
  train_Model_mean, test_mean = dividetrain_ModelTest(mean_dataset, index)
  train_Model = train_Model_mean
  test = mean_dataset
  lookBack = len(train_Model_mean) - len(index)
  train_X, trainY = createSamples(train_Model, lookBack)
  test_X, testY = createSamples(test, lookBack)

  # Use the hyperparameters declared at the top of this cell (hidden_dim,
  # batch_size, unit) so the cell does not rely on `hiddenNum` / `batchSize`
  # having been defined by some other cell.
  LSTMm = RNNs_M(inputDim, hidden_dim, outputDim, unit, lr)
  LSTMm.build_Model(unit=unit)
  LSTMm.train_Model(train_X, trainY, epoch, batch_size)
  LSTMm_train_ModelPred = LSTMm.predict(train_X)
  LSTMm_testPred = LSTMm.predict(test_X)

  # Map predictions back to the original scale with a scaler fitted on the
  # complete source column.
  # NOTE(review): this assumes NormalizeDataset applies the same 0-1 scaling
  # as this scaler; it is fitted on different data - confirm.
  scaler = MinMaxScaler(feature_range=(0.0, 1.0)).fit(dataset_source)
  LSTMm_testPred = scaler.inverse_transform(LSTMm_testPred)
  LSTMm_testPred = pd.DataFrame(LSTMm_testPred).rename(columns={0: col})

  # Keep only the last len(index) predictions and label them with the
  # missing-row index.
  first_kept = len(LSTMm_testPred) - len(index)
  LSTMm_testPred = LSTMm_testPred.iloc[first_kept:, :].set_index(index)

  # Rows that were never missing come straight from the source data.
  LSTMm_pre = dataset_source.query('index not in @index')
  df = pd.concat([LSTMm_pre, LSTMm_testPred]).sort_index()
  LSTMm_index_6[col] = df

In [None]:
# Print MSE(...) between the complete source data and each LSTM-imputed result:
# for every column, one value per missing-data set, in the order sets 1 to 6.
indexs=[LSTMm_index_1,LSTMm_index_2,LSTMm_index_3,LSTMm_index_4,LSTMm_index_5,LSTMm_index_6]

for col in columns:
  #print(col)
  # The loop variable is deliberately not named `index`: the imputation cells
  # keep the missing-row labels in a notebook-level `index` (also referenced
  # as `@index` inside DataFrame.query), and it must not be rebound to a dict.
  for imputed_by_col in indexs:
    col_MSE=MSE(source_continues_data[[col]],imputed_by_col[col])
    print(col_MSE)

In [None]:
# Print RMSE(...) between the complete source data and each LSTM-imputed result:
# for every column, one value per missing-data set, in the order sets 1 to 6.
indexs=[LSTMm_index_1,LSTMm_index_2,LSTMm_index_3,LSTMm_index_4,LSTMm_index_5,LSTMm_index_6]

for col in columns:
  #print(col)
  # The loop variable is deliberately not named `index`: the imputation cells
  # keep the missing-row labels in a notebook-level `index` (also referenced
  # as `@index` inside DataFrame.query), and it must not be rebound to a dict.
  for imputed_by_col in indexs:
    col_RMSE=RMSE(source_continues_data[[col]],imputed_by_col[col])
    print(col_RMSE)