<a href="https://colab.research.google.com/github/ProjectFace2/reservoirOutflow/blob/master/Ensemble_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np
from numpy import concatenate
from datetime import datetime
import matplotlib as mpl
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from math import sqrt
import fbprophet as fb
import pickle

## Dataset: load the Harangi reservoir data and scale the features to [0, 1]

In [0]:
# Read the reservoir records and discard the three columns that are not used as model inputs.
df = pd.read_csv('datasets/allYearHarangi.csv', index_col=0).drop(
    columns=["Present Storage(TMC)", 'Reservoir Level(TMC)', 'Outflow']
)

# Min-max scale every remaining column into [0, 1]; `scaler` is reused later
# to map predictions back to the original units.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = scaler.fit_transform(df.values)

# Label the first four scaled columns, in order.
reshaped = pd.DataFrame({
    label: scaled[:, position]
    for position, label in enumerate(['Inflow', 'MADIKERI', 'SOMWARPET', 'VIRAJPET'])
})


In [0]:
def series_to_supervised(data, lag_days=1, lead_days=1, dropnan=True):
    """Reframe a time series as a supervised-learning table.

    Each output row places the observations of the ``lag_days`` preceding
    steps next to the observations of the current step and of the following
    ``lead_days - 1`` steps, so that earlier values can be used as inputs for
    predicting later ones.

    Parameters
    ----------
    data : list or array-like
        The series, one row per time step. Anything accepted by
        ``pandas.DataFrame`` works: a flat list (one feature), a list of rows,
        or a 1-D / 2-D array.
    lag_days : int, default 1
        Number of past steps ``t-lag_days .. t-1`` to include.
    lead_days : int, default 1
        Number of steps ``t .. t+lead_days-1`` to include. ``0`` yields a table
        with lagged columns only.
    dropnan : bool, default True
        Drop the rows that contain NaN introduced by shifting.

    Returns
    -------
    pandas.DataFrame
        Columns are named ``var<j>(t-<i>)``, ``var<j>(t)`` and ``var<j>(t+<i>)``.

    Raises
    ------
    ValueError
        If neither lagged nor lead columns are requested.

    Notes
    -----
    The number of features is printed to stdout.
    """
    df = pd.DataFrame(data)
    # Count features on the constructed frame rather than on the input type:
    # a list of rows has as many features as it has columns, and a 1-D array
    # has exactly one.
    no_of_features = df.shape[1]
    print(no_of_features)

    cols, names = [], []
    # input sequence (t-n, ... t-1)
    for i in range(lag_days, 0, -1):
        cols.append(df.shift(i))
        names.extend('var%d(t-%d)' % (j + 1, i) for j in range(no_of_features))
    # forecast sequence (t, t+1, ... t+n)
    for i in range(lead_days):
        cols.append(df.shift(-i))
        suffix = '(t)' if i == 0 else '(t+%d)' % i
        names.extend('var%d%s' % (j + 1, suffix) for j in range(no_of_features))

    if not cols:
        raise ValueError('lag_days and lead_days cannot both be less than 1')

    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

In [0]:
class LocallyWeightedRegression:
    """Locally weighted linear regression with a Gaussian kernel.

    For every query point a separate weighted least-squares fit is solved,
    where each training row is weighted by
    ``exp(-||query - row||^2 / (2 * k^2))``.

    ``fit`` does not train on the arrays it is given: it only reads the number
    of columns to derive the lag and then rebuilds the training set from the
    CSV file (see ``init_train``).

    Attributes set by ``fit`` / ``setup``:
        train_x, train_y : training inputs and targets returned by ``init_train``.
        scaler           : the ``MinMaxScaler`` fitted in ``init_train``; used to
                           map predictions back to the original units.
    """

    # Kernel bandwidth used when the caller does not supply one.
    DEFAULT_BANDWIDTH = 7.15
    # Number of features per time step in the training table.
    N_FEATURES = 4

    def __init__(self):
        """Create an unfitted model; all state is set by ``fit``."""
        return

    def setup(self, X, y):
        """Build and store the training set matching the lag implied by ``X``.

        Only ``X.shape[1]`` is used (to derive the lag as columns / 4);
        ``y`` is accepted for interface symmetry and is not read.
        """
        lag_val = int(X.shape[1] / self.N_FEATURES)
        # The first ``lag_val`` rows are lost to shifting, so the first usable
        # row is ``lag_val`` days after 2011-01-01 (lag 4 -> 2011-01-05,
        # lag 9 -> 2011-01-10).
        first_day = pd.Timestamp('2011-01-01') + pd.Timedelta(days=lag_val)
        begin_year = first_day.strftime('%Y-%m-%d')
        self.train_x, self.train_y = self.init_train(lag_val, begin_year)

    def fit(self, train_x, train_y):
        """Prepare the model; delegates to ``setup`` (see the class docstring)."""
        self.setup(train_x, train_y)

    def init_train(self, lag_val=4, begin_year='2011-01-04',
                   csv_path='Datasets/allYearHarangi.csv'):
        """Load the CSV, scale it and return the lagged 2011-2016 training set.

        Parameters
        ----------
        lag_val : int
            Number of lagged days placed in each training row.
        begin_year : str
            Date of the first row that survives lagging; must equal
            2011-01-01 plus ``lag_val`` days or the date assignment below
            fails on a length mismatch.
            NOTE(review): the default '2011-01-04' does not match the default
            ``lag_val=4`` (which needs '2011-01-05'); ``setup`` always passes
            a consistent pair.
        csv_path : str
            Location of the data file.
            NOTE(review): the notebook's loading cell reads
            'datasets/allYearHarangi.csv' (lower-case directory) -- confirm
            which casing exists on a case-sensitive filesystem.

        Returns
        -------
        (train_x, train_y) : tuple of numpy.ndarray
            ``train_x`` holds the lagged features, ``train_y`` the first
            feature at time ``t`` as a column vector.

        Side effects
        ------------
        Stores the fitted scaler on ``self.scaler``.
        """
        raw = pd.read_csv(csv_path, header=0, parse_dates=True, index_col=0)
        features = raw.drop(["Present Storage(TMC)", 'Reservoir Level(TMC)', 'Outflow'], axis=1)
        # Keep the scaler: call_Lwr needs it to undo the scaling.
        self.scaler = MinMaxScaler(feature_range=(0, 1))
        scaled = self.scaler.fit_transform(features.values)
        frame = pd.DataFrame({'Inflow': scaled[:, 0], 'MADIKERI': scaled[:, 1],
                              'SOMWARPET': scaled[:, 2], 'VIRAJPET': scaled[:, 3]})
        # The file is expected to hold one row per day over this range.
        frame['Dates'] = pd.date_range('2011-01-01', '2018-12-31')

        # Train on 2011-2016 only; indexing by date keeps the date out of the
        # value matrix.
        years = pd.DatetimeIndex(frame['Dates']).year
        train = frame.loc[(years >= 2011) & (years <= 2016)].set_index('Dates')

        values = train.values.astype('float32')
        reframed = series_to_supervised(values, lag_val, 1)
        # Of the time-t columns keep only the first feature (the target).
        reframed = reframed.drop(columns=reframed.columns[[-1, -2, -3]])

        # Re-attach dates so the rows can be ordered by calendar month first
        # and by date within each month.
        reframed['Dates'] = pd.date_range(begin_year, '2016-12-31')
        reframed['month'] = pd.DatetimeIndex(reframed['Dates']).month
        reframed = reframed.sort_values(by=['month', 'Dates']).drop(columns=['month', 'Dates'])

        values = reframed.values
        train_x = values[:, :-1]
        train_y = values[:, -1].reshape((train_x.shape[0], 1))
        return train_x, train_y

    # locally weighted regression

    @staticmethod
    def _kernel_weights(point, inp, k):
        """Return the Gaussian kernel weight of every row of ``inp`` w.r.t. ``point``."""
        diffs = np.asarray(inp, dtype=float) - np.asarray(point, dtype=float)
        return np.exp(np.sum(diffs * diffs, axis=1) / (-2.0 * (k ** 2)))

    @staticmethod
    def lwr1(x0, inp, out, k):
        """Predict the target at query ``x0`` from a local weighted linear fit.

        ``inp`` is the (m, n) training matrix, ``out`` the matching targets and
        ``k`` the kernel bandwidth. Returns ``x0 . beta``.
        """
        coeffs = LocallyWeightedRegression.beta(x0, inp, out, k)
        return np.dot(np.asarray(x0, dtype=float), coeffs)

    @staticmethod
    def beta(point, inp, out, k):
        """Solve the weighted normal equations ``(X^T W X) b = X^T W y`` for ``point``."""
        inp = np.asarray(inp, dtype=float)
        out = np.asarray(out, dtype=float)
        weights = LocallyWeightedRegression._kernel_weights(point, inp, k)
        # Scaling the columns of X^T by the weights equals X^T @ diag(w)
        # without materialising the m x m diagonal matrix.
        xtw = inp.T * weights
        # The pseudo-inverse equals the inverse when X^T W X is invertible and
        # still returns a least-squares solution when it is singular.
        return np.dot(np.linalg.pinv(np.dot(xtw, inp)), np.dot(xtw, out))

    @staticmethod
    def kernal(point, inp, k):
        """Return the (m, m) diagonal matrix of kernel weights for ``point``."""
        return np.diag(LocallyWeightedRegression._kernel_weights(point, inp, k))

    def call_Lwr(self, test_x, train_x, train_y, k=0):
        """Predict every row of ``test_x`` and return values in original units.

        Parameters
        ----------
        test_x : array-like, shape (rows, n)
            Scaled query rows with the same column layout as ``train_x``.
        train_x, train_y : array-like
            Training inputs and targets.
        k : float, default 0
            Kernel bandwidth; a falsy value selects ``DEFAULT_BANDWIDTH``.

        Returns
        -------
        numpy.ndarray, shape (rows,)
            Predictions after ``inverse_transform``.
        """
        bandwidth = k if k else self.DEFAULT_BANDWIDTH
        test_x = np.asarray(test_x, dtype=float)
        train_x = np.asarray(train_x, dtype=float)
        train_y = np.asarray(train_y, dtype=float)

        ypred = [self.lwr1(row, train_x, train_y, bandwidth) for row in test_x]
        ypred = np.array(ypred).reshape(len(ypred), 1)

        # The scaler works on four columns, so the prediction is padded with
        # the last three columns of each query row before inverting.
        inv_yhat = np.concatenate((ypred, test_x[:, -3:]), axis=1)
        fitted_scaler = getattr(self, 'scaler', None)
        if fitted_scaler is None:
            # Instances without a stored scaler (e.g. unpickled older objects)
            # fall back to the notebook-level ``scaler``.
            fitted_scaler = scaler
        out = fitted_scaler.inverse_transform(inv_yhat)
        return out[:, 0]

    def predict(self, test_x):
        """Predict ``test_x`` using the training set stored by ``fit``."""
        return self.call_Lwr(test_x, self.train_x, self.train_y)

In [0]:
# Stack rows -5..-2 of the scaled frame on top of an all-zero placeholder row,
# then reframe with a lag of 4 and no lead columns so only the lagged
# features remain in the single surviving row.
x = np.vstack([reshaped.iloc[-5:-1, :].values, np.zeros((1, 4))])
test = series_to_supervised(x, 4, 0)
test

4


Unnamed: 0,var1(t-4),var2(t-4),var3(t-4),var4(t-4),var1(t-3),var2(t-3),var3(t-3),var4(t-3),var1(t-2),var2(t-2),var3(t-2),var4(t-2),var1(t-1),var2(t-1),var3(t-1),var4(t-1)
4,0.001322,0.0,0.0,0.0,0.001258,0.0,0.0,0.0,0.001309,0.0,0.0,0.0,0.001271,0.0,0.0,0.0


In [0]:
def predict(queryset, date):
    """Forecast with several pickled models and return their ensemble.

    Parameters
    ----------
    queryset : pandas.DataFrame
        One row per forecast to produce. Each row must hold 16 scaled values
        (4 lag steps x 4 features), laid out like the output of
        ``series_to_supervised(x, 4, 0)``. The frame is not modified.
    date : scalar or sequence
        Value(s) written to the ``ds`` column handed to the second model;
        for several rows pass one date per row.

    Returns
    -------
    numpy.ndarray
        One value per row: the mean of the LSTM forecast and the second
        model's forecast.

    Notes
    -----
    Relies on the notebook-level ``scaler`` to map the LSTM output back to
    the original units. The models are read with ``pickle.load``, which can
    execute arbitrary code -- only load model files you trust.
    """
    n_hours = 4      # lag steps per sample
    n_features = 4   # features per lag step
    test_x = queryset.values

    # lstm prediction
    lstm_test_X = test_x.reshape((test_x.shape[0], n_hours, n_features))
    with open('models/lstmInf_forecast_model_lag4.pckl', 'rb') as fin:
        lstm_lag4_model = pickle.load(fin)
    lstm_yhat = lstm_lag4_model.predict(lstm_test_X)  # assumed shape (rows, 1) -- TODO confirm
    # The scaler expects four columns: pad the prediction with the last three
    # columns of each query row before inverting, then keep column 0.
    inv_yhatx = concatenate((lstm_yhat, test_x[:, -3:]), axis=1)
    inv_yhaty = scaler.inverse_transform(inv_yhatx)
    lstm_res = inv_yhaty[:, 0]

    # fb prophet prediction
    # Work on a copy so the caller's frame (and the LWR input below) does not
    # gain a 'ds' column.
    fb_test_X = queryset.copy()
    fb_test_X.insert(0, 'ds', date)
    with open('models/fbInf_forecast_model_lag4.pckl', 'rb') as fin:
        proph_lag4_model = pickle.load(fin)
    proph_forecast = proph_lag4_model.predict(fb_test_X)
    # Last column of the forecast frame (presumably Prophet's `yhat` -- confirm),
    # converted to float so the average below is numeric rather than object dtype.
    proph_res = proph_forecast.iloc[:, -1].values.astype(float)

    # lwr prediction
    with open('models/lwrInf_forecast_model_lag4.pckl', 'rb') as fin:
        lwr_lag4_model = pickle.load(fin)
    # NOTE(review): computed but not yet part of the ensemble below.
    lwr_res = lwr_lag4_model.predict(test_x)

    # ensembled prediction
    result = (proph_res + lstm_res) / 2  # as of now it is average
    return result