In [1]:
import numpy as np
import pandas as pd
import keras
from pandas.tseries.offsets import Hour, Minute
from pandas.tseries.offsets import Day, MonthEnd
from pandas.tseries.offsets import Hour
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error as mse
from keras.preprocessing.sequence import TimeseriesGenerator
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras_tuner.tuners import RandomSearch
import keras_tuner
from tensorflow.keras.utils import set_random_seed as set_seed
# Fix the global random seed (tf.keras `set_random_seed`) before any model is built.
set_seed(42)


In [2]:


# Alternative input file, currently disabled:
#df = pd.read_csv('Wind_deseason.csv')
# Load the series used below; the first CSV column is parsed as dates and becomes the index.
df = pd.read_csv('PV_deseason_15.csv', parse_dates=[0], index_col=0)


In [3]:
def TrainTest(df):
    """Return the row position where the training part of ``df`` ends.

    The first 80% of the rows (rounded down) count as training data. Only the
    boundary is returned; callers slice ``df[:bound]`` / ``df[bound:]``
    themselves.
    """
    total_rows, *_ = df.shape
    train_rows = total_rows * 8 // 10
    return train_rows
        
            

In [4]:
def lags(df, n, column='310_PV_1'):
    """Return a new frame holding ``df`` plus ``n`` lagged copies of one column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame; it is left untouched (the previous version added the lag
        columns to the caller's frame in place).
    n : int
        Number of lags. Columns ``"lag 1"`` .. ``"lag n"`` are appended in that
        order, where ``"lag k"`` is ``df[column].shift(k)``.
    column : str, optional
        Column to lag. Defaults to ``'310_PV_1'``, the only column used by the
        existing callers.

    Returns
    -------
    pandas.DataFrame
        The original columns followed by the lag columns. The first ``k`` rows
        of ``"lag k"`` are NaN because no earlier value exists.
    """
    series = df[column]
    shifted = [series.shift(k).rename("lag " + str(k)) for k in range(1, n + 1)]

    # If the frame already carries columns with these names (e.g. it was
    # lagged before), replace them instead of producing duplicate labels.
    base = df.drop(columns=[s.name for s in shifted], errors='ignore')

    # A single concat avoids inserting 100+ columns one at a time, which
    # fragments the frame and triggers pandas' PerformanceWarning.
    return pd.concat([base, *shifted], axis=1)

In [5]:

def model1Train(X, Y, window=107, epochs=20):
    """Build and fit a fixed three-layer stacked LSTM regressor.

    Parameters
    ----------
    X : array-like
        Training inputs passed straight to ``fit``; the network expects
        windows of ``window`` time steps with one feature each.
    Y : array-like
        Training targets passed straight to ``fit``.
    window : int, optional
        Number of time steps per input window (default 107, the value that
        was previously hard-coded).
    epochs : int, optional
        Training epochs (default 20, the previous hard-coded value).

    Returns
    -------
    The fitted Keras ``Sequential`` model.
    """
    mod = Sequential()
    # Only the first layer needs the input shape; later layers infer theirs.
    mod.add(LSTM(6, activation='sigmoid', input_shape=(window, 1), return_sequences=True))
    mod.add(LSTM(6, activation='sigmoid', return_sequences=True))
    mod.add(LSTM(6, activation='sigmoid'))
    mod.add(Dense(1))
    # This is a regression (MSE loss), so classification accuracy carries no
    # information; report mean absolute error alongside the loss instead.
    mod.compile(optimizer='adam', loss='mse', metrics=['mae'])

    mod.fit(x=X, y=Y, epochs=epochs, verbose=1)  # keep low for now, edit later

    return mod
    
    
    

In [6]:
def buildmodel(hp):
    """Build one candidate model for the KerasTuner search.

    ``hp`` is the hyperparameter container supplied by the tuner. Two values
    are drawn from it, in this order: ``'units'`` (integer 1..8, the LSTM
    width) and ``'learning rate'`` (one of the listed Adam learning rates).

    Returns a compiled single-layer LSTM followed by a one-unit dense output,
    taking windows of 107 time steps with one feature.
    """
    units = hp.Int('units', min_value=1, max_value=8, step=1)
    learning_rate = hp.Choice('learning rate', values=[0.001, 0.01, 0.1, 0.3, 0.5])

    mod = Sequential()
    mod.add(LSTM(units=units, activation='sigmoid', input_shape=(107, 1)))
    mod.add(Dense(1))
    # Regression with MSE loss: 'accuracy' is meaningless here (it stays at
    # zero in the training log), so track mean absolute error instead.
    mod.compile(optimizer=keras.optimizers.Adam(learning_rate), loss='mse', metrics=['mae'])
    return mod

In [7]:
def model2Train(gen):
    """Run a random hyperparameter search and return the winner.

    Parameters
    ----------
    gen
        Batch generator passed unchanged to ``tuner.search`` as training data.

    Returns
    -------
    (best_mod, params)
        ``best_mod`` is the best model found by the tuner; ``params`` is a
        dict of the best trial's hyperparameter values.
    """
    # NOTE(review): the objective is the *training* loss because no validation
    # data is given to the search — confirm this is intended.
    tuner = keras_tuner.RandomSearch(buildmodel, objective='loss', max_trials=5, overwrite=True, seed=40)
    tuner.search(gen, epochs=3)
    best_mod = tuner.get_best_models()[0]

    # results_summary() only prints and returns None, so the chosen
    # hyperparameters have to be read from the tuner explicitly.
    tuner.results_summary()
    params = tuner.get_best_hyperparameters()[0].values

    return best_mod, params
    

In [8]:
def TestModel(X, test, mod, scaler):
    """Score a fitted model on held-out windows, in the original data units.

    Parameters
    ----------
    X : sequence of sequences
        One input window per sample (all windows the same length).
    test : sequence of float
        Scaled target value for each window in ``X``.
    mod
        Object with ``predict(array)``; receives all windows at once with
        shape ``(n_samples, window, 1)``.
    scaler
        Object with ``inverse_transform``; used to map predictions and
        targets back to the unscaled range before scoring.

    Returns
    -------
    list of float
        Squared error of each sample (the MSE of a single prediction), one
        entry per window in ``X``. Empty input yields an empty list.
    """
    if len(X) == 0:
        return []

    # Window length is taken from the data instead of being hard-coded.
    windows = np.asarray(X, dtype=float)
    windows = windows.reshape(windows.shape[0], -1, 1)
    n_samples = windows.shape[0]

    # One batched call instead of one predict() per sample; this also drops
    # the per-sample debug print that flooded the cell output.
    pred = np.asarray(mod.predict(windows)).reshape(n_samples, -1)
    targets = np.asarray(test, dtype=float)[:n_samples].reshape(-1, 1)

    unscaled_pred = scaler.inverse_transform(pred)
    unscaled_test = scaler.inverse_transform(targets)

    return np.mean((unscaled_test - unscaled_pred) ** 2, axis=1).tolist()

In [9]:
# TODO: implement modelNaiveTest(df) -- stub only, no body present.

    
    
    


In [10]:
def runModels(df, last, season):
    """Tune, save and evaluate an LSTM for one seasonal slice of the data.

    Parameters
    ----------
    df : pandas.DataFrame
        Season data containing the column ``'310_PV_1'``.
    last : pandas.DataFrame
        History rows (same column) prepended to the scaled series. Every
        offset below assumes it holds exactly 107 rows, which is what the
        existing callers pass (``iloc[-107:]``).
    season : str
        Path handed to ``model.save`` for the best model.

    Returns
    -------
    (Tests, params)
        ``Tests`` is the list of per-sample errors from ``TestModel`` on the
        last 20% of ``df``; ``params`` is the second value returned by
        ``model2Train``.
    """
    window = 107  # must match the input_shape hard-coded in buildmodel

    bound = TrainTest(df)
    train = df[0:bound]

    # Fit the scaler on the training rows only so test data cannot leak in.
    scaler = MinMaxScaler()
    scaler.fit(train['310_PV_1'].to_frame())

    scaled_df = scaler.transform(df['310_PV_1'].to_frame())
    scaled_last = scaler.transform(last['310_PV_1'].to_frame())
    # np.append flattens both pieces into one 1-D series: history, then season.
    scaled_df = np.append(scaled_last, scaled_df)

    # NOTE(review): this slice starts *after* the prepended history, so the
    # first `window` training rows are never used as targets. The test windows
    # only reach back to index `bound`, so whenever bound >= window the values
    # of `last` influence nothing but the index offsets. If the history was
    # meant to feed training, the slice should probably start at 0 -- confirm.
    train_series = scaled_df[window:bound + window]
    generator = TimeseriesGenerator(train_series, train_series, length=window, batch_size=1)

    scaled_df2 = pd.DataFrame(scaled_df, columns=['310_PV_1'])
    lag = lags(scaled_df2, window)
    # Reverse the columns so each row reads oldest lag -> newest lag -> target.
    lag = lag[lag.columns[::-1]]

    lagged_test = lag.iloc[bound + window:]
    X_test = lagged_test.drop('310_PV_1', axis=1).values.tolist()
    Y_test = lagged_test['310_PV_1'].values.tolist()

    model2, params = model2Train(generator)
    model2.save(season)

    Tests = TestModel(X_test, Y_test, model2, scaler)
    print(Tests)

    return Tests, params

    

In [11]:


# One frame per calendar quarter (Q1 = Jan-Mar ... Q4 = Oct-Dec).
# The Q1 frame additionally drops its final row.
df1 = df[df.index.quarter == 1].iloc[:-1]

df2 = df[df.index.quarter == 2]

df3 = df[df.index.quarter == 3]

df4 = df[df.index.quarter == 4]

In [12]:
 # Season 1 (months 1-3), with the last 107 rows of the months 10-12 frame as history.
 # NOTE(review): whether those rows actually precede df1 in time depends on the
 # span of the data file -- confirm.
 mse1, params1 = runModels(df1,df4.iloc[-107:], 'season1')


Trial 1 Complete [00h 07m 16s]
loss: 0.004261706955730915

Best loss So Far: 0.004261706955730915
Total elapsed time: 00h 07m 16s

Search: Running Trial #2

Value             |Best Value So Far |Hyperparameter
6                 |4                 |units
0.001             |0.01              |learning rate

Epoch 1/3
  21/6881 [..............................] - ETA: 2:38 - loss: 0.0227 - accuracy: 0.0000e+00

In [13]:
# Season 2 (months 4-6), history = last 107 rows of df1; errors are kept as mse3.
mse3, params2 = runModels(df2,df1.iloc[-107:], 'season2')

In [14]:
# Season 3 (months 7-9), history = last 107 rows of df2; errors are kept as mse5.
mse5, params3 = runModels(df3, df2.iloc[-107:],'season3')

In [15]:
# Season 4 (months 10-12), history = last 107 rows of df3; errors are kept as mse7.
mse7, params4 = runModels(df4, df3.iloc[-107:],'season4')

In [16]:
# Report the search result kept for season 1 (second value returned by runModels).
print('season 1')

print(params1)
# Disabled leftovers; mse2 is not assigned anywhere in the cells shown here.
#print(mse1)
#print('model Naive:')
#print(mse2)


In [17]:
# Report the search result kept for season 2 (second value returned by runModels).
print('season 2')

print(params2)
# Disabled leftovers; they still name mse1/mse2 rather than this season's mse3.
#print(mse1)
#print('model Naive:')
#print(mse2)


In [18]:
# Report the search result kept for season 3 (second value returned by runModels).
print('season 3')

print(params3)
# Disabled leftovers; they still name mse1/mse2 rather than this season's mse5.
#print(mse1)
#print('model Naive:')
#print(mse2)


In [19]:
# Report the search result kept for season 4 (second value returned by runModels).
print('season 4')

print(params4)
# Disabled leftovers; they still name mse1/mse2 rather than this season's mse7.
#print(mse1)
#print('model Naive:')
#print(mse2)


In [20]:

# Collect the per-sample errors of every season, one column per season.
d = dict(zip(
    ('Season1', 'Season2', 'Season3', 'Season4'),
    (mse1, mse3, mse5, mse7),
))
# Wrapping each list in a Series lets seasons of different length share one
# frame; shorter columns are padded with NaN.
df_errors_LSTM = pd.DataFrame({
    season_label: pd.Series(season_errors)
    for season_label, season_errors in d.items()
})
df_errors_LSTM.to_csv('LSTM_errors_scaled_random.csv')



In [21]:
# Repeats the season 1 report printed earlier in the notebook.
print(params1)