In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from keras import models
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit
from keras.layers import Dropout
from sklearn.metrics import mean_squared_error,mean_absolute_percentage_error as mape
from keras import optimizers

In [2]:
def preparedata(file):
    """Load a cloud-cover CSV and average the readings that fall in the same minute.

    Parameters
    ----------
    file : str, path-like or file-like
        Passed straight to ``pandas.read_csv`` (with ``skipinitialspace=True``).
        The data must contain the columns ``Year``, ``Month``, ``Day``,
        ``Hour``, ``Minute`` and ``CloudCover``. Any other column (for
        example ``Second``) is ignored.

    Returns
    -------
    pandas.DataFrame
        Two columns, in this order: ``cloud_cover`` (mean of ``CloudCover``
        over all rows sharing a year/month/day/hour/minute) and ``DateTime``
        (the minute-resolution timestamp). Rows are ordered by ``DateTime``;
        rows containing NaN are dropped.
    """
    raw = pd.read_csv(file, skipinitialspace=True)

    # Assemble the timestamp directly from its numeric components. This avoids
    # formatting every row into a string and parsing it back, and it does not
    # rely on positional integer indexing of a label-indexed row Series.
    stamp_parts = raw[['Year', 'Month', 'Day', 'Hour', 'Minute']].rename(columns=str.lower)
    stamps = pd.to_datetime(stamp_parts)

    readings = pd.DataFrame({'cloud_cover': raw['CloudCover'], 'DateTime': stamps})

    # Seconds are deliberately not part of the key: every reading taken within
    # the same minute is collapsed into a single mean value.
    per_minute = (
        readings
        .groupby('DateTime')
        .agg(cloud_cover=('cloud_cover', 'mean'))
        .reset_index()
    )

    # Keep the column order callers already see: value first, timestamp second.
    return per_minute[['cloud_cover', 'DateTime']].dropna()

In [3]:
# Load the raw cloud-cover CSV; preparedata() returns one mean value per minute
# together with a parsed DateTime column.
cloud_cover = preparedata('../prophet_ver2/Bangkhuntean_CloudCover_2021-16Nov-16Dec.csv')
# Bare last expression so the notebook renders the resulting frame.
cloud_cover

Unnamed: 0,cloud_cover,DateTime
0,0.990000,2021-11-16 11:15:00
1,0.989000,2021-11-16 11:16:00
2,0.987667,2021-11-16 11:17:00
3,0.987000,2021-11-16 11:18:00
4,0.987000,2021-11-16 11:19:00
...,...,...
15056,0.168000,2021-12-16 13:58:00
15057,0.169000,2021-12-16 13:59:00
15058,0.147000,2021-12-16 14:00:00
15059,0.145000,2021-12-16 14:01:00


In [4]:
def lags_train_test(data, lags):
    """Turn a sequence into sliding-window samples for supervised learning.

    For every position ``i`` in ``range(lags, len(data))`` the slice
    ``data[i - lags:i]`` becomes one input sample and ``data[i]`` becomes the
    target that follows it.

    Parameters
    ----------
    data : sequence or numpy.ndarray
        Ordered observations; it must support ``len``, slicing and integer
        indexing.
    lags : int
        Number of preceding observations in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(windows, targets)``, each holding one entry per position.
    """
    positions = range(lags, len(data))
    windows = [data[end - lags:end] for end in positions]
    targets = [data[end] for end in positions]
    return np.array(windows), np.array(targets)

In [5]:
def Xtrain_ytrain(train,test,lags):
    """Scale the cloud-cover series and build lagged windows for both splits.

    The scaler is fitted on the training values only and then reused for the
    test values. The test windows are seeded with the last ``lags`` training
    observations so that every test row gets a prediction target.

    Parameters
    ----------
    train, test : pandas.DataFrame
        Consecutive splits of the series; each must have a ``cloud_cover``
        column. Other columns are ignored.
    lags : int
        Number of preceding observations in each input window.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, sc)`` where ``sc`` is the fitted
        ``MinMaxScaler``; callers use it to map predictions back to the
        original scale.

    Notes
    -----
    The shapes of the four arrays are printed as a side effect.
    """
    # Fit and transform on plain (n, 1) arrays. Fitting on a DataFrame and later
    # transforming a bare ndarray makes scikit-learn warn about missing feature
    # names, and it silently requires `train` to have exactly one column.
    train_values = train[['cloud_cover']].to_numpy()
    sc = MinMaxScaler()
    sc.fit(train_values)
    train_set_scale = sc.transform(train_values)

    # train
    X_train, y_train = lags_train_test(train_set_scale, lags)
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    print("X_train Shape :", X_train.shape)
    print("y_train Shape :", y_train.shape)

    # Test inputs: the tail of the training data (the last `lags` values)
    # followed by the whole test split, so the first test target has a window.
    dataset_total = pd.concat((train['cloud_cover'], test['cloud_cover']), axis=0)
    first_input = len(dataset_total) - len(test) - lags
    # .iloc keeps the slice positional whatever index the frames carry.
    inputs = dataset_total.iloc[first_input:].to_numpy().reshape(-1, 1)
    inputs = sc.transform(inputs)

    # test
    X_test, y_test = lags_train_test(inputs, lags)
    print("X_test Shape : ", X_test.shape)
    print("y_test Shape : ", y_test.shape)
    return X_train, y_train, X_test, y_test, sc

In [6]:
# Chronological 80/20 split: shuffle=False keeps the rows in their original
# order, so the test set is the trailing 20% of the series.
train,test = train_test_split(cloud_cover, train_size=0.8, shuffle=False)
print('Train Shape', train.shape)
print('Test Shape', test.shape)

Train Shape (12048, 2)
Test Shape (3013, 2)


In [13]:
# Show the held-out split.
# NOTE(review): this cell's execution count (13) is higher than that of the
# set_index cell below (7), and its saved output shows DateTime as the index.
# On a top-to-bottom run DateTime is still a regular column at this point.
test

Unnamed: 0_level_0,cloud_cover
DateTime,Unnamed: 1_level_1
2021-12-10 15:24:00,0.538
2021-12-10 15:25:00,0.588
2021-12-10 15:26:00,0.566
2021-12-10 15:27:00,0.562
2021-12-10 15:28:00,0.612
...,...
2021-12-16 13:58:00,0.168
2021-12-16 13:59:00,0.169
2021-12-16 14:00:00,0.147
2021-12-16 14:01:00,0.145


In [7]:
# Move DateTime into the index, leaving 'cloud_cover' as the only column of each frame.
# NOTE(review): train/test are rebound to themselves, so this cell is not
# idempotent; running it a second time raises KeyError because 'DateTime' is
# no longer a column.
train = train.set_index('DateTime')
test = test.set_index('DateTime')

# lag5

In [8]:
# Build the scaled lag-5 windows (X_*) and their next-step targets (y_*) for
# both splits. `sc` is the scaler fitted on the training data and is needed
# later to map predictions back to the original scale.
X_train, y_train, X_test, y_test, sc = Xtrain_ytrain(train,test,5)

X_train Shape : (12043, 5, 1)
y_train Shape : (12043, 1)
X_test Shape :  (3013, 5, 1)
y_test Shape :  (3013, 1)




In [9]:
# Working copy that will receive the lag-5 predictions; `test` itself stays
# untouched so it can serve as ground truth when computing the error.
test1 = test.copy()

In [10]:
# Load the previously saved .h5 models, one per lag setting (5, 10, 15, 20, 25, 30).
# The file names suggest each was trained on an 80% split (TODO confirm against
# the training notebook).
# NOTE(review): `ccmin` is the lag-5 model; unlike its siblings its name carries
# no numeric suffix.
ccmin = models.load_model('../lstm_ver2/model_train_80%_lag5.h5')
cc10min= models.load_model('../lstm_ver2/model_train_80%_lag10.h5')
cc15min= models.load_model('../lstm_ver2/model_train_80%_lag15.h5')
cc20min= models.load_model('../lstm_ver2/model_train_80%_lag20.h5')
cc25min= models.load_model('../lstm_ver2/model_train_80%_lag25.h5')
cc30min= models.load_model('../lstm_ver2/model_train_80%_lag30.h5')

In [11]:
# Run the lag-5 model on the scaled test windows.
predict = ccmin.predict(X_test)
# Undo the MinMax scaling so the predictions are on the original cloud_cover scale.
predict1 = sc.inverse_transform(predict)
# Overwrite the copied column with the predictions; the DateTime index is kept.
test1['cloud_cover'] = predict1
test1



Unnamed: 0_level_0,cloud_cover
DateTime,Unnamed: 1_level_1
2021-12-10 15:24:00,0.566234
2021-12-10 15:25:00,0.546068
2021-12-10 15:26:00,0.594669
2021-12-10 15:27:00,0.567256
2021-12-10 15:28:00,0.568004
...,...
2021-12-16 13:58:00,0.170576
2021-12-16 13:59:00,0.170497
2021-12-16 14:00:00,0.171472
2021-12-16 14:01:00,0.150619


In [14]:
# Compare actual test values (first argument) with the lag-5 predictions.
# scikit-learn's mean_absolute_percentage_error returns a fraction, not a
# percentage, so a printed 0.074 means roughly 7.4%.
lstm_mape_error = mape(test["cloud_cover"], test1["cloud_cover"])
print(f'MAPE Error 5 : {lstm_mape_error}')

MAPE Error 5 : 0.0741578180197622


# lag10

In [15]:
# Lag-10 evaluation: build windows of 10 lagged values; sc1 is a fresh scaler
# fitted on the training data.
X_train1, y_train1, X_test1, y_test1, sc1 = Xtrain_ytrain(train,test,10)
# Separate copy per lag so the ground-truth `test` frame is never overwritten.
test2 = test.copy()
# Predict on the scaled windows, then map the output back to the original scale.
predict2 = cc10min.predict(X_test1)
predict3 = sc1.inverse_transform(predict2)
test2['cloud_cover'] = predict3
# MAPE (as a fraction) of predictions against the actual test values.
lstm_mape_error1 = mape(test["cloud_cover"], test2["cloud_cover"])
print(f'MAPE Error 10 : {lstm_mape_error1}')

X_train Shape : (12038, 10, 1)
y_train Shape : (12038, 1)
X_test Shape :  (3013, 10, 1)
y_test Shape :  (3013, 1)




MAPE Error 10 : 0.0758120102081082


# lag15

In [16]:
# Lag-15 evaluation: build windows of 15 lagged values; sc2 is a fresh scaler
# fitted on the training data.
X_train2, y_train2, X_test2, y_test2, sc2 = Xtrain_ytrain(train,test,15)
# Separate copy per lag so the ground-truth `test` frame is never overwritten.
test3 = test.copy()
# Predict on the scaled windows, then map the output back to the original scale.
predict4 = cc15min.predict(X_test2)
predict5 = sc2.inverse_transform(predict4)
test3['cloud_cover'] = predict5
# MAPE (as a fraction) of predictions against the actual test values.
lstm_mape_error2 = mape(test["cloud_cover"], test3["cloud_cover"])
print(f'MAPE Error 15 : {lstm_mape_error2}')

X_train Shape : (12033, 15, 1)
y_train Shape : (12033, 1)
X_test Shape :  (3013, 15, 1)
y_test Shape :  (3013, 1)




MAPE Error 15 : 0.09112633675639344


# lag20

In [32]:
# Lag-20 evaluation: build windows of 20 lagged values; sc3 is a fresh scaler
# fitted on the training data.
X_train3, y_train3, X_test3, y_test3, sc3 = Xtrain_ytrain(train,test,20)
# Separate copy per lag so the ground-truth `test` frame is never overwritten.
test4 = test.copy()
# Predict on the scaled windows, then map the output back to the original scale.
predict6 = cc20min.predict(X_test3)
predict7 = sc3.inverse_transform(predict6)
test4['cloud_cover'] = predict7
# MAPE (as a fraction) of predictions against the actual test values.
lstm_mape_error3 = mape(test["cloud_cover"], test4["cloud_cover"])
print(f'MAPE Error 20 : {lstm_mape_error3}')

X_train Shape : (12028, 20, 1)
y_train Shape : (12028, 1)
X_test Shape :  (3013, 20, 1)
y_test Shape :  (3013, 1)
 1/95 [..............................] - ETA: 5s



MAPE Error 20 : 0.07241119428764212


# lag25

In [30]:
# Lag-25 evaluation: build windows of 25 lagged values; sc4 is a fresh scaler
# fitted on the training data.
X_train4, y_train4, X_test4, y_test4, sc4 = Xtrain_ytrain(train,test,25)
# Separate copy per lag so the ground-truth `test` frame is never overwritten.
test5 = test.copy()
# Predict on the scaled windows, then map the output back to the original scale.
predict8 = cc25min.predict(X_test4)
predict9 = sc4.inverse_transform(predict8)
test5['cloud_cover'] = predict9
# MAPE (as a fraction) of predictions against the actual test values.
lstm_mape_error4 = mape(test["cloud_cover"], test5["cloud_cover"])
print(f'MAPE Error 25 : {lstm_mape_error4}')

X_train Shape : (12023, 25, 1)
y_train Shape : (12023, 1)
X_test Shape :  (3013, 25, 1)
y_test Shape :  (3013, 1)
10/95 [==>...........................] - ETA: 0s



MAPE Error 25 : 0.0719229878599183


# lag30

In [33]:
# Lag-30 evaluation: build windows of 30 lagged values; sc5 is a fresh scaler
# fitted on the training data.
X_train5, y_train5, X_test5, y_test5, sc5 = Xtrain_ytrain(train,test,30)
# Separate copy per lag so the ground-truth `test` frame is never overwritten.
test6 = test.copy()
# Predict on the scaled windows, then map the output back to the original scale.
predict10 = cc30min.predict(X_test5)
predict11 = sc5.inverse_transform(predict10)
test6['cloud_cover'] = predict11
# MAPE (as a fraction) of predictions against the actual test values.
lstm_mape_error5 = mape(test["cloud_cover"], test6["cloud_cover"])
print(f'MAPE Error 30 : {lstm_mape_error5}')

X_train Shape : (12018, 30, 1)
y_train Shape : (12018, 1)
X_test Shape :  (3013, 30, 1)
y_test Shape :  (3013, 1)
 9/95 [=>............................] - ETA: 0s



MAPE Error 30 : 0.07501506948755776
